1/*
2 * Copyright © 2021 Collabora Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24#include "gen_macros.h"
25
26#include "nir/nir_builder.h"
27#include "pan_encoder.h"
28#include "pan_shader.h"
29
30#include "panvk_private.h"
31
/* Emit a texture descriptor (plus its surface payload) for the source image
 * view of a copy, and return a GPU pointer suitable for the DCD `textures`
 * field. The descriptor layout differs per architecture, hence the two
 * compile-time paths below.
 */
static mali_ptr
panvk_meta_copy_img_emit_texture(struct panfrost_device *pdev,
                                 struct pan_pool *desc_pool,
                                 const struct pan_image_view *view)
{
#if PAN_ARCH >= 6
   /* Bifrost+: the texture descriptor and its surface payload live in two
    * separate pool allocations; the DCD points at the descriptor directly.
    */
   struct panfrost_ptr texture =
      pan_pool_alloc_desc(desc_pool, TEXTURE);
   size_t payload_size =
      GENX(panfrost_estimate_texture_payload_size)(view);
   struct panfrost_ptr surfaces =
      pan_pool_alloc_aligned(desc_pool, payload_size,
                             pan_alignment(SURFACE_WITH_STRIDE));

   GENX(panfrost_new_texture)(pdev, view, texture.cpu, &surfaces);

   return texture.gpu;
#else
   /* Midgard: descriptor and payload are contiguous, and the `textures`
    * field expects a pointer to an array of descriptor pointers, so the
    * descriptor address is itself uploaded and that indirection returned.
    */
   size_t sz = pan_size(TEXTURE) +
               GENX(panfrost_estimate_texture_payload_size)(view);
   struct panfrost_ptr texture =
      pan_pool_alloc_aligned(desc_pool, sz, pan_alignment(TEXTURE));
   struct panfrost_ptr surfaces = {
      .cpu = texture.cpu + pan_size(TEXTURE),
      .gpu = texture.gpu + pan_size(TEXTURE),
   };

   GENX(panfrost_new_texture)(pdev, view, texture.cpu, &surfaces);

   return pan_pool_upload_aligned(desc_pool, &texture.gpu,
                                  sizeof(mali_ptr),
                                  sizeof(mali_ptr));
#endif
}
66
67static mali_ptr
68panvk_meta_copy_img_emit_sampler(struct panfrost_device *pdev,
69                                 struct pan_pool *desc_pool)
70{
71   struct panfrost_ptr sampler =
72      pan_pool_alloc_desc(desc_pool, SAMPLER);
73
74   pan_pack(sampler.cpu, SAMPLER, cfg) {
75#if PAN_ARCH >= 6
76      cfg.seamless_cube_map = false;
77#endif
78      cfg.normalized_coordinates = false;
79      cfg.minify_nearest = true;
80      cfg.magnify_nearest = true;
81   }
82
83   return sampler.gpu;
84}
85
/* Emit the varying (attribute) descriptor and its backing buffer descriptor
 * for the per-vertex source coordinates consumed by img2img copy shaders.
 * `coordinates` points at 4 vec4s (one per vertex of the quad). On return
 * *varyings / *varying_bufs hold the GPU addresses to plug into the DCD.
 */
static void
panvk_meta_copy_emit_varying(struct pan_pool *pool,
                             mali_ptr coordinates,
                             mali_ptr *varying_bufs,
                             mali_ptr *varyings)
{
   /* Bifrost needs an empty desc to mark end of prefetching */
   bool padding_buffer = PAN_ARCH >= 6;

   struct panfrost_ptr varying =
      pan_pool_alloc_desc(pool, ATTRIBUTE);
   struct panfrost_ptr varying_buffer =
      pan_pool_alloc_desc_array(pool, (padding_buffer ? 2 : 1),
                                     ATTRIBUTE_BUFFER);

   pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) {
      cfg.pointer = coordinates;
      /* 4 vertices x vec4 of floats */
      cfg.stride = 4 * sizeof(uint32_t);
      cfg.size = cfg.stride * 4;
   }

   if (padding_buffer) {
      /* Zero-filled terminator descriptor right after the real one. */
      pan_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER),
               ATTRIBUTE_BUFFER, cfg);
   }

   pan_pack(varying.cpu, ATTRIBUTE, cfg) {
      cfg.buffer_index = 0;
      /* Midgard applies the per-vertex offset itself. */
      cfg.offset_enable = PAN_ARCH <= 5;
      cfg.format = pool->dev->formats[PIPE_FORMAT_R32G32B32_FLOAT].hw;
   }

   *varyings = varying.gpu;
   *varying_bufs = varying_buffer.gpu;
}
121
/* Fill a DRAW (DCD) section shared by the tiler and compute copy jobs.
 * `src_coords == 0` (compute path) skips varying emission; all other
 * arguments are pre-built descriptor GPU addresses (0 when unused).
 * `out` points into the job descriptor's DRAW section.
 */
static void
panvk_meta_copy_emit_dcd(struct pan_pool *pool,
                         mali_ptr src_coords, mali_ptr dst_coords,
                         mali_ptr texture, mali_ptr sampler,
                         mali_ptr vpd, mali_ptr tsd, mali_ptr rsd,
                         mali_ptr ubos, mali_ptr push_constants,
                         void *out)
{
   pan_pack(out, DRAW, cfg) {
      cfg.four_components_per_vertex = true;
      cfg.draw_descriptor_is_64b = true;
      cfg.thread_storage = tsd;
      cfg.state = rsd;
      cfg.uniform_buffers = ubos;
      cfg.push_uniforms = push_constants;
      cfg.position = dst_coords;
      if (src_coords) {
              panvk_meta_copy_emit_varying(pool, src_coords,
                                           &cfg.varying_buffers,
                                           &cfg.varyings);
      }
      cfg.viewport = vpd;
      cfg.textures = texture;
      cfg.samplers = sampler;
   }
}
148
/* Emit and enqueue a tiler job drawing a 4-vertex triangle strip (a quad)
 * with the given copy state, and return the job pointer so the caller can
 * track it in the batch.
 */
static struct panfrost_ptr
panvk_meta_copy_emit_tiler_job(struct pan_pool *desc_pool,
                               struct pan_scoreboard *scoreboard,
                               mali_ptr src_coords, mali_ptr dst_coords,
                               mali_ptr texture, mali_ptr sampler,
                               mali_ptr ubo, mali_ptr push_constants,
                               mali_ptr vpd, mali_ptr rsd,
                               mali_ptr tsd, mali_ptr tiler)
{
   struct panfrost_ptr job =
      pan_pool_alloc_desc(desc_pool, TILER_JOB);

   panvk_meta_copy_emit_dcd(desc_pool, src_coords, dst_coords,
                            texture, sampler, vpd, tsd, rsd, ubo, push_constants,
                            pan_section_ptr(job.cpu, TILER_JOB, DRAW));

   pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) {
      /* Full-rect quad as a 4-vertex strip. */
      cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
      cfg.index_count = 4;
      cfg.job_task_split = 6;
   }

   pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) {
      cfg.constant = 1.0f;
   }

   void *invoc = pan_section_ptr(job.cpu,
                                 TILER_JOB,
                                 INVOCATION);
   /* 1x4x1 invocations, workgroup size 1x1x1 (one thread per vertex). */
   panfrost_pack_work_groups_compute(invoc, 1, 4,
                                     1, 1, 1, 1, true, false);

#if PAN_ARCH >= 6
   pan_section_pack(job.cpu, TILER_JOB, PADDING, cfg);
   pan_section_pack(job.cpu, TILER_JOB, TILER, cfg) {
      cfg.address = tiler;
   }
#endif

   panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER,
                    false, false, 0, 0, &job, false);
   return job;
}
192
/* Emit and enqueue a compute job for buffer-involved copies. `num_wg` and
 * `wg_sz` give the dispatch dimensions; the DCD gets no coordinates or
 * viewport (compute shaders derive addresses from push constants/UBOs).
 */
static struct panfrost_ptr
panvk_meta_copy_emit_compute_job(struct pan_pool *desc_pool,
                                 struct pan_scoreboard *scoreboard,
                                 const struct pan_compute_dim *num_wg,
                                 const struct pan_compute_dim *wg_sz,
                                 mali_ptr texture, mali_ptr sampler,
                                 mali_ptr ubo, mali_ptr push_constants,
                                 mali_ptr rsd, mali_ptr tsd)
{
   struct panfrost_ptr job =
      pan_pool_alloc_desc(desc_pool, COMPUTE_JOB);

   void *invoc = pan_section_ptr(job.cpu,
                                 COMPUTE_JOB,
                                 INVOCATION);
   panfrost_pack_work_groups_compute(invoc, num_wg->x, num_wg->y, num_wg->z,
                                     wg_sz->x, wg_sz->y, wg_sz->z,
                                     false, false);

   pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
      cfg.job_task_split = 8;
   }

   /* No src/dst coords and no viewport for the compute path. */
   panvk_meta_copy_emit_dcd(desc_pool, 0, 0, texture, sampler,
                            0, tsd, rsd, ubo, push_constants,
                            pan_section_ptr(job.cpu, COMPUTE_JOB, DRAW));

   panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_COMPUTE,
                    false, false, 0, 0, &job, false);
   return job;
}
224
225
#if PAN_ARCH >= 6
/* Map a raw (non-blendable) texel size in bytes to the matching Bifrost
 * UINT memory format, pre-shifted into the blend-descriptor format field.
 */
static uint32_t
panvk_meta_copy_img_bifrost_raw_format(unsigned texelsize)
{
   if (texelsize == 6)
      return MALI_RGB16UI << 12;
   if (texelsize == 8)
      return MALI_RG32UI << 12;
   if (texelsize == 12)
      return MALI_RGB32UI << 12;
   if (texelsize == 16)
      return MALI_RGBA32UI << 12;

   unreachable("Invalid texel size\n");
}
#endif
239
/* Emit the renderer-state + blend descriptor pair for a copy whose
 * destination is an image rendered through the fragment pipeline.
 * `wrmask` selects which destination components the copy writes; when it
 * is partial the blend unit (blendable formats) or a tilebuffer read (raw
 * formats) preserves the untouched components. `from_img` indicates the
 * shader samples a source texture (vs. reading a buffer).
 */
static mali_ptr
panvk_meta_copy_to_img_emit_rsd(struct panfrost_device *pdev,
                                struct pan_pool *desc_pool,
                                mali_ptr shader,
                                const struct pan_shader_info *shader_info,
                                enum pipe_format fmt, unsigned wrmask,
                                bool from_img)
{
   struct panfrost_ptr rsd_ptr =
      pan_pool_alloc_desc_aggregate(desc_pool,
                                    PAN_DESC(RENDERER_STATE),
                                    PAN_DESC_ARRAY(1, BLEND));

   /* Texels wider than 32 bits use raw UINT formats (not blendable). */
   bool raw = util_format_get_blocksize(fmt) > 4;
   unsigned fullmask = (1 << util_format_get_nr_components(fmt)) - 1;
   bool partialwrite = fullmask != wrmask && !raw;
   bool readstb = fullmask != wrmask && raw;

   pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
      pan_shader_prepare_rsd(shader_info, shader, &cfg);
      if (from_img) {
         cfg.shader.varying_count = 1;
         cfg.shader.texture_count = 1;
         cfg.shader.sampler_count = 1;
      }
      cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
      cfg.multisample_misc.sample_mask = UINT16_MAX;
      cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS;
      cfg.stencil_mask_misc.stencil_mask_front = 0xFF;
      cfg.stencil_mask_misc.stencil_mask_back = 0xFF;
      cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS;
      cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.mask = 0xFF;
      cfg.stencil_back = cfg.stencil_front;

#if PAN_ARCH >= 6
      cfg.properties.allow_forward_pixel_to_be_killed = true;
      /* Pixels can only be killed early when the copy writes every
       * component without reading back destination contents.
       */
      cfg.properties.allow_forward_pixel_to_kill =
         !partialwrite && !readstb;
      cfg.properties.zs_update_operation =
         MALI_PIXEL_KILL_STRONG_EARLY;
      cfg.properties.pixel_kill_operation =
         MALI_PIXEL_KILL_FORCE_EARLY;
#else
      cfg.properties.shader_reads_tilebuffer = readstb;
      cfg.properties.work_register_count = shader_info->work_reg_count;
      cfg.properties.force_early_z = true;
      cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
#endif
   }

   /* Blend descriptor immediately follows the RSD in the aggregate. */
   pan_pack(rsd_ptr.cpu + pan_size(RENDERER_STATE), BLEND, cfg) {
      cfg.round_to_fb_precision = true;
      cfg.load_destination = partialwrite;
      /* Replace blend: dst = src * 1 + dst * 0. */
      cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
      cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
      cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
      cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
      cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
      cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
#if PAN_ARCH >= 6
      cfg.internal.mode =
         partialwrite ?
         MALI_BLEND_MODE_FIXED_FUNCTION :
         MALI_BLEND_MODE_OPAQUE;
      cfg.equation.color_mask = partialwrite ? wrmask : 0xf;
      cfg.internal.fixed_function.num_comps = 4;
      if (!raw) {
         cfg.internal.fixed_function.conversion.memory_format =
            panfrost_format_to_bifrost_blend(pdev, fmt, false);
         cfg.internal.fixed_function.conversion.register_format =
            MALI_REGISTER_FILE_FORMAT_F32;
      } else {
         unsigned imgtexelsz = util_format_get_blocksize(fmt);

         cfg.internal.fixed_function.conversion.memory_format =
            panvk_meta_copy_img_bifrost_raw_format(imgtexelsz);
         /* 16-bit components (texel size 6) use the U16 register file. */
         cfg.internal.fixed_function.conversion.register_format =
            (imgtexelsz & 2) ?
            MALI_REGISTER_FILE_FORMAT_U16 :
            MALI_REGISTER_FILE_FORMAT_U32;
      }
#else
      cfg.equation.color_mask = wrmask;
#endif
   }

   return rsd_ptr.gpu;
}
331
332static mali_ptr
333panvk_meta_copy_emit_ubo(struct panfrost_device *pdev,
334                         struct pan_pool *pool,
335                         void *data, unsigned size)
336{
337   struct panfrost_ptr ubo = pan_pool_alloc_desc(pool, UNIFORM_BUFFER);
338
339   pan_pack(ubo.cpu, UNIFORM_BUFFER, cfg) {
340      cfg.entries = DIV_ROUND_UP(size, 16);
341      cfg.pointer = pan_pool_upload_aligned(pool, data, size, 16);
342   }
343
344   return ubo.gpu;
345}
346
347static mali_ptr
348panvk_meta_copy_emit_push_constants(struct panfrost_device *pdev,
349                                    const struct panfrost_ubo_push *pushmap,
350                                    struct pan_pool *pool,
351                                    const void *data, unsigned size)
352{
353   assert(pushmap->count <= (size / 4));
354
355   const uint32_t *in = data;
356   uint32_t pushvals[PAN_MAX_PUSH];
357
358   for (unsigned i = 0; i < pushmap->count; i++) {
359      assert(i < ARRAY_SIZE(pushvals));
360      assert(pushmap->words[i].ubo == 0);
361      assert(pushmap->words[i].offset < size);
362      pushvals[i] = in[pushmap->words[i].offset / 4];
363   }
364
365   return pan_pool_upload_aligned(pool, pushvals, size, 16);
366}
367
368static mali_ptr
369panvk_meta_copy_to_buf_emit_rsd(struct panfrost_device *pdev,
370                                struct pan_pool *desc_pool,
371                                mali_ptr shader,
372                                const struct pan_shader_info *shader_info,
373                                bool from_img)
374{
375   struct panfrost_ptr rsd_ptr =
376      pan_pool_alloc_desc_aggregate(desc_pool,
377                                    PAN_DESC(RENDERER_STATE));
378
379   pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
380      pan_shader_prepare_rsd(shader_info, shader, &cfg);
381      if (from_img) {
382         cfg.shader.texture_count = 1;
383         cfg.shader.sampler_count = 1;
384      }
385   }
386
387   return rsd_ptr.gpu;
388}
389
390static mali_ptr
391panvk_meta_copy_img2img_shader(struct panfrost_device *pdev,
392                               struct pan_pool *bin_pool,
393                               enum pipe_format srcfmt,
394                               enum pipe_format dstfmt, unsigned dstmask,
395                               unsigned texdim, bool texisarray, bool is_ms,
396                               struct pan_shader_info *shader_info)
397{
398   nir_builder b =
399      nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
400                                     GENX(pan_shader_get_compiler_options)(),
401                                     "panvk_meta_copy_img2img(srcfmt=%s,dstfmt=%s,%dD%s%s)",
402                                     util_format_name(srcfmt), util_format_name(dstfmt),
403                                     texdim, texisarray ? "[]" : "", is_ms ? ",ms" : "");
404
405   b.shader->info.internal = true;
406
407   nir_variable *coord_var =
408      nir_variable_create(b.shader, nir_var_shader_in,
409                          glsl_vector_type(GLSL_TYPE_FLOAT, texdim + texisarray),
410                          "coord");
411   coord_var->data.location = VARYING_SLOT_TEX0;
412   nir_ssa_def *coord = nir_f2u32(&b, nir_load_var(&b, coord_var));
413
414   nir_tex_instr *tex = nir_tex_instr_create(b.shader, is_ms ? 2 : 1);
415   tex->op = is_ms ? nir_texop_txf_ms : nir_texop_txf;
416   tex->texture_index = 0;
417   tex->is_array = texisarray;
418   tex->dest_type = util_format_is_unorm(srcfmt) ?
419                    nir_type_float32 : nir_type_uint32;
420
421   switch (texdim) {
422   case 1: tex->sampler_dim = GLSL_SAMPLER_DIM_1D; break;
423   case 2: tex->sampler_dim = GLSL_SAMPLER_DIM_2D; break;
424   case 3: tex->sampler_dim = GLSL_SAMPLER_DIM_3D; break;
425   default: unreachable("Invalid texture dimension");
426   }
427
428   tex->src[0].src_type = nir_tex_src_coord;
429   tex->src[0].src = nir_src_for_ssa(coord);
430   tex->coord_components = texdim + texisarray;
431
432   if (is_ms) {
433      tex->src[1].src_type = nir_tex_src_ms_index;
434      tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(&b));
435   }
436
437   nir_ssa_dest_init(&tex->instr, &tex->dest, 4,
438                     nir_alu_type_get_type_size(tex->dest_type), NULL);
439   nir_builder_instr_insert(&b, &tex->instr);
440
441   nir_ssa_def *texel = &tex->dest.ssa;
442
443   unsigned dstcompsz =
444      util_format_get_component_bits(dstfmt, UTIL_FORMAT_COLORSPACE_RGB, 0);
445   unsigned ndstcomps = util_format_get_nr_components(dstfmt);
446   const struct glsl_type *outtype = NULL;
447
448   if (srcfmt == PIPE_FORMAT_R5G6B5_UNORM && dstfmt == PIPE_FORMAT_R8G8_UNORM) {
449      nir_ssa_def *rgb =
450         nir_f2u32(&b, nir_fmul(&b, texel,
451                                nir_vec3(&b,
452                                         nir_imm_float(&b, 31),
453                                         nir_imm_float(&b, 63),
454                                         nir_imm_float(&b, 31))));
455      nir_ssa_def *rg =
456         nir_vec2(&b,
457                  nir_ior(&b, nir_channel(&b, rgb, 0),
458                          nir_ishl(&b, nir_channel(&b, rgb, 1),
459                                   nir_imm_int(&b, 5))),
460                  nir_ior(&b,
461                          nir_ushr_imm(&b, nir_channel(&b, rgb, 1), 3),
462                          nir_ishl(&b, nir_channel(&b, rgb, 2),
463                                   nir_imm_int(&b, 3))));
464      rg = nir_iand_imm(&b, rg, 255);
465      texel = nir_fmul_imm(&b, nir_u2f32(&b, rg), 1.0 / 255);
466      outtype = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
467   } else if (srcfmt == PIPE_FORMAT_R8G8_UNORM && dstfmt == PIPE_FORMAT_R5G6B5_UNORM) {
468      nir_ssa_def *rg = nir_f2u32(&b, nir_fmul_imm(&b, texel, 255));
469      nir_ssa_def *rgb =
470         nir_vec3(&b,
471                  nir_channel(&b, rg, 0),
472                  nir_ior(&b,
473                          nir_ushr_imm(&b, nir_channel(&b, rg, 0), 5),
474                          nir_ishl(&b, nir_channel(&b, rg, 1),
475                                   nir_imm_int(&b, 3))),
476                  nir_ushr_imm(&b, nir_channel(&b, rg, 1), 3));
477      rgb = nir_iand(&b, rgb,
478                     nir_vec3(&b,
479                              nir_imm_int(&b, 31),
480                              nir_imm_int(&b, 63),
481                              nir_imm_int(&b, 31)));
482      texel = nir_fmul(&b, nir_u2f32(&b, rgb),
483                       nir_vec3(&b,
484                                nir_imm_float(&b, 1.0 / 31),
485                                nir_imm_float(&b, 1.0 / 63),
486                                nir_imm_float(&b, 1.0 / 31)));
487      outtype = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
488   } else {
489      assert(srcfmt == dstfmt);
490      enum glsl_base_type basetype;
491      if (util_format_is_unorm(dstfmt)) {
492         basetype = GLSL_TYPE_FLOAT;
493      } else if (dstcompsz == 16) {
494         basetype = GLSL_TYPE_UINT16;
495      } else {
496         assert(dstcompsz == 32);
497         basetype = GLSL_TYPE_UINT;
498      }
499
500      if (dstcompsz == 16)
501         texel = nir_u2u16(&b, texel);
502
503      texel = nir_channels(&b, texel, (1 << ndstcomps) - 1);
504      outtype = glsl_vector_type(basetype, ndstcomps);
505   }
506
507   nir_variable *out =
508      nir_variable_create(b.shader, nir_var_shader_out, outtype, "out");
509   out->data.location = FRAG_RESULT_DATA0;
510
511   unsigned fullmask = (1 << ndstcomps) - 1;
512   if (dstcompsz > 8 && dstmask != fullmask) {
513      nir_ssa_def *oldtexel = nir_load_var(&b, out);
514      nir_ssa_def *dstcomps[4];
515
516      for (unsigned i = 0; i < ndstcomps; i++) {
517         if (dstmask & BITFIELD_BIT(i))
518            dstcomps[i] = nir_channel(&b, texel, i);
519         else
520            dstcomps[i] = nir_channel(&b, oldtexel, i);
521      }
522
523      texel = nir_vec(&b, dstcomps, ndstcomps);
524   }
525
526   nir_store_var(&b, out, texel, 0xff);
527
528   struct panfrost_compile_inputs inputs = {
529      .gpu_id = pdev->gpu_id,
530      .is_blit = true,
531   };
532
533#if PAN_ARCH >= 6
534   pan_pack(&inputs.bifrost.rt_conv[0], INTERNAL_CONVERSION, cfg) {
535      cfg.memory_format = (dstcompsz == 2 ? MALI_RG16UI : MALI_RG32UI) << 12;
536      cfg.register_format = dstcompsz == 2 ?
537                            MALI_REGISTER_FILE_FORMAT_U16 :
538                            MALI_REGISTER_FILE_FORMAT_U32;
539   }
540   inputs.bifrost.static_rt_conv = true;
541#endif
542
543   struct util_dynarray binary;
544
545   util_dynarray_init(&binary, NULL);
546   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
547
548   shader_info->fs.sample_shading = is_ms;
549
550   mali_ptr shader =
551      pan_pool_upload_aligned(bin_pool, binary.data, binary.size,
552                              PAN_ARCH >= 6 ? 128 : 64);
553
554   util_dynarray_fini(&binary);
555   ralloc_free(b.shader);
556
557   return shader;
558}
559
560static enum pipe_format
561panvk_meta_copy_img_format(enum pipe_format fmt)
562{
563   /* We can't use a non-compressed format when handling a tiled/AFBC
564    * compressed format because the tile size differ (4x4 blocks for
565    * compressed formats and 16x16 texels for non-compressed ones).
566    */
567   assert(!util_format_is_compressed(fmt));
568
569   /* Pick blendable formats when we can, otherwise pick the UINT variant
570    * matching the texel size.
571    */
572   switch (util_format_get_blocksize(fmt)) {
573   case 16: return PIPE_FORMAT_R32G32B32A32_UINT;
574   case 12: return PIPE_FORMAT_R32G32B32_UINT;
575   case 8: return PIPE_FORMAT_R32G32_UINT;
576   case 6: return PIPE_FORMAT_R16G16B16_UINT;
577   case 4: return PIPE_FORMAT_R8G8B8A8_UNORM;
578   case 2: return (fmt == PIPE_FORMAT_R5G6B5_UNORM ||
579                   fmt == PIPE_FORMAT_B5G6R5_UNORM) ?
580                  PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UNORM;
581   case 1: return PIPE_FORMAT_R8_UNORM;
582   default: unreachable("Unsupported format\n");
583   }
584}
585
/* Key identifying one pre-compiled img2img copy shader variant. */
struct panvk_meta_copy_img2img_format_info {
   enum pipe_format srcfmt;  /* canonical source format */
   enum pipe_format dstfmt;  /* canonical destination format */
   unsigned dstmask;         /* destination component writemask */
};
591
/* Every (srcfmt, dstfmt, dstmask) combination we pre-compile a copy shader
 * for. Order matters: the index in this table is the fmtidx used by
 * panvk_meta_copy_img2img_format_idx() and the meta shader cache.
 */
static const struct panvk_meta_copy_img2img_format_info panvk_meta_copy_img2img_fmts[] = {
   { PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R8_UNORM, 0x1},
   { PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_R5G6B5_UNORM, 0x7},
   { PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_R8G8_UNORM, 0x3},
   { PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R5G6B5_UNORM, 0x7},
   { PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R8G8_UNORM, 0x3},
   /* Z24S8(depth) */
   { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0x7 },
   /* Z24S8(stencil) */
   { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0x8 },
   { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0xf },
   { PIPE_FORMAT_R16G16B16_UINT, PIPE_FORMAT_R16G16B16_UINT, 0x7 },
   { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x3 },
   /* Z32S8X24(depth) */
   { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x1 },
   /* Z32S8X24(stencil) */
   { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x2 },
   { PIPE_FORMAT_R32G32B32_UINT, PIPE_FORMAT_R32G32B32_UINT, 0x7 },
   { PIPE_FORMAT_R32G32B32A32_UINT, PIPE_FORMAT_R32G32B32A32_UINT, 0xf },
};
612
613static unsigned
614panvk_meta_copy_img2img_format_idx(struct panvk_meta_copy_img2img_format_info key)
615{
616   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2img_fmts) == PANVK_META_COPY_IMG2IMG_NUM_FORMATS);
617
618   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2img_fmts); i++) {
619      if (!memcmp(&key, &panvk_meta_copy_img2img_fmts[i], sizeof(key)))
620         return i;
621   }
622
623   unreachable("Invalid image format\n");
624}
625
626static unsigned
627panvk_meta_copy_img_mask(enum pipe_format imgfmt, VkImageAspectFlags aspectMask)
628{
629   if (aspectMask != VK_IMAGE_ASPECT_DEPTH_BIT &&
630       aspectMask != VK_IMAGE_ASPECT_STENCIL_BIT) {
631      enum pipe_format outfmt = panvk_meta_copy_img_format(imgfmt);
632
633      return (1 << util_format_get_nr_components(outfmt)) - 1;
634   }
635
636   switch (imgfmt) {
637   case PIPE_FORMAT_S8_UINT:
638      return 1;
639   case PIPE_FORMAT_Z16_UNORM:
640      return 3;
641   case PIPE_FORMAT_Z16_UNORM_S8_UINT:
642      return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 3 : 8;
643   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
644      return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 7 : 8;
645   case PIPE_FORMAT_Z24X8_UNORM:
646      assert(aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT);
647      return 7;
648   case PIPE_FORMAT_Z32_FLOAT:
649      return 0xf;
650   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
651      return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 1 : 2;
652   default:
653      unreachable("Invalid depth format\n");
654   }
655}
656
/* Record an image-to-image copy: render the source view onto the
 * destination level as a textured quad, one batch per destination layer.
 * Assumes the currently open batch can be closed (and closes it) before
 * reconfiguring the framebuffer.
 */
static void
panvk_meta_copy_img2img(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_image *src,
                        const struct panvk_image *dst,
                        const VkImageCopy *region)
{
   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;
   struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   /* Canonicalize both formats and derive the dst writemask; this selects
    * the pre-compiled shader variant.
    */
   struct panvk_meta_copy_img2img_format_info key = {
      .srcfmt = panvk_meta_copy_img_format(src->pimage.layout.format),
      .dstfmt = panvk_meta_copy_img_format(dst->pimage.layout.format),
      .dstmask = panvk_meta_copy_img_mask(dst->pimage.layout.format,
                                          region->dstSubresource.aspectMask),
   };

   assert(src->pimage.layout.nr_samples == dst->pimage.layout.nr_samples);

   unsigned texdimidx =
      panvk_meta_copy_tex_type(src->pimage.layout.dim,
                               src->pimage.layout.array_size > 1);
   unsigned fmtidx =
      panvk_meta_copy_img2img_format_idx(key);
   unsigned ms = dst->pimage.layout.nr_samples > 1 ? 1 : 0;

   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.img2img[ms][texdimidx][fmtidx].rsd;

   /* Source is sampled as a texture; cube maps are sampled as 2D since the
    * copy addresses individual layers.
    */
   struct pan_image_view srcview = {
      .format = key.srcfmt,
      .dim = src->pimage.layout.dim == MALI_TEXTURE_DIMENSION_CUBE ?
             MALI_TEXTURE_DIMENSION_2D : src->pimage.layout.dim,
      .image = &src->pimage,
      .nr_samples = src->pimage.layout.nr_samples,
      .first_level = region->srcSubresource.mipLevel,
      .last_level = region->srcSubresource.mipLevel,
      .first_layer = region->srcSubresource.baseArrayLayer,
      .last_layer = region->srcSubresource.baseArrayLayer + region->srcSubresource.layerCount - 1,
      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
   };

   /* Destination is bound as RT0; first/last_layer are patched per layer in
    * the loop below.
    */
   struct pan_image_view dstview = {
      .format = key.dstfmt,
      .dim = MALI_TEXTURE_DIMENSION_2D,
      .image = &dst->pimage,
      .nr_samples = dst->pimage.layout.nr_samples,
      .first_level = region->dstSubresource.mipLevel,
      .last_level = region->dstSubresource.mipLevel,
      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
   };

   unsigned minx = MAX2(region->dstOffset.x, 0);
   unsigned miny = MAX2(region->dstOffset.y, 0);
   unsigned maxx = MAX2(region->dstOffset.x + region->extent.width - 1, 0);
   unsigned maxy = MAX2(region->dstOffset.y + region->extent.height - 1, 0);

   mali_ptr vpd =
      panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base,
                                         minx, miny, maxx, maxy);

   /* Destination quad (triangle strip order), shared by all layers. */
   float dst_rect[] = {
      minx, miny, 0.0, 1.0,
      maxx + 1, miny, 0.0, 1.0,
      minx, maxy + 1, 0.0, 1.0,
      maxx + 1, maxy + 1, 0.0, 1.0,
   };

   mali_ptr dst_coords =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, dst_rect,
                              sizeof(dst_rect), 64);

   /* TODO: don't force preloads of dst resources if unneeded */

   unsigned width = u_minify(dst->pimage.layout.width, region->dstSubresource.mipLevel);
   unsigned height = u_minify(dst->pimage.layout.height, region->dstSubresource.mipLevel);
   cmdbuf->state.fb.crc_valid[0] = false;
   /* Restrict the render area to the written rect, aligned to 32x32 tiles. */
   *fbinfo = (struct pan_fb_info){
      .width = width,
      .height = height,
      .extent.minx = minx & ~31,
      .extent.miny = miny & ~31,
      .extent.maxx = MIN2(ALIGN_POT(maxx + 1, 32), width) - 1,
      .extent.maxy = MIN2(ALIGN_POT(maxy + 1, 32), height) - 1,
      .nr_samples = dst->pimage.layout.nr_samples,
      .rt_count = 1,
      .rts[0].view = &dstview,
      .rts[0].preload = true,
      .rts[0].crc_valid = &cmdbuf->state.fb.crc_valid[0],
   };

   mali_ptr texture =
      panvk_meta_copy_img_emit_texture(pdev, &cmdbuf->desc_pool.base, &srcview);
   mali_ptr sampler =
      panvk_meta_copy_img_emit_sampler(pdev, &cmdbuf->desc_pool.base);

   /* The FB config changed, so any in-flight batch must be flushed first. */
   panvk_per_arch(cmd_close_batch)(cmdbuf);

   /* Reuse min/max for the source rect from here on. */
   minx = MAX2(region->srcOffset.x, 0);
   miny = MAX2(region->srcOffset.y, 0);
   maxx = MAX2(region->srcOffset.x + region->extent.width - 1, 0);
   maxy = MAX2(region->srcOffset.y + region->extent.height - 1, 0);
   assert(region->dstOffset.z >= 0);

   /* 3D copies use offset.z/extent.depth; array copies use
    * baseArrayLayer/layerCount — MAX2 picks whichever is in play.
    */
   unsigned first_src_layer = MAX2(0, region->srcOffset.z);
   unsigned first_dst_layer = MAX2(region->dstSubresource.baseArrayLayer, region->dstOffset.z);
   unsigned nlayers = MAX2(region->dstSubresource.layerCount, region->extent.depth);
   for (unsigned l = 0; l < nlayers; l++) {
      unsigned src_l = l + first_src_layer;
      /* Source coordinates: z carries the source layer index. */
      float src_rect[] = {
         minx, miny, src_l, 1.0,
         maxx + 1, miny, src_l, 1.0,
         minx, maxy + 1, src_l, 1.0,
         maxx + 1, maxy + 1, src_l, 1.0,
      };

      mali_ptr src_coords =
         pan_pool_upload_aligned(&cmdbuf->desc_pool.base, src_rect,
                                 sizeof(src_rect), 64);

      struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

      dstview.first_layer = dstview.last_layer = l + first_dst_layer;
      batch->blit.src = src->pimage.data.bo;
      batch->blit.dst = dst->pimage.data.bo;
      panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
      panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
      panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);

      mali_ptr tsd, tiler;

#if PAN_ARCH >= 6
      tsd = batch->tls.gpu;
      tiler = batch->tiler.descs.gpu;
#else
      tsd = batch->fb.desc.gpu;
      tiler = 0;
#endif

      struct panfrost_ptr job;

      job = panvk_meta_copy_emit_tiler_job(&cmdbuf->desc_pool.base,
                                           &batch->scoreboard,
                                           src_coords, dst_coords,
                                           texture, sampler, 0, 0,
                                           vpd, rsd, tsd, tiler);

      util_dynarray_append(&batch->jobs, void *, job.cpu);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
   }
}
806
807static void
808panvk_meta_copy_img2img_init(struct panvk_physical_device *dev, bool is_ms)
809{
810   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2img_fmts) == PANVK_META_COPY_IMG2IMG_NUM_FORMATS);
811
812   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2img_fmts); i++) {
813      for (unsigned texdim = 1; texdim <= 3; texdim++) {
814         unsigned texdimidx = panvk_meta_copy_tex_type(texdim, false);
815         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2img[0]));
816
817         /* No MSAA on 3D textures */
818         if (texdim == 3 && is_ms) continue;
819
820         struct pan_shader_info shader_info;
821         mali_ptr shader =
822            panvk_meta_copy_img2img_shader(&dev->pdev, &dev->meta.bin_pool.base,
823                                           panvk_meta_copy_img2img_fmts[i].srcfmt,
824                                           panvk_meta_copy_img2img_fmts[i].dstfmt,
825                                           panvk_meta_copy_img2img_fmts[i].dstmask,
826                                           texdim, false, is_ms, &shader_info);
827         dev->meta.copy.img2img[is_ms][texdimidx][i].rsd =
828            panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
829                                            shader, &shader_info,
830                                            panvk_meta_copy_img2img_fmts[i].dstfmt,
831                                            panvk_meta_copy_img2img_fmts[i].dstmask,
832                                            true);
833         if (texdim == 3)
834            continue;
835
836         memset(&shader_info, 0, sizeof(shader_info));
837         texdimidx = panvk_meta_copy_tex_type(texdim, true);
838         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2img[0]));
839         shader =
840            panvk_meta_copy_img2img_shader(&dev->pdev, &dev->meta.bin_pool.base,
841                                           panvk_meta_copy_img2img_fmts[i].srcfmt,
842                                           panvk_meta_copy_img2img_fmts[i].dstfmt,
843                                           panvk_meta_copy_img2img_fmts[i].dstmask,
844                                           texdim, true, is_ms, &shader_info);
845         dev->meta.copy.img2img[is_ms][texdimidx][i].rsd =
846            panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
847                                            shader, &shader_info,
848                                            panvk_meta_copy_img2img_fmts[i].dstfmt,
849                                            panvk_meta_copy_img2img_fmts[i].dstmask,
850                                            true);
851      }
852   }
853}
854
855void
856panvk_per_arch(CmdCopyImage)(VkCommandBuffer commandBuffer,
857                             VkImage srcImage,
858                             VkImageLayout srcImageLayout,
859                             VkImage destImage,
860                             VkImageLayout destImageLayout,
861                             uint32_t regionCount,
862                             const VkImageCopy *pRegions)
863{
864   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
865   VK_FROM_HANDLE(panvk_image, dst, destImage);
866   VK_FROM_HANDLE(panvk_image, src, srcImage);
867
868   for (unsigned i = 0; i < regionCount; i++) {
869      panvk_meta_copy_img2img(cmdbuf, src, dst, &pRegions[i]);
870   }
871}
872
873static unsigned
874panvk_meta_copy_buf_texelsize(enum pipe_format imgfmt, unsigned mask)
875{
876   unsigned imgtexelsz = util_format_get_blocksize(imgfmt);
877   unsigned nbufcomps = util_bitcount(mask);
878
879   if (nbufcomps == util_format_get_nr_components(imgfmt))
880      return imgtexelsz;
881
882   /* Special case for Z24 buffers which are not tightly packed */
883   if (mask == 7 && imgtexelsz == 4)
884      return 4;
885
886   /* Special case for S8 extraction from Z32_S8X24 */
887   if (mask == 2 && imgtexelsz == 8)
888      return 1;
889
890   unsigned compsz =
891      util_format_get_component_bits(imgfmt, UTIL_FORMAT_COLORSPACE_RGB, 0);
892
893   assert(!(compsz % 8));
894
895   return nbufcomps * compsz / 8;
896}
897
898static enum pipe_format
899panvk_meta_copy_buf2img_format(enum pipe_format imgfmt)
900{
901   /* Pick blendable formats when we can, and the FLOAT variant matching the
902    * texelsize otherwise.
903    */
904   switch (util_format_get_blocksize(imgfmt)) {
905   case 1: return PIPE_FORMAT_R8_UNORM;
906   /* AFBC stores things differently for RGB565,
907    * we can't simply map to R8G8 in that case */
908   case 2: return (imgfmt == PIPE_FORMAT_R5G6B5_UNORM ||
909                   imgfmt == PIPE_FORMAT_B5G6R5_UNORM) ?
910                  PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UNORM;
911   case 4: return PIPE_FORMAT_R8G8B8A8_UNORM;
912   case 6: return PIPE_FORMAT_R16G16B16_UINT;
913   case 8: return PIPE_FORMAT_R32G32_UINT;
914   case 12: return PIPE_FORMAT_R32G32B32_UINT;
915   case 16: return PIPE_FORMAT_R32G32B32A32_UINT;
916   default: unreachable("Invalid format\n");
917   }
918}
919
/* Key identifying a meta-copy pipeline: the canonical format used to
 * interpret the image data plus the mask of components actually copied
 * (partial masks cover the depth/stencil aspect cases).
 */
struct panvk_meta_copy_format_info {
   enum pipe_format imgfmt; /* canonical copy format */
   unsigned mask;           /* bitmask of copied components */
};
924
/* Format keys supported by the buffer -> image copy path. The entry index
 * doubles as the pipeline slot filled by panvk_meta_copy_buf2img_init()
 * and looked up by panvk_meta_copy_buf2img_format_idx(), so the order of
 * this table matters.
 */
static const struct panvk_meta_copy_format_info panvk_meta_copy_buf2img_fmts[] = {
   { PIPE_FORMAT_R8_UNORM, 0x1 },
   { PIPE_FORMAT_R8G8_UNORM, 0x3 },
   { PIPE_FORMAT_R5G6B5_UNORM, 0x7 },
   { PIPE_FORMAT_R8G8B8A8_UNORM, 0xf },
   { PIPE_FORMAT_R16G16B16_UINT, 0x7 },
   { PIPE_FORMAT_R32G32_UINT, 0x3 },
   { PIPE_FORMAT_R32G32B32_UINT, 0x7 },
   { PIPE_FORMAT_R32G32B32A32_UINT, 0xf },
   /* S8 -> Z24S8 */
   { PIPE_FORMAT_R8G8B8A8_UNORM, 0x8 },
   /* S8 -> Z32_S8X24 */
   { PIPE_FORMAT_R32G32_UINT, 0x2 },
   /* Z24X8 -> Z24S8 */
   { PIPE_FORMAT_R8G8B8A8_UNORM, 0x7 },
   /* Z32 -> Z32_S8X24 */
   { PIPE_FORMAT_R32G32_UINT, 0x1 },
};
943
/* UBO/push-constant payload consumed by the buf -> img copy shader. The
 * shader reads it through panvk_meta_copy_buf2img_get_info_field(), so
 * the C layout must match what that macro computes.
 */
struct panvk_meta_copy_buf2img_info {
   struct {
      mali_ptr ptr; /* GPU address of the source buffer */
      struct {
         unsigned line; /* bytes between two buffer rows */
         unsigned surf; /* bytes between two buffer layers/slices */
      } stride;
   } buf;
};
953
/*
 * Emit a UBO load (binding 0) of one field of
 * struct panvk_meta_copy_buf2img_info. Offset and bit-size are derived
 * from the C struct layout, keeping the shader and the CPU-side upload
 * in sync automatically.
 */
#define panvk_meta_copy_buf2img_get_info_field(b, field) \
        nir_load_ubo((b), 1, \
                     sizeof(((struct panvk_meta_copy_buf2img_info *)0)->field) * 8, \
                     nir_imm_int(b, 0), \
                     nir_imm_int(b, offsetof(struct panvk_meta_copy_buf2img_info, field)), \
                     .align_mul = 4, \
                     .align_offset = 0, \
                     .range_base = 0, \
                     .range = ~0)
963
/*
 * Build a fragment shader that copies texels from a linear buffer into an
 * image. The destination image is bound as the render target and a quad
 * covering the copy region is rasterized; each fragment computes the
 * source byte address from the interpolated texel coordinate and the
 * buffer strides (passed through a UBO), loads the texel and writes it to
 * the render target.
 *
 * Returns the GPU address of the uploaded shader binary; shader_info is
 * filled by the compiler (including the push-constant map).
 */
static mali_ptr
panvk_meta_copy_buf2img_shader(struct panfrost_device *pdev,
                               struct pan_pool *bin_pool,
                               struct panvk_meta_copy_format_info key,
                               struct pan_shader_info *shader_info)
{
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
                                     GENX(pan_shader_get_compiler_options)(),
                                     "panvk_meta_copy_buf2img(imgfmt=%s,mask=%x)",
                                     util_format_name(key.imgfmt),
                                     key.mask);

   b.shader->info.internal = true;
   /* Single UBO carrying struct panvk_meta_copy_buf2img_info. */
   b.shader->info.num_ubos = 1;

   /* Texel coordinate comes in as a varying (x, y, layer) written by the
    * blit vertex path. */
   nir_variable *coord_var =
      nir_variable_create(b.shader, nir_var_shader_in,
                          glsl_vector_type(GLSL_TYPE_FLOAT, 3),
                          "coord");
   coord_var->data.location = VARYING_SLOT_TEX0;
   nir_ssa_def *coord = nir_load_var(&b, coord_var);

   coord = nir_f2u32(&b, coord);

   nir_ssa_def *bufptr =
      panvk_meta_copy_buf2img_get_info_field(&b, buf.ptr);
   nir_ssa_def *buflinestride =
      panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.line);
   nir_ssa_def *bufsurfstride =
      panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.surf);

   unsigned imgtexelsz = util_format_get_blocksize(key.imgfmt);
   unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
   unsigned writemask = key.mask;

   /* Source byte offset: x * texelsize + y * line_stride + z * surf_stride. */
   nir_ssa_def *offset =
      nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, buftexelsz));
   offset = nir_iadd(&b, offset,
                     nir_imul(&b, nir_channel(&b, coord, 1), buflinestride));
   offset = nir_iadd(&b, offset,
                     nir_imul(&b, nir_channel(&b, coord, 2), bufsurfstride));
   bufptr = nir_iadd(&b, bufptr, nir_u2u64(&b, offset));

   /* Texels up to 32 bits are handled as 8-bit components (except RGB565,
    * unpacked separately below); wider texels use 16/32-bit components. */
   unsigned imgcompsz =
      (imgtexelsz <= 4 && key.imgfmt != PIPE_FORMAT_R5G6B5_UNORM) ?
      1 : MIN2(1 << (ffs(imgtexelsz) - 1), 4);

   unsigned nimgcomps = imgtexelsz / imgcompsz;
   unsigned bufcompsz = MIN2(buftexelsz, imgcompsz);
   unsigned nbufcomps = buftexelsz / bufcompsz;

   assert(bufcompsz == 1 || bufcompsz == 2 || bufcompsz == 4);
   assert(nbufcomps <= 4 && nimgcomps <= 4);

   nir_ssa_def *texel =
      nir_load_global(&b, bufptr, bufcompsz, nbufcomps, bufcompsz * 8);

   enum glsl_base_type basetype;
   if (key.imgfmt == PIPE_FORMAT_R5G6B5_UNORM) {
      /* Unpack the 5/6/5 bitfields and normalize to floats for the
       * fixed-function blend unit. */
      texel = nir_vec3(&b,
                       nir_iand_imm(&b, texel, BITFIELD_MASK(5)),
                       nir_iand_imm(&b, nir_ushr_imm(&b, texel, 5), BITFIELD_MASK(6)),
                       nir_iand_imm(&b, nir_ushr_imm(&b, texel, 11), BITFIELD_MASK(5)));
      texel = nir_fmul(&b,
                       nir_u2f32(&b, texel),
                       nir_vec3(&b,
                                nir_imm_float(&b, 1.0f / 31),
                                nir_imm_float(&b, 1.0f / 63),
                                nir_imm_float(&b, 1.0f / 31)));
      nimgcomps = 3;
      basetype = GLSL_TYPE_FLOAT;
   } else if (imgcompsz == 1) {
      assert(bufcompsz == 1);
      /* Blendable formats are unorm and the fixed-function blend unit
       * takes float values.
       */
      texel = nir_fmul(&b, nir_u2f32(&b, texel),
                       nir_imm_float(&b, 1.0f / 255));
      basetype = GLSL_TYPE_FLOAT;
   } else {
      texel = nir_u2uN(&b, texel, imgcompsz * 8);
      basetype = imgcompsz == 2 ? GLSL_TYPE_UINT16 : GLSL_TYPE_UINT;
   }

   /* We always pass the texel using 32-bit regs for now */
   nir_variable *out =
      nir_variable_create(b.shader, nir_var_shader_out,
                          glsl_vector_type(basetype, nimgcomps),
                          "out");
   out->data.location = FRAG_RESULT_DATA0;

   uint16_t fullmask = (1 << nimgcomps) - 1;

   assert(fullmask >= writemask);

   /* Partial-component copy (e.g. S8 into Z24S8): merge the copied
    * components with the existing destination texel.
    * NOTE(review): reading "out" before writing it appears to rely on the
    * render target being preloaded (rts[0].preload) — confirm. */
   if (fullmask != writemask) {
      unsigned first_written_comp = ffs(writemask) - 1;
      nir_ssa_def *oldtexel = NULL;
      if (imgcompsz > 1)
         oldtexel = nir_load_var(&b, out);

      nir_ssa_def *texel_comps[4];
      for (unsigned i = 0; i < nimgcomps; i++) {
         if (writemask & BITFIELD_BIT(i))
            texel_comps[i] = nir_channel(&b, texel, i - first_written_comp);
         else if (imgcompsz > 1)
            texel_comps[i] = nir_channel(&b, oldtexel, i);
         else
            texel_comps[i] = nir_imm_intN_t(&b, 0, texel->bit_size);
      }

      texel = nir_vec(&b, texel_comps, nimgcomps);
   }

   nir_store_var(&b, out, texel, 0xff);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .is_blit = true,
   };

#if PAN_ARCH >= 6
   /* Force a raw integer render-target conversion matching the register
    * format chosen above. */
   pan_pack(&inputs.bifrost.rt_conv[0], INTERNAL_CONVERSION, cfg) {
      cfg.memory_format = (imgcompsz == 2 ? MALI_RG16UI : MALI_RG32UI) << 12;
      cfg.register_format = imgcompsz == 2 ?
                            MALI_REGISTER_FILE_FORMAT_U16 :
                            MALI_REGISTER_FILE_FORMAT_U32;
   }
   inputs.bifrost.static_rt_conv = true;
#endif

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   /* Make sure UBO words have been upgraded to push constants */
   assert(shader_info->ubo_count == 1);

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size,
                              PAN_ARCH >= 6 ? 128 : 64);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}
1113
1114static unsigned
1115panvk_meta_copy_buf2img_format_idx(struct panvk_meta_copy_format_info key)
1116{
1117   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_buf2img_fmts); i++) {
1118      if (!memcmp(&key, &panvk_meta_copy_buf2img_fmts[i], sizeof(key)))
1119         return i;
1120   }
1121
1122   unreachable("Invalid image format\n");
1123}
1124
/*
 * Record a buffer -> image copy for one VkBufferImageCopy region.
 *
 * The destination image level is bound as the sole render target and a
 * quad covering the copy rectangle is drawn with the pre-compiled buf2img
 * fragment shader, which fetches texels straight from the source buffer.
 * One batch is emitted per copied layer/slice.
 */
static void
panvk_meta_copy_buf2img(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_buffer *buf,
                        const struct panvk_image *img,
                        const VkBufferImageCopy *region)
{
   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;
   struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   /* Clamp to non-negative coordinates; maxx/maxy are inclusive. */
   unsigned minx = MAX2(region->imageOffset.x, 0);
   unsigned miny = MAX2(region->imageOffset.y, 0);
   unsigned maxx = MAX2(region->imageOffset.x + region->imageExtent.width - 1, 0);
   unsigned maxy = MAX2(region->imageOffset.y + region->imageExtent.height - 1, 0);

   /* Viewport descriptor restricting rasterization to the copy rect. */
   mali_ptr vpd =
      panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base,
                                         minx, miny, maxx, maxy);

   /* Four corners (x, y, z, w) of the destination quad. */
   float dst_rect[] = {
      minx, miny, 0.0, 1.0,
      maxx + 1, miny, 0.0, 1.0,
      minx, maxy + 1, 0.0, 1.0,
      maxx + 1, maxy + 1, 0.0, 1.0,
   };
   mali_ptr dst_coords =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, dst_rect,
                              sizeof(dst_rect), 64);

   /* Pipeline key: canonical render format + copied-component mask
    * derived from the image format and the aspect being copied. */
   struct panvk_meta_copy_format_info key = {
      .imgfmt = panvk_meta_copy_buf2img_format(img->pimage.layout.format),
      .mask = panvk_meta_copy_img_mask(img->pimage.layout.format,
                                       region->imageSubresource.aspectMask),
   };

   unsigned fmtidx = panvk_meta_copy_buf2img_format_idx(key);

   /* Pre-compiled renderer state + push-constant map for this key. */
   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.buf2img[fmtidx].rsd;
   const struct panfrost_ubo_push *pushmap =
      &cmdbuf->device->physical_device->meta.copy.buf2img[fmtidx].pushmap;

   /* Shader-visible copy parameters. Per the Vulkan spec, zero
    * bufferRowLength/bufferImageHeight mean "tightly packed", hence the
    * fallback to the image extent (GNU ?: extension). */
   unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
   struct panvk_meta_copy_buf2img_info info = {
      .buf.ptr = buf->bo->ptr.gpu + buf->bo_offset + region->bufferOffset,
      .buf.stride.line = (region->bufferRowLength ? : region->imageExtent.width) * buftexelsz,
   };

   info.buf.stride.surf =
      (region->bufferImageHeight ? : region->imageExtent.height) * info.buf.stride.line;

   mali_ptr pushconsts =
      panvk_meta_copy_emit_push_constants(pdev, pushmap, &cmdbuf->desc_pool.base,
                                          &info, sizeof(info));
   mali_ptr ubo =
      panvk_meta_copy_emit_ubo(pdev, &cmdbuf->desc_pool.base, &info, sizeof(info));

   /* Render-target view of the destination mip level; the layer range is
    * filled in per iteration below. */
   struct pan_image_view view = {
      .format = key.imgfmt,
      .dim = MALI_TEXTURE_DIMENSION_2D,
      .image = &img->pimage,
      .nr_samples = img->pimage.layout.nr_samples,
      .first_level = region->imageSubresource.mipLevel,
      .last_level = region->imageSubresource.mipLevel,
      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
   };

   /* TODO: don't force preloads of dst resources if unneeded */
   cmdbuf->state.fb.crc_valid[0] = false;
   *fbinfo = (struct pan_fb_info){
      .width = u_minify(img->pimage.layout.width, region->imageSubresource.mipLevel),
      .height = u_minify(img->pimage.layout.height, region->imageSubresource.mipLevel),
      .extent.minx = minx,
      .extent.maxx = maxx,
      .extent.miny = miny,
      .extent.maxy = maxy,
      .nr_samples = 1,
      .rt_count = 1,
      .rts[0].view = &view,
      .rts[0].preload = true,
      .rts[0].crc_valid = &cmdbuf->state.fb.crc_valid[0],
   };

   /* The FB descriptor was just repointed: flush any pending batch. */
   panvk_per_arch(cmd_close_batch)(cmdbuf);

   /* A region targets either multiple array layers or multiple 3D slices,
    * never both. */
   assert(region->imageSubresource.layerCount == 1 ||
          region->imageExtent.depth == 1);
   assert(region->imageOffset.z >= 0);
   /* One of baseArrayLayer/offset.z (and layerCount/depth) is trivial, so
    * MAX2 picks whichever is meaningful for this region. */
   unsigned first_layer = MAX2(region->imageSubresource.baseArrayLayer, region->imageOffset.z);
   unsigned nlayers = MAX2(region->imageSubresource.layerCount, region->imageExtent.depth);
   for (unsigned l = 0; l < nlayers; l++) {
      /* Source texel coords: z is the region-relative layer, used by the
       * shader to apply the surface stride. */
      float src_rect[] = {
         0, 0, l, 1.0,
         region->imageExtent.width, 0, l, 1.0,
         0, region->imageExtent.height, l, 1.0,
         region->imageExtent.width, region->imageExtent.height, l, 1.0,
      };

      mali_ptr src_coords =
         pan_pool_upload_aligned(&cmdbuf->desc_pool.base, src_rect,
                                 sizeof(src_rect), 64);

      struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

      /* Render into exactly one destination layer per batch. */
      view.first_layer = view.last_layer = l + first_layer;
      batch->blit.src = buf->bo;
      batch->blit.dst = img->pimage.data.bo;
      panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
      panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
      panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);

      mali_ptr tsd, tiler;

#if PAN_ARCH >= 6
      tsd = batch->tls.gpu;
      tiler = batch->tiler.descs.gpu;
#else
      /* Midgard: thread storage lives in the FB descriptor; no separate
       * tiler descriptor. */
      tsd = batch->fb.desc.gpu;
      tiler = 0;
#endif

      struct panfrost_ptr job;

      job = panvk_meta_copy_emit_tiler_job(&cmdbuf->desc_pool.base,
                                           &batch->scoreboard,
                                           src_coords, dst_coords,
                                           0, 0, ubo, pushconsts,
                                           vpd, rsd, tsd, tiler);

      util_dynarray_append(&batch->jobs, void *, job.cpu);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
   }
}
1256
1257static void
1258panvk_meta_copy_buf2img_init(struct panvk_physical_device *dev)
1259{
1260   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_buf2img_fmts) == PANVK_META_COPY_BUF2IMG_NUM_FORMATS);
1261
1262   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_buf2img_fmts); i++) {
1263      struct pan_shader_info shader_info;
1264      mali_ptr shader =
1265         panvk_meta_copy_buf2img_shader(&dev->pdev, &dev->meta.bin_pool.base,
1266                                        panvk_meta_copy_buf2img_fmts[i],
1267                                        &shader_info);
1268      dev->meta.copy.buf2img[i].pushmap = shader_info.push;
1269      dev->meta.copy.buf2img[i].rsd =
1270         panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
1271                                         shader, &shader_info,
1272                                         panvk_meta_copy_buf2img_fmts[i].imgfmt,
1273                                         panvk_meta_copy_buf2img_fmts[i].mask,
1274                                         false);
1275   }
1276}
1277
1278void
1279panvk_per_arch(CmdCopyBufferToImage)(VkCommandBuffer commandBuffer,
1280                                     VkBuffer srcBuffer,
1281                                     VkImage destImage,
1282                                     VkImageLayout destImageLayout,
1283                                     uint32_t regionCount,
1284                                     const VkBufferImageCopy *pRegions)
1285{
1286   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1287   VK_FROM_HANDLE(panvk_buffer, buf, srcBuffer);
1288   VK_FROM_HANDLE(panvk_image, img, destImage);
1289
1290   for (unsigned i = 0; i < regionCount; i++) {
1291      panvk_meta_copy_buf2img(cmdbuf, buf, img, &pRegions[i]);
1292   }
1293}
1294
/* Format keys supported by the image -> buffer copy path. The entry index
 * doubles as the pipeline slot used at init time and returned by
 * panvk_meta_copy_img2buf_format_idx(), so the order of this table
 * matters.
 */
static const struct panvk_meta_copy_format_info panvk_meta_copy_img2buf_fmts[] = {
   { PIPE_FORMAT_R8_UINT, 0x1 },
   { PIPE_FORMAT_R8G8_UINT, 0x3 },
   { PIPE_FORMAT_R5G6B5_UNORM, 0x7 },
   { PIPE_FORMAT_R8G8B8A8_UINT, 0xf },
   { PIPE_FORMAT_R16G16B16_UINT, 0x7 },
   { PIPE_FORMAT_R32G32_UINT, 0x3 },
   { PIPE_FORMAT_R32G32B32_UINT, 0x7 },
   { PIPE_FORMAT_R32G32B32A32_UINT, 0xf },
   /* S8 -> Z24S8 */
   { PIPE_FORMAT_R8G8B8A8_UINT, 0x8 },
   /* S8 -> Z32_S8X24 */
   { PIPE_FORMAT_R32G32_UINT, 0x2 },
   /* Z24X8 -> Z24S8 */
   { PIPE_FORMAT_R8G8B8A8_UINT, 0x7 },
   /* Z32 -> Z32_S8X24 */
   { PIPE_FORMAT_R32G32_UINT, 0x1 },
};
1313
static enum pipe_format
panvk_meta_copy_img2buf_format(enum pipe_format imgfmt)
{
   /* Use a UINT format of matching texel size so texel fetches return the
    * raw bits. RGB565 is kept as-is: AFBC stores it differently, so we
    * can't reinterpret it as R8G8.
    */
   switch (util_format_get_blocksize(imgfmt)) {
   case 1: return PIPE_FORMAT_R8_UINT;
   /* AFBC stores things differently for RGB565,
    * we can't simply map to R8G8 in that case */
   case 2: return (imgfmt == PIPE_FORMAT_R5G6B5_UNORM ||
                   imgfmt == PIPE_FORMAT_B5G6R5_UNORM) ?
                  PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UINT;
   case 4: return PIPE_FORMAT_R8G8B8A8_UINT;
   case 6: return PIPE_FORMAT_R16G16B16_UINT;
   case 8: return PIPE_FORMAT_R32G32_UINT;
   case 12: return PIPE_FORMAT_R32G32B32_UINT;
   case 16: return PIPE_FORMAT_R32G32B32A32_UINT;
   default: unreachable("Invalid format\n");
   }
}
1335
/* UBO/push-constant payload consumed by the img -> buf copy compute
 * shader, accessed through panvk_meta_copy_img2buf_get_info_field().
 */
struct panvk_meta_copy_img2buf_info {
   struct {
      mali_ptr ptr; /* GPU address of the destination buffer */
      struct {
         unsigned line; /* bytes between two buffer rows */
         unsigned surf; /* bytes between two buffer layers/slices */
      } stride;
   } buf;
   struct {
      struct {
         unsigned x, y, z; /* copy origin inside the image */
      } offset;
      struct {
         unsigned minx, miny, maxx, maxy; /* inclusive x/y copy bounds */
      } extent;
   } img;
};
1353
/*
 * Emit a UBO load (binding 0) of one field of
 * struct panvk_meta_copy_img2buf_info. Offset and bit-size are derived
 * from the C struct layout, keeping the shader and the CPU-side upload
 * in sync automatically.
 */
#define panvk_meta_copy_img2buf_get_info_field(b, field) \
        nir_load_ubo((b), 1, \
                     sizeof(((struct panvk_meta_copy_img2buf_info *)0)->field) * 8, \
                     nir_imm_int(b, 0), \
                     nir_imm_int(b, offsetof(struct panvk_meta_copy_img2buf_info, field)), \
                     .align_mul = 4, \
                     .align_offset = 0, \
                     .range_base = 0, \
                     .range = ~0)
1363
1364static mali_ptr
1365panvk_meta_copy_img2buf_shader(struct panfrost_device *pdev,
1366                               struct pan_pool *bin_pool,
1367                               struct panvk_meta_copy_format_info key,
1368                               unsigned texdim, unsigned texisarray,
1369                               struct pan_shader_info *shader_info)
1370{
1371   unsigned imgtexelsz = util_format_get_blocksize(key.imgfmt);
1372   unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
1373
1374   /* FIXME: Won't work on compute queues, but we can't do that with
1375    * a compute shader if the destination is an AFBC surface.
1376    */
1377   nir_builder b =
1378      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
1379                                     GENX(pan_shader_get_compiler_options)(),
1380                                     "panvk_meta_copy_img2buf(dim=%dD%s,imgfmt=%s,mask=%x)",
1381                                     texdim, texisarray ? "[]" : "",
1382                                     util_format_name(key.imgfmt),
1383                                     key.mask);
1384
1385   b.shader->info.internal = true;
1386   b.shader->info.num_ubos = 1;
1387
1388   nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32);
1389   nir_ssa_def *bufptr =
1390      panvk_meta_copy_img2buf_get_info_field(&b, buf.ptr);
1391   nir_ssa_def *buflinestride =
1392      panvk_meta_copy_img2buf_get_info_field(&b, buf.stride.line);
1393   nir_ssa_def *bufsurfstride =
1394      panvk_meta_copy_img2buf_get_info_field(&b, buf.stride.surf);
1395
1396   nir_ssa_def *imgminx =
1397      panvk_meta_copy_img2buf_get_info_field(&b, img.extent.minx);
1398   nir_ssa_def *imgminy =
1399      panvk_meta_copy_img2buf_get_info_field(&b, img.extent.miny);
1400   nir_ssa_def *imgmaxx =
1401      panvk_meta_copy_img2buf_get_info_field(&b, img.extent.maxx);
1402   nir_ssa_def *imgmaxy =
1403      panvk_meta_copy_img2buf_get_info_field(&b, img.extent.maxy);
1404
1405   nir_ssa_def *imgcoords, *inbounds;
1406
1407   switch (texdim + texisarray) {
1408   case 1:
1409      imgcoords =
1410         nir_iadd(&b,
1411                  nir_channel(&b, coord, 0),
1412                  panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x));
1413      inbounds =
1414         nir_iand(&b,
1415                  nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
1416                  nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx));
1417      break;
1418   case 2:
1419      imgcoords =
1420         nir_vec2(&b,
1421                  nir_iadd(&b,
1422                           nir_channel(&b, coord, 0),
1423                           panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)),
1424                  nir_iadd(&b,
1425                           nir_channel(&b, coord, 1),
1426                           panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)));
1427      inbounds =
1428         nir_iand(&b,
1429                  nir_iand(&b,
1430                           nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
1431                           nir_uge(&b, imgmaxy, nir_channel(&b, imgcoords, 1))),
1432                  nir_iand(&b,
1433                           nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx),
1434                           nir_uge(&b, nir_channel(&b, imgcoords, 1), imgminy)));
1435      break;
1436   case 3:
1437      imgcoords =
1438         nir_vec3(&b,
1439                  nir_iadd(&b,
1440                           nir_channel(&b, coord, 0),
1441                           panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)),
1442                  nir_iadd(&b,
1443                           nir_channel(&b, coord, 1),
1444                           panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)),
1445                  nir_iadd(&b,
1446                           nir_channel(&b, coord, 2),
1447                           panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)));
1448      inbounds =
1449         nir_iand(&b,
1450                  nir_iand(&b,
1451                           nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
1452                           nir_uge(&b, imgmaxy, nir_channel(&b, imgcoords, 1))),
1453                  nir_iand(&b,
1454                           nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx),
1455                           nir_uge(&b, nir_channel(&b, imgcoords, 1), imgminy)));
1456      break;
1457   default:
1458      unreachable("Invalid texture dimension\n");
1459   }
1460
1461   nir_push_if(&b, inbounds);
1462
1463   /* FIXME: doesn't work for tiled+compressed formats since blocks are 4x4
1464    * blocks instead of 16x16 texels in that case, and there's nothing we can
1465    * do to force the tile size to 4x4 in the render path.
1466    * This being said, compressed textures are not compatible with AFBC, so we
1467    * could use a compute shader arranging the blocks properly.
1468    */
1469   nir_ssa_def *offset =
1470      nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, buftexelsz));
1471   offset = nir_iadd(&b, offset,
1472                     nir_imul(&b, nir_channel(&b, coord, 1), buflinestride));
1473   offset = nir_iadd(&b, offset,
1474                     nir_imul(&b, nir_channel(&b, coord, 2), bufsurfstride));
1475   bufptr = nir_iadd(&b, bufptr, nir_u2u64(&b, offset));
1476
1477   unsigned imgcompsz = imgtexelsz <= 4 ?
1478                        1 : MIN2(1 << (ffs(imgtexelsz) - 1), 4);
1479   unsigned nimgcomps = imgtexelsz / imgcompsz;
1480   assert(nimgcomps <= 4);
1481
1482   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1);
1483   tex->op = nir_texop_txf;
1484   tex->texture_index = 0;
1485   tex->is_array = texisarray;
1486   tex->dest_type = util_format_is_unorm(key.imgfmt) ?
1487                    nir_type_float32 : nir_type_uint32;
1488
1489   switch (texdim) {
1490   case 1: tex->sampler_dim = GLSL_SAMPLER_DIM_1D; break;
1491   case 2: tex->sampler_dim = GLSL_SAMPLER_DIM_2D; break;
1492   case 3: tex->sampler_dim = GLSL_SAMPLER_DIM_3D; break;
1493   default: unreachable("Invalid texture dimension");
1494   }
1495
1496   tex->src[0].src_type = nir_tex_src_coord;
1497   tex->src[0].src = nir_src_for_ssa(imgcoords);
1498   tex->coord_components = texdim + texisarray;
1499   nir_ssa_dest_init(&tex->instr, &tex->dest, 4,
1500                     nir_alu_type_get_type_size(tex->dest_type), NULL);
1501   nir_builder_instr_insert(&b, &tex->instr);
1502
1503   nir_ssa_def *texel = &tex->dest.ssa;
1504
1505   unsigned fullmask = (1 << util_format_get_nr_components(key.imgfmt)) - 1;
1506   unsigned nbufcomps = util_bitcount(fullmask);
1507   if (key.mask != fullmask) {
1508      nir_ssa_def *bufcomps[4];
1509      nbufcomps = 0;
1510      for (unsigned i = 0; i < nimgcomps; i++) {
1511         if (key.mask & BITFIELD_BIT(i))
1512            bufcomps[nbufcomps++] = nir_channel(&b, texel, i);
1513      }
1514
1515      texel = nir_vec(&b, bufcomps, nbufcomps);
1516   }
1517
1518   unsigned bufcompsz = buftexelsz / nbufcomps;
1519
1520   if (key.imgfmt == PIPE_FORMAT_R5G6B5_UNORM) {
1521      texel = nir_fmul(&b, texel,
1522                       nir_vec3(&b,
1523                                nir_imm_float(&b, 31),
1524                                nir_imm_float(&b, 63),
1525                                nir_imm_float(&b, 31)));
1526      texel = nir_f2u16(&b, texel);
1527      texel = nir_ior(&b, nir_channel(&b, texel, 0),
1528                      nir_ior(&b,
1529                              nir_ishl(&b, nir_channel(&b, texel, 1), nir_imm_int(&b, 5)),
1530                              nir_ishl(&b, nir_channel(&b, texel, 2), nir_imm_int(&b, 11))));
1531      imgcompsz = 2;
1532      bufcompsz = 2;
1533      nbufcomps = 1;
1534      nimgcomps = 1;
1535   } else if (imgcompsz == 1) {
1536      nir_ssa_def *packed = nir_channel(&b, texel, 0);
1537      for (unsigned i = 1; i < nbufcomps; i++) {
1538         packed = nir_ior(&b, packed,
1539                          nir_ishl(&b, nir_iand_imm(&b, nir_channel(&b, texel, i), 0xff),
1540                                   nir_imm_int(&b, i * 8)));
1541      }
1542      texel = packed;
1543
1544      bufcompsz = nbufcomps == 3 ? 4 : nbufcomps;
1545      nbufcomps = 1;
1546   }
1547
1548   assert(bufcompsz == 1 || bufcompsz == 2 || bufcompsz == 4);
1549   assert(nbufcomps <= 4 && nimgcomps <= 4);
1550   texel = nir_u2uN(&b, texel, bufcompsz * 8);
1551
1552   nir_store_global(&b, bufptr, bufcompsz, texel, (1 << nbufcomps) - 1);
1553   nir_pop_if(&b, NULL);
1554
1555   struct panfrost_compile_inputs inputs = {
1556      .gpu_id = pdev->gpu_id,
1557      .is_blit = true,
1558   };
1559
1560   struct util_dynarray binary;
1561
1562   util_dynarray_init(&binary, NULL);
1563   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
1564
1565   /* Make sure UBO words have been upgraded to push constants and everything
1566    * is at the right place.
1567    */
1568   assert(shader_info->ubo_count == 1);
1569   assert(shader_info->push.count <= (sizeof(struct panvk_meta_copy_img2buf_info) / 4));
1570
1571   mali_ptr shader =
1572      pan_pool_upload_aligned(bin_pool, binary.data, binary.size,
1573                              PAN_ARCH >= 6 ? 128 : 64);
1574
1575   util_dynarray_fini(&binary);
1576   ralloc_free(b.shader);
1577
1578   return shader;
1579}
1580
1581static unsigned
1582panvk_meta_copy_img2buf_format_idx(struct panvk_meta_copy_format_info key)
1583{
1584   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2buf_fmts); i++) {
1585      if (!memcmp(&key, &panvk_meta_copy_img2buf_fmts[i], sizeof(key)))
1586         return i;
1587   }
1588
1589   unreachable("Invalid texel size\n");
1590}
1591
/*
 * Record a compute-based image-to-buffer copy for a single
 * VkBufferImageCopy region. The pre-compiled shader is picked from a
 * table indexed by texture dimension (array vs non-array) and a
 * (format, component-mask) key.
 */
static void
panvk_meta_copy_img2buf(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_buffer *buf,
                        const struct panvk_image *img,
                        const VkBufferImageCopy *region)
{
   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;
   struct panvk_meta_copy_format_info key = {
      .imgfmt = panvk_meta_copy_img2buf_format(img->pimage.layout.format),
      .mask = panvk_meta_copy_img_mask(img->pimage.layout.format,
                                       region->imageSubresource.aspectMask),
   };
   unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
   unsigned texdimidx =
      panvk_meta_copy_tex_type(img->pimage.layout.dim,
                               img->pimage.layout.array_size > 1);
   unsigned fmtidx = panvk_meta_copy_img2buf_format_idx(key);

   /* Pre-baked renderer state and push-constant map for this
    * dimension/format combination.
    */
   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.img2buf[texdimidx][fmtidx].rsd;
   const struct panfrost_ubo_push *pushmap =
      &cmdbuf->device->physical_device->meta.copy.img2buf[texdimidx][fmtidx].pushmap;

   /* Workgroups cover 16x16 texel tiles, so the image offset is snapped
    * down to a multiple of 16 while the exact extent is kept so the shader
    * can skip out-of-region texels.
    */
   struct panvk_meta_copy_img2buf_info info = {
      .buf.ptr = buf->bo->ptr.gpu + buf->bo_offset + region->bufferOffset,
      .buf.stride.line = (region->bufferRowLength ? : region->imageExtent.width) * buftexelsz,
      .img.offset.x = MAX2(region->imageOffset.x & ~15, 0),
      .img.extent.minx = MAX2(region->imageOffset.x, 0),
      .img.extent.maxx = MAX2(region->imageOffset.x + region->imageExtent.width - 1, 0),
   };

   if (img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D) {
      /* For 1D images the y dimension of the dispatch indexes layers. */
      info.img.extent.maxy = region->imageSubresource.layerCount - 1;
   } else {
      info.img.offset.y = MAX2(region->imageOffset.y & ~15, 0);
      info.img.offset.z = MAX2(region->imageOffset.z, 0);
      info.img.extent.miny = MAX2(region->imageOffset.y, 0);
      info.img.extent.maxy = MAX2(region->imageOffset.y + region->imageExtent.height - 1, 0);
   }

   info.buf.stride.surf = (region->bufferImageHeight ? : region->imageExtent.height) *
                          info.buf.stride.line;

   mali_ptr pushconsts =
      panvk_meta_copy_emit_push_constants(pdev, pushmap, &cmdbuf->desc_pool.base,
                                          &info, sizeof(info));
   mali_ptr ubo =
      panvk_meta_copy_emit_ubo(pdev, &cmdbuf->desc_pool.base, &info, sizeof(info));

   /* Sample the source through a view using the canonical copy format.
    * Cube maps are viewed as 2D (layered) textures.
    */
   struct pan_image_view view = {
      .format = key.imgfmt,
      .dim = img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_CUBE ?
             MALI_TEXTURE_DIMENSION_2D : img->pimage.layout.dim,
      .image = &img->pimage,
      .nr_samples = img->pimage.layout.nr_samples,
      .first_level = region->imageSubresource.mipLevel,
      .last_level = region->imageSubresource.mipLevel,
      .first_layer = region->imageSubresource.baseArrayLayer,
      .last_layer = region->imageSubresource.baseArrayLayer + region->imageSubresource.layerCount - 1,
      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
   };

   mali_ptr texture =
      panvk_meta_copy_img_emit_texture(pdev, &cmdbuf->desc_pool.base, &view);
   mali_ptr sampler =
      panvk_meta_copy_img_emit_sampler(pdev, &cmdbuf->desc_pool.base);

   /* The copy job runs in its own batch. */
   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   struct pan_tls_info tlsinfo = { 0 };

   batch->blit.src = img->pimage.data.bo;
   batch->blit.dst = buf->bo;
   batch->tls =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, LOCAL_STORAGE);
   GENX(pan_emit_tls)(&tlsinfo, batch->tls.cpu);

   mali_ptr tsd = batch->tls.gpu;

   /* 16x16 workgroups for 2D/3D images, 16x1 for 1D. */
   struct pan_compute_dim wg_sz = {
      16,
      img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D ? 1 : 16,
      1,
   };

   /* Enough workgroups to cover the 16-aligned region; z covers layers
    * (or depth for 3D images).
    */
   struct pan_compute_dim num_wg = {
     (ALIGN_POT(info.img.extent.maxx + 1, 16) - info.img.offset.x) / 16,
     img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D ?
        region->imageSubresource.layerCount :
        (ALIGN_POT(info.img.extent.maxy + 1, 16) - info.img.offset.y) / 16,
     img->pimage.layout.dim != MALI_TEXTURE_DIMENSION_1D ?
        MAX2(region->imageSubresource.layerCount, region->imageExtent.depth) : 1,
   };

   struct panfrost_ptr job =
      panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
                                       &batch->scoreboard, &num_wg, &wg_sz,
                                       texture, sampler,
                                       ubo, pushconsts,
                                       rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   panvk_per_arch(cmd_close_batch)(cmdbuf);
}
1699
1700static void
1701panvk_meta_copy_img2buf_init(struct panvk_physical_device *dev)
1702{
1703   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2buf_fmts) == PANVK_META_COPY_IMG2BUF_NUM_FORMATS);
1704
1705   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2buf_fmts); i++) {
1706      for (unsigned texdim = 1; texdim <= 3; texdim++) {
1707         unsigned texdimidx = panvk_meta_copy_tex_type(texdim, false);
1708         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2buf));
1709
1710         struct pan_shader_info shader_info;
1711         mali_ptr shader =
1712            panvk_meta_copy_img2buf_shader(&dev->pdev, &dev->meta.bin_pool.base,
1713                                           panvk_meta_copy_img2buf_fmts[i],
1714                                           texdim, false, &shader_info);
1715         dev->meta.copy.img2buf[texdimidx][i].pushmap = shader_info.push;
1716         dev->meta.copy.img2buf[texdimidx][i].rsd =
1717            panvk_meta_copy_to_buf_emit_rsd(&dev->pdev,
1718                                            &dev->meta.desc_pool.base,
1719                                            shader, &shader_info, true);
1720
1721         if (texdim == 3)
1722            continue;
1723
1724         memset(&shader_info, 0, sizeof(shader_info));
1725         texdimidx = panvk_meta_copy_tex_type(texdim, true);
1726         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2buf));
1727         shader =
1728            panvk_meta_copy_img2buf_shader(&dev->pdev, &dev->meta.bin_pool.base,
1729                                           panvk_meta_copy_img2buf_fmts[i],
1730                                           texdim, true, &shader_info);
1731         dev->meta.copy.img2buf[texdimidx][i].pushmap = shader_info.push;
1732         dev->meta.copy.img2buf[texdimidx][i].rsd =
1733            panvk_meta_copy_to_buf_emit_rsd(&dev->pdev,
1734                                            &dev->meta.desc_pool.base,
1735                                            shader, &shader_info, true);
1736      }
1737   }
1738}
1739
1740void
1741panvk_per_arch(CmdCopyImageToBuffer)(VkCommandBuffer commandBuffer,
1742                                     VkImage srcImage,
1743                                     VkImageLayout srcImageLayout,
1744                                     VkBuffer destBuffer,
1745                                     uint32_t regionCount,
1746                                     const VkBufferImageCopy *pRegions)
1747{
1748   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1749   VK_FROM_HANDLE(panvk_buffer, buf, destBuffer);
1750   VK_FROM_HANDLE(panvk_image, img, srcImage);
1751
1752   for (unsigned i = 0; i < regionCount; i++) {
1753      panvk_meta_copy_img2buf(cmdbuf, buf, img, &pRegions[i]);
1754   }
1755}
1756
/* UBO/push-constant payload consumed by the buffer-to-buffer copy shader. */
struct panvk_meta_copy_buf2buf_info {
   mali_ptr src; /* GPU address of the source buffer */
   mali_ptr dst; /* GPU address of the destination buffer */
};
1761
/* Emit a NIR load of one panvk_meta_copy_buf2buf_info field from UBO 0;
 * the load width and offset are derived from the field's C layout.
 */
#define panvk_meta_copy_buf2buf_get_info_field(b, field) \
        nir_load_ubo((b), 1, \
                     sizeof(((struct panvk_meta_copy_buf2buf_info *)0)->field) * 8, \
                     nir_imm_int(b, 0), \
                     nir_imm_int(b, offsetof(struct panvk_meta_copy_buf2buf_info, field)), \
                     .align_mul = 4, \
                     .align_offset = 0, \
                     .range_base = 0, \
                     .range = ~0)
1771
1772static mali_ptr
1773panvk_meta_copy_buf2buf_shader(struct panfrost_device *pdev,
1774                               struct pan_pool *bin_pool,
1775                               unsigned blksz,
1776                               struct pan_shader_info *shader_info)
1777{
1778   /* FIXME: Won't work on compute queues, but we can't do that with
1779    * a compute shader if the destination is an AFBC surface.
1780    */
1781   nir_builder b =
1782      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
1783                                     GENX(pan_shader_get_compiler_options)(),
1784                                     "panvk_meta_copy_buf2buf(blksz=%d)",
1785                                     blksz);
1786
1787   b.shader->info.internal = true;
1788   b.shader->info.num_ubos = 1;
1789
1790   nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32);
1791
1792   nir_ssa_def *offset =
1793      nir_u2u64(&b, nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, blksz)));
1794   nir_ssa_def *srcptr =
1795      nir_iadd(&b, panvk_meta_copy_buf2buf_get_info_field(&b, src), offset);
1796   nir_ssa_def *dstptr =
1797      nir_iadd(&b, panvk_meta_copy_buf2buf_get_info_field(&b, dst), offset);
1798
1799   unsigned compsz = blksz < 4 ? blksz : 4;
1800   unsigned ncomps = blksz / compsz;
1801   nir_store_global(&b, dstptr, blksz,
1802                    nir_load_global(&b, srcptr, blksz, ncomps, compsz * 8),
1803                    (1 << ncomps) - 1);
1804
1805   struct panfrost_compile_inputs inputs = {
1806      .gpu_id = pdev->gpu_id,
1807      .is_blit = true,
1808   };
1809
1810   struct util_dynarray binary;
1811
1812   util_dynarray_init(&binary, NULL);
1813   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
1814
1815   /* Make sure UBO words have been upgraded to push constants and everything
1816    * is at the right place.
1817    */
1818   assert(shader_info->ubo_count == 1);
1819   assert(shader_info->push.count == (sizeof(struct panvk_meta_copy_buf2buf_info) / 4));
1820
1821   mali_ptr shader =
1822      pan_pool_upload_aligned(bin_pool, binary.data, binary.size,
1823                              PAN_ARCH >= 6 ? 128 : 64);
1824
1825   util_dynarray_fini(&binary);
1826   ralloc_free(b.shader);
1827
1828   return shader;
1829}
1830
1831static void
1832panvk_meta_copy_buf2buf_init(struct panvk_physical_device *dev)
1833{
1834   for (unsigned i = 0; i < ARRAY_SIZE(dev->meta.copy.buf2buf); i++) {
1835      struct pan_shader_info shader_info;
1836      mali_ptr shader =
1837         panvk_meta_copy_buf2buf_shader(&dev->pdev, &dev->meta.bin_pool.base,
1838                                        1 << i, &shader_info);
1839      dev->meta.copy.buf2buf[i].pushmap = shader_info.push;
1840      dev->meta.copy.buf2buf[i].rsd =
1841         panvk_meta_copy_to_buf_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
1842                                         shader, &shader_info, false);
1843   }
1844}
1845
/*
 * Record a compute-based buffer-to-buffer copy for one VkBufferCopy
 * region. The block size (1 << log2blksz bytes, up to 16) is the largest
 * power of two dividing the source address, destination address and copy
 * size, so each invocation can move one maximally-aligned block.
 */
static void
panvk_meta_copy_buf2buf(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_buffer *src,
                        const struct panvk_buffer *dst,
                        const VkBufferCopy *region)
{
   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;

   struct panvk_meta_copy_buf2buf_info info = {
      .src = src->bo->ptr.gpu + src->bo_offset + region->srcOffset,
      .dst = dst->bo->ptr.gpu + dst->bo_offset + region->dstOffset,
   };

   /* ffs() of the low 4 bits gives the common alignment; 0 means all
    * three values are 16-byte aligned, so use the maximum block size.
    */
   unsigned alignment = ffs((info.src | info.dst | region->size) & 15);
   unsigned log2blksz = alignment ? alignment - 1 : 4;

   assert(log2blksz < ARRAY_SIZE(cmdbuf->device->physical_device->meta.copy.buf2buf));
   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].rsd;
   const struct panfrost_ubo_push *pushmap =
      &cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].pushmap;

   mali_ptr pushconsts =
      panvk_meta_copy_emit_push_constants(pdev, pushmap, &cmdbuf->desc_pool.base,
                                          &info, sizeof(info));
   mali_ptr ubo =
      panvk_meta_copy_emit_ubo(pdev, &cmdbuf->desc_pool.base, &info, sizeof(info));

   /* The copy job runs in its own batch. */
   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);

   mali_ptr tsd = batch->tls.gpu;

   /* One single-invocation workgroup per block. */
   unsigned nblocks = region->size >> log2blksz;
   struct pan_compute_dim num_wg = { nblocks, 1, 1 };
   struct pan_compute_dim wg_sz = { 1, 1, 1};
   struct panfrost_ptr job =
     panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
                                      &batch->scoreboard,
                                      &num_wg, &wg_sz,
                                      0, 0, ubo, pushconsts, rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   batch->blit.src = src->bo;
   batch->blit.dst = dst->bo;
   panvk_per_arch(cmd_close_batch)(cmdbuf);
}
1897
1898void
1899panvk_per_arch(CmdCopyBuffer)(VkCommandBuffer commandBuffer,
1900                              VkBuffer srcBuffer,
1901                              VkBuffer destBuffer,
1902                              uint32_t regionCount,
1903                              const VkBufferCopy *pRegions)
1904{
1905   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1906   VK_FROM_HANDLE(panvk_buffer, src, srcBuffer);
1907   VK_FROM_HANDLE(panvk_buffer, dst, destBuffer);
1908
1909   for (unsigned i = 0; i < regionCount; i++) {
1910      panvk_meta_copy_buf2buf(cmdbuf, src, dst, &pRegions[i]);
1911   }
1912}
1913
/* UBO/push-constant payload consumed by the fill-buffer shader. */
struct panvk_meta_fill_buf_info {
   mali_ptr start; /* GPU address of the first word to fill */
   uint32_t val;   /* 32-bit fill value */
};
1918
/* Emit a NIR load of one panvk_meta_fill_buf_info field from UBO 0;
 * the load width and offset are derived from the field's C layout.
 */
#define panvk_meta_fill_buf_get_info_field(b, field) \
        nir_load_ubo((b), 1, \
                     sizeof(((struct panvk_meta_fill_buf_info *)0)->field) * 8, \
                     nir_imm_int(b, 0), \
                     nir_imm_int(b, offsetof(struct panvk_meta_fill_buf_info, field)), \
                     .align_mul = 4, \
                     .align_offset = 0, \
                     .range_base = 0, \
                     .range = ~0)
1928
1929static mali_ptr
1930panvk_meta_fill_buf_shader(struct panfrost_device *pdev,
1931                           struct pan_pool *bin_pool,
1932                           struct pan_shader_info *shader_info)
1933{
1934   /* FIXME: Won't work on compute queues, but we can't do that with
1935    * a compute shader if the destination is an AFBC surface.
1936    */
1937   nir_builder b =
1938      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
1939                                     GENX(pan_shader_get_compiler_options)(),
1940                                     "panvk_meta_fill_buf()");
1941
1942   b.shader->info.internal = true;
1943   b.shader->info.num_ubos = 1;
1944
1945   nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32);
1946
1947   nir_ssa_def *offset =
1948      nir_u2u64(&b, nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, sizeof(uint32_t))));
1949   nir_ssa_def *ptr =
1950      nir_iadd(&b, panvk_meta_fill_buf_get_info_field(&b, start), offset);
1951   nir_ssa_def *val = panvk_meta_fill_buf_get_info_field(&b, val);
1952
1953   nir_store_global(&b, ptr, sizeof(uint32_t), val, 1);
1954
1955   struct panfrost_compile_inputs inputs = {
1956      .gpu_id = pdev->gpu_id,
1957      .is_blit = true,
1958   };
1959
1960   struct util_dynarray binary;
1961
1962   util_dynarray_init(&binary, NULL);
1963   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
1964
1965   /* Make sure UBO words have been upgraded to push constants and everything
1966    * is at the right place.
1967    */
1968   assert(shader_info->ubo_count == 1);
1969   assert(shader_info->push.count == 3);
1970
1971   mali_ptr shader =
1972      pan_pool_upload_aligned(bin_pool, binary.data, binary.size,
1973                              PAN_ARCH >= 6 ? 128 : 64);
1974
1975   util_dynarray_fini(&binary);
1976   ralloc_free(b.shader);
1977
1978   return shader;
1979}
1980
1981static mali_ptr
1982panvk_meta_fill_buf_emit_rsd(struct panfrost_device *pdev,
1983                             struct pan_pool *bin_pool,
1984                             struct pan_pool *desc_pool,
1985                             struct panfrost_ubo_push *pushmap)
1986{
1987   struct pan_shader_info shader_info;
1988
1989   mali_ptr shader =
1990      panvk_meta_fill_buf_shader(pdev, bin_pool, &shader_info);
1991
1992   struct panfrost_ptr rsd_ptr =
1993      pan_pool_alloc_desc_aggregate(desc_pool,
1994                                    PAN_DESC(RENDERER_STATE));
1995
1996   pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
1997      pan_shader_prepare_rsd(&shader_info, shader, &cfg);
1998   }
1999
2000   *pushmap = shader_info.push;
2001   return rsd_ptr.gpu;
2002}
2003
/* Build the fill-buffer pipeline once at physical-device init time. */
static void
panvk_meta_fill_buf_init(struct panvk_physical_device *dev)
{
   dev->meta.copy.fillbuf.rsd =
      panvk_meta_fill_buf_emit_rsd(&dev->pdev, &dev->meta.bin_pool.base,
                                   &dev->meta.desc_pool.base,
                                   &dev->meta.copy.fillbuf.pushmap);
}
2012
/*
 * Record a compute-based buffer fill: one invocation stores one 32-bit
 * word of `val`. Both offset and size must be 4-byte aligned (asserted
 * below).
 */
static void
panvk_meta_fill_buf(struct panvk_cmd_buffer *cmdbuf,
                    const struct panvk_buffer *dst,
                    VkDeviceSize size, VkDeviceSize offset,
                    uint32_t val)
{
   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;

   /* VK_WHOLE_SIZE: fill to the end of the buffer, rounded down to a
    * multiple of 4.
    */
   if (size == VK_WHOLE_SIZE)
      size = (dst->size - offset) & ~3ULL;

   struct panvk_meta_fill_buf_info info = {
      .start = dst->bo->ptr.gpu + dst->bo_offset + offset,
      .val = val,
   };

   assert(!(offset & 3) && !(size & 3));

   unsigned nwords = size / sizeof(uint32_t);
   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.fillbuf.rsd;
   const struct panfrost_ubo_push *pushmap =
      &cmdbuf->device->physical_device->meta.copy.fillbuf.pushmap;

   mali_ptr pushconsts =
      panvk_meta_copy_emit_push_constants(pdev, pushmap, &cmdbuf->desc_pool.base,
                                          &info, sizeof(info));
   mali_ptr ubo =
      panvk_meta_copy_emit_ubo(pdev, &cmdbuf->desc_pool.base, &info, sizeof(info));

   /* The fill job runs in its own batch. */
   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);

   mali_ptr tsd = batch->tls.gpu;

   /* One single-invocation workgroup per 32-bit word. */
   struct pan_compute_dim num_wg = { nwords, 1, 1 };
   struct pan_compute_dim wg_sz = { 1, 1, 1};
   struct panfrost_ptr job =
     panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
                                      &batch->scoreboard,
                                      &num_wg, &wg_sz,
                                      0, 0, ubo, pushconsts, rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   batch->blit.dst = dst->bo;
   panvk_per_arch(cmd_close_batch)(cmdbuf);
}
2064
2065void
2066panvk_per_arch(CmdFillBuffer)(VkCommandBuffer commandBuffer,
2067                              VkBuffer dstBuffer,
2068                              VkDeviceSize dstOffset,
2069                              VkDeviceSize fillSize,
2070                              uint32_t data)
2071{
2072   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
2073   VK_FROM_HANDLE(panvk_buffer, dst, dstBuffer);
2074
2075   panvk_meta_fill_buf(cmdbuf, dst, fillSize, dstOffset, data);
2076}
2077
/*
 * Implement vkCmdUpdateBuffer by staging the host data in the command
 * buffer's descriptor pool and reusing the buf2buf copy pipeline with a
 * fixed 4-byte block size.
 */
static void
panvk_meta_update_buf(struct panvk_cmd_buffer *cmdbuf,
                      const struct panvk_buffer *dst, VkDeviceSize offset,
                      VkDeviceSize size, const void *data)
{
   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;

   /* Upload the host data to a GPU-visible staging area, then copy from
    * there to the destination buffer.
    */
   struct panvk_meta_copy_buf2buf_info info = {
      .src = pan_pool_upload_aligned(&cmdbuf->desc_pool.base, data, size, 4),
      .dst = dst->bo->ptr.gpu + dst->bo_offset + offset,
   };

   /* Always use the 4-byte-block variant (log2(4) == 2). */
   unsigned log2blksz = ffs(sizeof(uint32_t)) - 1;

   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].rsd;
   const struct panfrost_ubo_push *pushmap =
      &cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].pushmap;

   mali_ptr pushconsts =
      panvk_meta_copy_emit_push_constants(pdev, pushmap, &cmdbuf->desc_pool.base,
                                          &info, sizeof(info));
   mali_ptr ubo =
      panvk_meta_copy_emit_ubo(pdev, &cmdbuf->desc_pool.base, &info, sizeof(info));

   /* The copy job runs in its own batch. */
   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);

   mali_ptr tsd = batch->tls.gpu;

   /* One single-invocation workgroup per 4-byte block. */
   unsigned nblocks = size >> log2blksz;
   struct pan_compute_dim num_wg = { nblocks, 1, 1 };
   struct pan_compute_dim wg_sz = { 1, 1, 1};
   struct panfrost_ptr job =
     panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
                                      &batch->scoreboard,
                                      &num_wg, &wg_sz,
                                      0, 0, ubo, pushconsts, rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   batch->blit.dst = dst->bo;
   panvk_per_arch(cmd_close_batch)(cmdbuf);
}
2125
2126void
2127panvk_per_arch(CmdUpdateBuffer)(VkCommandBuffer commandBuffer,
2128                                VkBuffer dstBuffer,
2129                                VkDeviceSize dstOffset,
2130                                VkDeviceSize dataSize,
2131                                const void *pData)
2132{
2133   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
2134   VK_FROM_HANDLE(panvk_buffer, dst, dstBuffer);
2135
2136   panvk_meta_update_buf(cmdbuf, dst, dstOffset, dataSize, pData);
2137}
2138
/* Pre-compile every meta-copy pipeline for this physical device so that
 * command recording never has to compile shaders.
 */
void
panvk_per_arch(meta_copy_init)(struct panvk_physical_device *dev)
{
   /* Both img2img variants (second argument toggles the variant). */
   panvk_meta_copy_img2img_init(dev, false);
   panvk_meta_copy_img2img_init(dev, true);
   panvk_meta_copy_buf2img_init(dev);
   panvk_meta_copy_img2buf_init(dev);
   panvk_meta_copy_buf2buf_init(dev);
   panvk_meta_fill_buf_init(dev);
}
2149