1/* 2 * Copyright © 2021 Collabora Ltd. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24#include "gen_macros.h" 25 26#include "nir/nir_builder.h" 27#include "pan_encoder.h" 28#include "pan_shader.h" 29 30#include "panvk_private.h" 31 32static mali_ptr 33panvk_meta_copy_img_emit_texture(struct panfrost_device *pdev, 34 struct pan_pool *desc_pool, 35 const struct pan_image_view *view) 36{ 37#if PAN_ARCH >= 6 38 struct panfrost_ptr texture = 39 pan_pool_alloc_desc(desc_pool, TEXTURE); 40 size_t payload_size = 41 GENX(panfrost_estimate_texture_payload_size)(view); 42 struct panfrost_ptr surfaces = 43 pan_pool_alloc_aligned(desc_pool, payload_size, 44 pan_alignment(SURFACE_WITH_STRIDE)); 45 46 GENX(panfrost_new_texture)(pdev, view, texture.cpu, &surfaces); 47 48 return texture.gpu; 49#else 50 size_t sz = pan_size(TEXTURE) + 51 GENX(panfrost_estimate_texture_payload_size)(view); 52 struct panfrost_ptr texture = 53 pan_pool_alloc_aligned(desc_pool, sz, pan_alignment(TEXTURE)); 54 struct panfrost_ptr surfaces = { 55 .cpu = texture.cpu + pan_size(TEXTURE), 56 .gpu = texture.gpu + pan_size(TEXTURE), 57 }; 58 59 GENX(panfrost_new_texture)(pdev, view, texture.cpu, &surfaces); 60 61 return pan_pool_upload_aligned(desc_pool, &texture.gpu, 62 sizeof(mali_ptr), 63 sizeof(mali_ptr)); 64#endif 65} 66 67static mali_ptr 68panvk_meta_copy_img_emit_sampler(struct panfrost_device *pdev, 69 struct pan_pool *desc_pool) 70{ 71 struct panfrost_ptr sampler = 72 pan_pool_alloc_desc(desc_pool, SAMPLER); 73 74 pan_pack(sampler.cpu, SAMPLER, cfg) { 75#if PAN_ARCH >= 6 76 cfg.seamless_cube_map = false; 77#endif 78 cfg.normalized_coordinates = false; 79 cfg.minify_nearest = true; 80 cfg.magnify_nearest = true; 81 } 82 83 return sampler.gpu; 84} 85 86static void 87panvk_meta_copy_emit_varying(struct pan_pool *pool, 88 mali_ptr coordinates, 89 mali_ptr *varying_bufs, 90 mali_ptr *varyings) 91{ 92 /* Bifrost needs an empty desc to mark end of prefetching */ 93 bool padding_buffer = PAN_ARCH >= 6; 94 95 struct panfrost_ptr varying = 96 pan_pool_alloc_desc(pool, ATTRIBUTE); 97 struct panfrost_ptr varying_buffer = 98 pan_pool_alloc_desc_array(pool, (padding_buffer ? 2 : 1), 99 ATTRIBUTE_BUFFER); 100 101 pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) { 102 cfg.pointer = coordinates; 103 cfg.stride = 4 * sizeof(uint32_t); 104 cfg.size = cfg.stride * 4; 105 } 106 107 if (padding_buffer) { 108 pan_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER), 109 ATTRIBUTE_BUFFER, cfg); 110 } 111 112 pan_pack(varying.cpu, ATTRIBUTE, cfg) { 113 cfg.buffer_index = 0; 114 cfg.offset_enable = PAN_ARCH <= 5; 115 cfg.format = pool->dev->formats[PIPE_FORMAT_R32G32B32_FLOAT].hw; 116 } 117 118 *varyings = varying.gpu; 119 *varying_bufs = varying_buffer.gpu; 120} 121 122static void 123panvk_meta_copy_emit_dcd(struct pan_pool *pool, 124 mali_ptr src_coords, mali_ptr dst_coords, 125 mali_ptr texture, mali_ptr sampler, 126 mali_ptr vpd, mali_ptr tsd, mali_ptr rsd, 127 mali_ptr ubos, mali_ptr push_constants, 128 void *out) 129{ 130 pan_pack(out, DRAW, cfg) { 131 cfg.four_components_per_vertex = true; 132 cfg.draw_descriptor_is_64b = true; 133 cfg.thread_storage = tsd; 134 cfg.state = rsd; 135 cfg.uniform_buffers = ubos; 136 cfg.push_uniforms = push_constants; 137 cfg.position = dst_coords; 138 if (src_coords) { 139 panvk_meta_copy_emit_varying(pool, src_coords, 140 &cfg.varying_buffers, 141 &cfg.varyings); 142 } 143 cfg.viewport = vpd; 144 cfg.textures = texture; 145 cfg.samplers = sampler; 146 } 147} 148 149static struct panfrost_ptr 150panvk_meta_copy_emit_tiler_job(struct pan_pool *desc_pool, 151 struct pan_scoreboard *scoreboard, 152 mali_ptr src_coords, mali_ptr dst_coords, 153 mali_ptr texture, mali_ptr sampler, 154 mali_ptr ubo, mali_ptr push_constants, 155 mali_ptr vpd, mali_ptr rsd, 156 mali_ptr tsd, mali_ptr tiler) 157{ 158 struct panfrost_ptr job = 159 pan_pool_alloc_desc(desc_pool, TILER_JOB); 160 161 panvk_meta_copy_emit_dcd(desc_pool, src_coords, dst_coords, 162 texture, sampler, vpd, tsd, rsd, ubo, push_constants, 163 pan_section_ptr(job.cpu, TILER_JOB, DRAW)); 164 165 pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) { 166 cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP; 167 cfg.index_count = 4; 168 cfg.job_task_split = 6; 169 } 170 171 pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) { 172 cfg.constant = 1.0f; 173 } 174 175 void *invoc = pan_section_ptr(job.cpu, 176 TILER_JOB, 177 INVOCATION); 178 panfrost_pack_work_groups_compute(invoc, 1, 4, 179 1, 1, 1, 1, true, false); 180 181#if PAN_ARCH >= 6 182 pan_section_pack(job.cpu, TILER_JOB, PADDING, cfg); 183 pan_section_pack(job.cpu, TILER_JOB, TILER, cfg) { 184 cfg.address = tiler; 185 } 186#endif 187 188 panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER, 189 false, false, 0, 0, &job, false); 190 return job; 191} 192 193static struct panfrost_ptr 194panvk_meta_copy_emit_compute_job(struct pan_pool *desc_pool, 195 struct pan_scoreboard *scoreboard, 196 const struct pan_compute_dim *num_wg, 197 const struct pan_compute_dim *wg_sz, 198 mali_ptr texture, mali_ptr sampler, 199 mali_ptr ubo, mali_ptr push_constants, 200 mali_ptr rsd, mali_ptr tsd) 201{ 202 struct panfrost_ptr job = 203 pan_pool_alloc_desc(desc_pool, COMPUTE_JOB); 204 205 void *invoc = pan_section_ptr(job.cpu, 206 COMPUTE_JOB, 207 INVOCATION); 208 panfrost_pack_work_groups_compute(invoc, num_wg->x, num_wg->y, num_wg->z, 209 wg_sz->x, wg_sz->y, wg_sz->z, 210 false, false); 211 212 pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) { 213 cfg.job_task_split = 8; 214 } 215 216 panvk_meta_copy_emit_dcd(desc_pool, 0, 0, texture, sampler, 217 0, tsd, rsd, ubo, push_constants, 218 pan_section_ptr(job.cpu, COMPUTE_JOB, DRAW)); 219 220 panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_COMPUTE, 221 false, false, 0, 0, &job, false); 222 return job; 223} 224 225 226#if PAN_ARCH >= 6 227static uint32_t 228panvk_meta_copy_img_bifrost_raw_format(unsigned texelsize) 229{ 230 switch (texelsize) { 231 case 6: return MALI_RGB16UI << 12; 232 case 8: return MALI_RG32UI << 12; 233 case 12: return MALI_RGB32UI << 12; 234 case 16: return MALI_RGBA32UI << 12; 235 default: unreachable("Invalid texel size\n"); 236 } 237} 238#endif 239 240static mali_ptr 241panvk_meta_copy_to_img_emit_rsd(struct panfrost_device *pdev, 242 struct pan_pool *desc_pool, 243 mali_ptr shader, 244 const struct pan_shader_info *shader_info, 245 enum pipe_format fmt, unsigned wrmask, 246 bool from_img) 247{ 248 struct panfrost_ptr rsd_ptr = 249 pan_pool_alloc_desc_aggregate(desc_pool, 250 PAN_DESC(RENDERER_STATE), 251 PAN_DESC_ARRAY(1, BLEND)); 252 253 bool raw = util_format_get_blocksize(fmt) > 4; 254 unsigned fullmask = (1 << util_format_get_nr_components(fmt)) - 1; 255 bool partialwrite = fullmask != wrmask && !raw; 256 bool readstb = fullmask != wrmask && raw; 257 258 pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) { 259 pan_shader_prepare_rsd(shader_info, shader, &cfg); 260 if (from_img) { 261 cfg.shader.varying_count = 1; 262 cfg.shader.texture_count = 1; 263 cfg.shader.sampler_count = 1; 264 } 265 cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION; 266 cfg.multisample_misc.sample_mask = UINT16_MAX; 267 cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS; 268 cfg.stencil_mask_misc.stencil_mask_front = 0xFF; 269 cfg.stencil_mask_misc.stencil_mask_back = 0xFF; 270 cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS; 271 cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE; 272 cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE; 273 cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE; 274 cfg.stencil_front.mask = 0xFF; 275 cfg.stencil_back = cfg.stencil_front; 276 277#if PAN_ARCH >= 6 278 cfg.properties.allow_forward_pixel_to_be_killed = true; 279 cfg.properties.allow_forward_pixel_to_kill = 280 !partialwrite && !readstb; 281 cfg.properties.zs_update_operation = 282 MALI_PIXEL_KILL_STRONG_EARLY; 283 cfg.properties.pixel_kill_operation = 284 MALI_PIXEL_KILL_FORCE_EARLY; 285#else 286 cfg.properties.shader_reads_tilebuffer = readstb; 287 cfg.properties.work_register_count = shader_info->work_reg_count; 288 cfg.properties.force_early_z = true; 289 cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS; 290#endif 291 } 292 293 pan_pack(rsd_ptr.cpu + pan_size(RENDERER_STATE), BLEND, cfg) { 294 cfg.round_to_fb_precision = true; 295 cfg.load_destination = partialwrite; 296 cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC; 297 cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC; 298 cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO; 299 cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC; 300 cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC; 301 cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO; 302#if PAN_ARCH >= 6 303 cfg.internal.mode = 304 partialwrite ? 305 MALI_BLEND_MODE_FIXED_FUNCTION : 306 MALI_BLEND_MODE_OPAQUE; 307 cfg.equation.color_mask = partialwrite ? wrmask : 0xf; 308 cfg.internal.fixed_function.num_comps = 4; 309 if (!raw) { 310 cfg.internal.fixed_function.conversion.memory_format = 311 panfrost_format_to_bifrost_blend(pdev, fmt, false); 312 cfg.internal.fixed_function.conversion.register_format = 313 MALI_REGISTER_FILE_FORMAT_F32; 314 } else { 315 unsigned imgtexelsz = util_format_get_blocksize(fmt); 316 317 cfg.internal.fixed_function.conversion.memory_format = 318 panvk_meta_copy_img_bifrost_raw_format(imgtexelsz); 319 cfg.internal.fixed_function.conversion.register_format = 320 (imgtexelsz & 2) ? 321 MALI_REGISTER_FILE_FORMAT_U16 : 322 MALI_REGISTER_FILE_FORMAT_U32; 323 } 324#else 325 cfg.equation.color_mask = wrmask; 326#endif 327 } 328 329 return rsd_ptr.gpu; 330} 331 332static mali_ptr 333panvk_meta_copy_emit_ubo(struct panfrost_device *pdev, 334 struct pan_pool *pool, 335 void *data, unsigned size) 336{ 337 struct panfrost_ptr ubo = pan_pool_alloc_desc(pool, UNIFORM_BUFFER); 338 339 pan_pack(ubo.cpu, UNIFORM_BUFFER, cfg) { 340 cfg.entries = DIV_ROUND_UP(size, 16); 341 cfg.pointer = pan_pool_upload_aligned(pool, data, size, 16); 342 } 343 344 return ubo.gpu; 345} 346 347static mali_ptr 348panvk_meta_copy_emit_push_constants(struct panfrost_device *pdev, 349 const struct panfrost_ubo_push *pushmap, 350 struct pan_pool *pool, 351 const void *data, unsigned size) 352{ 353 assert(pushmap->count <= (size / 4)); 354 355 const uint32_t *in = data; 356 uint32_t pushvals[PAN_MAX_PUSH]; 357 358 for (unsigned i = 0; i < pushmap->count; i++) { 359 assert(i < ARRAY_SIZE(pushvals)); 360 assert(pushmap->words[i].ubo == 0); 361 assert(pushmap->words[i].offset < size); 362 pushvals[i] = in[pushmap->words[i].offset / 4]; 363 } 364 365 return pan_pool_upload_aligned(pool, pushvals, size, 16); 366} 367 368static mali_ptr 369panvk_meta_copy_to_buf_emit_rsd(struct panfrost_device *pdev, 370 struct pan_pool *desc_pool, 371 mali_ptr shader, 372 const struct pan_shader_info *shader_info, 373 bool from_img) 374{ 375 struct panfrost_ptr rsd_ptr = 376 pan_pool_alloc_desc_aggregate(desc_pool, 377 PAN_DESC(RENDERER_STATE)); 378 379 pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) { 380 pan_shader_prepare_rsd(shader_info, shader, &cfg); 381 if (from_img) { 382 cfg.shader.texture_count = 1; 383 cfg.shader.sampler_count = 1; 384 } 385 } 386 387 return rsd_ptr.gpu; 388} 389 390static mali_ptr 391panvk_meta_copy_img2img_shader(struct panfrost_device *pdev, 392 struct pan_pool *bin_pool, 393 enum pipe_format srcfmt, 394 enum pipe_format dstfmt, unsigned dstmask, 395 unsigned texdim, bool texisarray, bool is_ms, 396 struct pan_shader_info *shader_info) 397{ 398 nir_builder b = 399 nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, 400 GENX(pan_shader_get_compiler_options)(), 401 "panvk_meta_copy_img2img(srcfmt=%s,dstfmt=%s,%dD%s%s)", 402 util_format_name(srcfmt), util_format_name(dstfmt), 403 texdim, texisarray ? "[]" : "", is_ms ? ",ms" : ""); 404 405 b.shader->info.internal = true; 406 407 nir_variable *coord_var = 408 nir_variable_create(b.shader, nir_var_shader_in, 409 glsl_vector_type(GLSL_TYPE_FLOAT, texdim + texisarray), 410 "coord"); 411 coord_var->data.location = VARYING_SLOT_TEX0; 412 nir_ssa_def *coord = nir_f2u32(&b, nir_load_var(&b, coord_var)); 413 414 nir_tex_instr *tex = nir_tex_instr_create(b.shader, is_ms ? 2 : 1); 415 tex->op = is_ms ? nir_texop_txf_ms : nir_texop_txf; 416 tex->texture_index = 0; 417 tex->is_array = texisarray; 418 tex->dest_type = util_format_is_unorm(srcfmt) ? 419 nir_type_float32 : nir_type_uint32; 420 421 switch (texdim) { 422 case 1: tex->sampler_dim = GLSL_SAMPLER_DIM_1D; break; 423 case 2: tex->sampler_dim = GLSL_SAMPLER_DIM_2D; break; 424 case 3: tex->sampler_dim = GLSL_SAMPLER_DIM_3D; break; 425 default: unreachable("Invalid texture dimension"); 426 } 427 428 tex->src[0].src_type = nir_tex_src_coord; 429 tex->src[0].src = nir_src_for_ssa(coord); 430 tex->coord_components = texdim + texisarray; 431 432 if (is_ms) { 433 tex->src[1].src_type = nir_tex_src_ms_index; 434 tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(&b)); 435 } 436 437 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 438 nir_alu_type_get_type_size(tex->dest_type), NULL); 439 nir_builder_instr_insert(&b, &tex->instr); 440 441 nir_ssa_def *texel = &tex->dest.ssa; 442 443 unsigned dstcompsz = 444 util_format_get_component_bits(dstfmt, UTIL_FORMAT_COLORSPACE_RGB, 0); 445 unsigned ndstcomps = util_format_get_nr_components(dstfmt); 446 const struct glsl_type *outtype = NULL; 447 448 if (srcfmt == PIPE_FORMAT_R5G6B5_UNORM && dstfmt == PIPE_FORMAT_R8G8_UNORM) { 449 nir_ssa_def *rgb = 450 nir_f2u32(&b, nir_fmul(&b, texel, 451 nir_vec3(&b, 452 nir_imm_float(&b, 31), 453 nir_imm_float(&b, 63), 454 nir_imm_float(&b, 31)))); 455 nir_ssa_def *rg = 456 nir_vec2(&b, 457 nir_ior(&b, nir_channel(&b, rgb, 0), 458 nir_ishl(&b, nir_channel(&b, rgb, 1), 459 nir_imm_int(&b, 5))), 460 nir_ior(&b, 461 nir_ushr_imm(&b, nir_channel(&b, rgb, 1), 3), 462 nir_ishl(&b, nir_channel(&b, rgb, 2), 463 nir_imm_int(&b, 3)))); 464 rg = nir_iand_imm(&b, rg, 255); 465 texel = nir_fmul_imm(&b, nir_u2f32(&b, rg), 1.0 / 255); 466 outtype = glsl_vector_type(GLSL_TYPE_FLOAT, 2); 467 } else if (srcfmt == PIPE_FORMAT_R8G8_UNORM && dstfmt == PIPE_FORMAT_R5G6B5_UNORM) { 468 nir_ssa_def *rg = nir_f2u32(&b, nir_fmul_imm(&b, texel, 255)); 469 nir_ssa_def *rgb = 470 nir_vec3(&b, 471 nir_channel(&b, rg, 0), 472 nir_ior(&b, 473 nir_ushr_imm(&b, nir_channel(&b, rg, 0), 5), 474 nir_ishl(&b, nir_channel(&b, rg, 1), 475 nir_imm_int(&b, 3))), 476 nir_ushr_imm(&b, nir_channel(&b, rg, 1), 3)); 477 rgb = nir_iand(&b, rgb, 478 nir_vec3(&b, 479 nir_imm_int(&b, 31), 480 nir_imm_int(&b, 63), 481 nir_imm_int(&b, 31))); 482 texel = nir_fmul(&b, nir_u2f32(&b, rgb), 483 nir_vec3(&b, 484 nir_imm_float(&b, 1.0 / 31), 485 nir_imm_float(&b, 1.0 / 63), 486 nir_imm_float(&b, 1.0 / 31))); 487 outtype = glsl_vector_type(GLSL_TYPE_FLOAT, 3); 488 } else { 489 assert(srcfmt == dstfmt); 490 enum glsl_base_type basetype; 491 if (util_format_is_unorm(dstfmt)) { 492 basetype = GLSL_TYPE_FLOAT; 493 } else if (dstcompsz == 16) { 494 basetype = GLSL_TYPE_UINT16; 495 } else { 496 assert(dstcompsz == 32); 497 basetype = GLSL_TYPE_UINT; 498 } 499 500 if (dstcompsz == 16) 501 texel = nir_u2u16(&b, texel); 502 503 texel = nir_channels(&b, texel, (1 << ndstcomps) - 1); 504 outtype = glsl_vector_type(basetype, ndstcomps); 505 } 506 507 nir_variable *out = 508 nir_variable_create(b.shader, nir_var_shader_out, outtype, "out"); 509 out->data.location = FRAG_RESULT_DATA0; 510 511 unsigned fullmask = (1 << ndstcomps) - 1; 512 if (dstcompsz > 8 && dstmask != fullmask) { 513 nir_ssa_def *oldtexel = nir_load_var(&b, out); 514 nir_ssa_def *dstcomps[4]; 515 516 for (unsigned i = 0; i < ndstcomps; i++) { 517 if (dstmask & BITFIELD_BIT(i)) 518 dstcomps[i] = nir_channel(&b, texel, i); 519 else 520 dstcomps[i] = nir_channel(&b, oldtexel, i); 521 } 522 523 texel = nir_vec(&b, dstcomps, ndstcomps); 524 } 525 526 nir_store_var(&b, out, texel, 0xff); 527 528 struct panfrost_compile_inputs inputs = { 529 .gpu_id = pdev->gpu_id, 530 .is_blit = true, 531 }; 532 533#if PAN_ARCH >= 6 534 pan_pack(&inputs.bifrost.rt_conv[0], INTERNAL_CONVERSION, cfg) { 535 cfg.memory_format = (dstcompsz == 2 ? MALI_RG16UI : MALI_RG32UI) << 12; 536 cfg.register_format = dstcompsz == 2 ? 537 MALI_REGISTER_FILE_FORMAT_U16 : 538 MALI_REGISTER_FILE_FORMAT_U32; 539 } 540 inputs.bifrost.static_rt_conv = true; 541#endif 542 543 struct util_dynarray binary; 544 545 util_dynarray_init(&binary, NULL); 546 GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info); 547 548 shader_info->fs.sample_shading = is_ms; 549 550 mali_ptr shader = 551 pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 552 PAN_ARCH >= 6 ? 128 : 64); 553 554 util_dynarray_fini(&binary); 555 ralloc_free(b.shader); 556 557 return shader; 558} 559 560static enum pipe_format 561panvk_meta_copy_img_format(enum pipe_format fmt) 562{ 563 /* We can't use a non-compressed format when handling a tiled/AFBC 564 * compressed format because the tile size differ (4x4 blocks for 565 * compressed formats and 16x16 texels for non-compressed ones). 566 */ 567 assert(!util_format_is_compressed(fmt)); 568 569 /* Pick blendable formats when we can, otherwise pick the UINT variant 570 * matching the texel size. 571 */ 572 switch (util_format_get_blocksize(fmt)) { 573 case 16: return PIPE_FORMAT_R32G32B32A32_UINT; 574 case 12: return PIPE_FORMAT_R32G32B32_UINT; 575 case 8: return PIPE_FORMAT_R32G32_UINT; 576 case 6: return PIPE_FORMAT_R16G16B16_UINT; 577 case 4: return PIPE_FORMAT_R8G8B8A8_UNORM; 578 case 2: return (fmt == PIPE_FORMAT_R5G6B5_UNORM || 579 fmt == PIPE_FORMAT_B5G6R5_UNORM) ? 580 PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UNORM; 581 case 1: return PIPE_FORMAT_R8_UNORM; 582 default: unreachable("Unsupported format\n"); 583 } 584} 585 586struct panvk_meta_copy_img2img_format_info { 587 enum pipe_format srcfmt; 588 enum pipe_format dstfmt; 589 unsigned dstmask; 590}; 591 592static const struct panvk_meta_copy_img2img_format_info panvk_meta_copy_img2img_fmts[] = { 593 { PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R8_UNORM, 0x1}, 594 { PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_R5G6B5_UNORM, 0x7}, 595 { PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_R8G8_UNORM, 0x3}, 596 { PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R5G6B5_UNORM, 0x7}, 597 { PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R8G8_UNORM, 0x3}, 598 /* Z24S8(depth) */ 599 { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0x7 }, 600 /* Z24S8(stencil) */ 601 { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0x8 }, 602 { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0xf }, 603 { PIPE_FORMAT_R16G16B16_UINT, PIPE_FORMAT_R16G16B16_UINT, 0x7 }, 604 { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x3 }, 605 /* Z32S8X24(depth) */ 606 { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x1 }, 607 /* Z32S8X24(stencil) */ 608 { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x2 }, 609 { PIPE_FORMAT_R32G32B32_UINT, PIPE_FORMAT_R32G32B32_UINT, 0x7 }, 610 { PIPE_FORMAT_R32G32B32A32_UINT, PIPE_FORMAT_R32G32B32A32_UINT, 0xf }, 611}; 612 613static unsigned 614panvk_meta_copy_img2img_format_idx(struct panvk_meta_copy_img2img_format_info key) 615{ 616 STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2img_fmts) == PANVK_META_COPY_IMG2IMG_NUM_FORMATS); 617 618 for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2img_fmts); i++) { 619 if (!memcmp(&key, &panvk_meta_copy_img2img_fmts[i], sizeof(key))) 620 return i; 621 } 622 623 unreachable("Invalid image format\n"); 624} 625 626static unsigned 627panvk_meta_copy_img_mask(enum pipe_format imgfmt, VkImageAspectFlags aspectMask) 628{ 629 if (aspectMask != VK_IMAGE_ASPECT_DEPTH_BIT && 630 aspectMask != VK_IMAGE_ASPECT_STENCIL_BIT) { 631 enum pipe_format outfmt = panvk_meta_copy_img_format(imgfmt); 632 633 return (1 << util_format_get_nr_components(outfmt)) - 1; 634 } 635 636 switch (imgfmt) { 637 case PIPE_FORMAT_S8_UINT: 638 return 1; 639 case PIPE_FORMAT_Z16_UNORM: 640 return 3; 641 case PIPE_FORMAT_Z16_UNORM_S8_UINT: 642 return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 3 : 8; 643 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 644 return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 7 : 8; 645 case PIPE_FORMAT_Z24X8_UNORM: 646 assert(aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT); 647 return 7; 648 case PIPE_FORMAT_Z32_FLOAT: 649 return 0xf; 650 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 651 return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 1 : 2; 652 default: 653 unreachable("Invalid depth format\n"); 654 } 655} 656 657static void 658panvk_meta_copy_img2img(struct panvk_cmd_buffer *cmdbuf, 659 const struct panvk_image *src, 660 const struct panvk_image *dst, 661 const VkImageCopy *region) 662{ 663 struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev; 664 struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info; 665 struct panvk_meta_copy_img2img_format_info key = { 666 .srcfmt = panvk_meta_copy_img_format(src->pimage.layout.format), 667 .dstfmt = panvk_meta_copy_img_format(dst->pimage.layout.format), 668 .dstmask = panvk_meta_copy_img_mask(dst->pimage.layout.format, 669 region->dstSubresource.aspectMask), 670 }; 671 672 assert(src->pimage.layout.nr_samples == dst->pimage.layout.nr_samples); 673 674 unsigned texdimidx = 675 panvk_meta_copy_tex_type(src->pimage.layout.dim, 676 src->pimage.layout.array_size > 1); 677 unsigned fmtidx = 678 panvk_meta_copy_img2img_format_idx(key); 679 unsigned ms = dst->pimage.layout.nr_samples > 1 ? 1 : 0; 680 681 mali_ptr rsd = 682 cmdbuf->device->physical_device->meta.copy.img2img[ms][texdimidx][fmtidx].rsd; 683 684 struct pan_image_view srcview = { 685 .format = key.srcfmt, 686 .dim = src->pimage.layout.dim == MALI_TEXTURE_DIMENSION_CUBE ? 687 MALI_TEXTURE_DIMENSION_2D : src->pimage.layout.dim, 688 .image = &src->pimage, 689 .nr_samples = src->pimage.layout.nr_samples, 690 .first_level = region->srcSubresource.mipLevel, 691 .last_level = region->srcSubresource.mipLevel, 692 .first_layer = region->srcSubresource.baseArrayLayer, 693 .last_layer = region->srcSubresource.baseArrayLayer + region->srcSubresource.layerCount - 1, 694 .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W }, 695 }; 696 697 struct pan_image_view dstview = { 698 .format = key.dstfmt, 699 .dim = MALI_TEXTURE_DIMENSION_2D, 700 .image = &dst->pimage, 701 .nr_samples = dst->pimage.layout.nr_samples, 702 .first_level = region->dstSubresource.mipLevel, 703 .last_level = region->dstSubresource.mipLevel, 704 .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W }, 705 }; 706 707 unsigned minx = MAX2(region->dstOffset.x, 0); 708 unsigned miny = MAX2(region->dstOffset.y, 0); 709 unsigned maxx = MAX2(region->dstOffset.x + region->extent.width - 1, 0); 710 unsigned maxy = MAX2(region->dstOffset.y + region->extent.height - 1, 0); 711 712 mali_ptr vpd = 713 panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base, 714 minx, miny, maxx, maxy); 715 716 float dst_rect[] = { 717 minx, miny, 0.0, 1.0, 718 maxx + 1, miny, 0.0, 1.0, 719 minx, maxy + 1, 0.0, 1.0, 720 maxx + 1, maxy + 1, 0.0, 1.0, 721 }; 722 723 mali_ptr dst_coords = 724 pan_pool_upload_aligned(&cmdbuf->desc_pool.base, dst_rect, 725 sizeof(dst_rect), 64); 726 727 /* TODO: don't force preloads of dst resources if unneeded */ 728 729 unsigned width = u_minify(dst->pimage.layout.width, region->dstSubresource.mipLevel); 730 unsigned height = u_minify(dst->pimage.layout.height, region->dstSubresource.mipLevel); 731 cmdbuf->state.fb.crc_valid[0] = false; 732 *fbinfo = (struct pan_fb_info){ 733 .width = width, 734 .height = height, 735 .extent.minx = minx & ~31, 736 .extent.miny = miny & ~31, 737 .extent.maxx = MIN2(ALIGN_POT(maxx + 1, 32), width) - 1, 738 .extent.maxy = MIN2(ALIGN_POT(maxy + 1, 32), height) - 1, 739 .nr_samples = dst->pimage.layout.nr_samples, 740 .rt_count = 1, 741 .rts[0].view = &dstview, 742 .rts[0].preload = true, 743 .rts[0].crc_valid = &cmdbuf->state.fb.crc_valid[0], 744 }; 745 746 mali_ptr texture = 747 panvk_meta_copy_img_emit_texture(pdev, &cmdbuf->desc_pool.base, &srcview); 748 mali_ptr sampler = 749 panvk_meta_copy_img_emit_sampler(pdev, &cmdbuf->desc_pool.base); 750 751 panvk_per_arch(cmd_close_batch)(cmdbuf); 752 753 minx = MAX2(region->srcOffset.x, 0); 754 miny = MAX2(region->srcOffset.y, 0); 755 maxx = MAX2(region->srcOffset.x + region->extent.width - 1, 0); 756 maxy = MAX2(region->srcOffset.y + region->extent.height - 1, 0); 757 assert(region->dstOffset.z >= 0); 758 759 unsigned first_src_layer = MAX2(0, region->srcOffset.z); 760 unsigned first_dst_layer = MAX2(region->dstSubresource.baseArrayLayer, region->dstOffset.z); 761 unsigned nlayers = MAX2(region->dstSubresource.layerCount, region->extent.depth); 762 for (unsigned l = 0; l < nlayers; l++) { 763 unsigned src_l = l + first_src_layer; 764 float src_rect[] = { 765 minx, miny, src_l, 1.0, 766 maxx + 1, miny, src_l, 1.0, 767 minx, maxy + 1, src_l, 1.0, 768 maxx + 1, maxy + 1, src_l, 1.0, 769 }; 770 771 mali_ptr src_coords = 772 pan_pool_upload_aligned(&cmdbuf->desc_pool.base, src_rect, 773 sizeof(src_rect), 64); 774 775 struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf); 776 777 dstview.first_layer = dstview.last_layer = l + first_dst_layer; 778 batch->blit.src = src->pimage.data.bo; 779 batch->blit.dst = dst->pimage.data.bo; 780 panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true); 781 panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf); 782 panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf); 783 784 mali_ptr tsd, tiler; 785 786#if PAN_ARCH >= 6 787 tsd = batch->tls.gpu; 788 tiler = batch->tiler.descs.gpu; 789#else 790 tsd = batch->fb.desc.gpu; 791 tiler = 0; 792#endif 793 794 struct panfrost_ptr job; 795 796 job = panvk_meta_copy_emit_tiler_job(&cmdbuf->desc_pool.base, 797 &batch->scoreboard, 798 src_coords, dst_coords, 799 texture, sampler, 0, 0, 800 vpd, rsd, tsd, tiler); 801 802 util_dynarray_append(&batch->jobs, void *, job.cpu); 803 panvk_per_arch(cmd_close_batch)(cmdbuf); 804 } 805} 806 807static void 808panvk_meta_copy_img2img_init(struct panvk_physical_device *dev, bool is_ms) 809{ 810 STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2img_fmts) == PANVK_META_COPY_IMG2IMG_NUM_FORMATS); 811 812 for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2img_fmts); i++) { 813 for (unsigned texdim = 1; texdim <= 3; texdim++) { 814 unsigned texdimidx = panvk_meta_copy_tex_type(texdim, false); 815 assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2img[0])); 816 817 /* No MSAA on 3D textures */ 818 if (texdim == 3 && is_ms) continue; 819 820 struct pan_shader_info shader_info; 821 mali_ptr shader = 822 panvk_meta_copy_img2img_shader(&dev->pdev, &dev->meta.bin_pool.base, 823 panvk_meta_copy_img2img_fmts[i].srcfmt, 824 panvk_meta_copy_img2img_fmts[i].dstfmt, 825 panvk_meta_copy_img2img_fmts[i].dstmask, 826 texdim, false, is_ms, &shader_info); 827 dev->meta.copy.img2img[is_ms][texdimidx][i].rsd = 828 panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base, 829 shader, &shader_info, 830 panvk_meta_copy_img2img_fmts[i].dstfmt, 831 panvk_meta_copy_img2img_fmts[i].dstmask, 832 true); 833 if (texdim == 3) 834 continue; 835 836 memset(&shader_info, 0, sizeof(shader_info)); 837 texdimidx = panvk_meta_copy_tex_type(texdim, true); 838 assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2img[0])); 839 shader = 840 panvk_meta_copy_img2img_shader(&dev->pdev, &dev->meta.bin_pool.base, 841 panvk_meta_copy_img2img_fmts[i].srcfmt, 842 panvk_meta_copy_img2img_fmts[i].dstfmt, 843 panvk_meta_copy_img2img_fmts[i].dstmask, 844 texdim, true, is_ms, &shader_info); 845 dev->meta.copy.img2img[is_ms][texdimidx][i].rsd = 846 panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base, 847 shader, &shader_info, 848 panvk_meta_copy_img2img_fmts[i].dstfmt, 849 panvk_meta_copy_img2img_fmts[i].dstmask, 850 true); 851 } 852 } 853} 854 855void 856panvk_per_arch(CmdCopyImage)(VkCommandBuffer commandBuffer, 857 VkImage srcImage, 858 VkImageLayout srcImageLayout, 859 VkImage destImage, 860 VkImageLayout destImageLayout, 861 uint32_t regionCount, 862 const VkImageCopy *pRegions) 863{ 864 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); 865 VK_FROM_HANDLE(panvk_image, dst, destImage); 866 VK_FROM_HANDLE(panvk_image, src, srcImage); 867 868 for (unsigned i = 0; i < regionCount; i++) { 869 panvk_meta_copy_img2img(cmdbuf, src, dst, &pRegions[i]); 870 } 871} 872 873static unsigned 874panvk_meta_copy_buf_texelsize(enum pipe_format imgfmt, unsigned mask) 875{ 876 unsigned imgtexelsz = util_format_get_blocksize(imgfmt); 877 unsigned nbufcomps = util_bitcount(mask); 878 879 if (nbufcomps == util_format_get_nr_components(imgfmt)) 880 return imgtexelsz; 881 882 /* Special case for Z24 buffers which are not tightly packed */ 883 if (mask == 7 && imgtexelsz == 4) 884 return 4; 885 886 /* Special case for S8 extraction from Z32_S8X24 */ 887 if (mask == 2 && imgtexelsz == 8) 888 return 1; 889 890 unsigned compsz = 891 util_format_get_component_bits(imgfmt, UTIL_FORMAT_COLORSPACE_RGB, 0); 892 893 assert(!(compsz % 8)); 894 895 return nbufcomps * compsz / 8; 896} 897 898static enum pipe_format 899panvk_meta_copy_buf2img_format(enum pipe_format imgfmt) 900{ 901 /* Pick blendable formats when we can, and the FLOAT variant matching the 902 * texelsize otherwise. 903 */ 904 switch (util_format_get_blocksize(imgfmt)) { 905 case 1: return PIPE_FORMAT_R8_UNORM; 906 /* AFBC stores things differently for RGB565, 907 * we can't simply map to R8G8 in that case */ 908 case 2: return (imgfmt == PIPE_FORMAT_R5G6B5_UNORM || 909 imgfmt == PIPE_FORMAT_B5G6R5_UNORM) ? 910 PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UNORM; 911 case 4: return PIPE_FORMAT_R8G8B8A8_UNORM; 912 case 6: return PIPE_FORMAT_R16G16B16_UINT; 913 case 8: return PIPE_FORMAT_R32G32_UINT; 914 case 12: return PIPE_FORMAT_R32G32B32_UINT; 915 case 16: return PIPE_FORMAT_R32G32B32A32_UINT; 916 default: unreachable("Invalid format\n"); 917 } 918} 919 920struct panvk_meta_copy_format_info { 921 enum pipe_format imgfmt; 922 unsigned mask; 923}; 924 925static const struct panvk_meta_copy_format_info panvk_meta_copy_buf2img_fmts[] = { 926 { PIPE_FORMAT_R8_UNORM, 0x1 }, 927 { PIPE_FORMAT_R8G8_UNORM, 0x3 }, 928 { PIPE_FORMAT_R5G6B5_UNORM, 0x7 }, 929 { PIPE_FORMAT_R8G8B8A8_UNORM, 0xf }, 930 { PIPE_FORMAT_R16G16B16_UINT, 0x7 }, 931 { PIPE_FORMAT_R32G32_UINT, 0x3 }, 932 { PIPE_FORMAT_R32G32B32_UINT, 0x7 }, 933 { PIPE_FORMAT_R32G32B32A32_UINT, 0xf }, 934 /* S8 -> Z24S8 */ 935 { PIPE_FORMAT_R8G8B8A8_UNORM, 0x8 }, 936 /* S8 -> Z32_S8X24 */ 937 { PIPE_FORMAT_R32G32_UINT, 0x2 }, 938 /* Z24X8 -> Z24S8 */ 939 { PIPE_FORMAT_R8G8B8A8_UNORM, 0x7 }, 940 /* Z32 -> Z32_S8X24 */ 941 { PIPE_FORMAT_R32G32_UINT, 0x1 }, 942}; 943 944struct panvk_meta_copy_buf2img_info { 945 struct { 946 mali_ptr ptr; 947 struct { 948 unsigned line; 949 unsigned surf; 950 } stride; 951 } buf; 952}; 953 954#define panvk_meta_copy_buf2img_get_info_field(b, field) \ 955 nir_load_ubo((b), 1, \ 956 sizeof(((struct panvk_meta_copy_buf2img_info *)0)->field) * 8, \ 957 nir_imm_int(b, 0), \ 958 nir_imm_int(b, offsetof(struct panvk_meta_copy_buf2img_info, field)), \ 959 .align_mul = 4, \ 960 .align_offset = 0, \ 961 .range_base = 0, \ 962 .range = ~0) 963 964static mali_ptr 965panvk_meta_copy_buf2img_shader(struct panfrost_device *pdev, 966 struct pan_pool *bin_pool, 967 struct panvk_meta_copy_format_info key, 968 struct pan_shader_info *shader_info) 969{ 970 nir_builder b = 971 nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, 972 GENX(pan_shader_get_compiler_options)(), 973 "panvk_meta_copy_buf2img(imgfmt=%s,mask=%x)", 974 util_format_name(key.imgfmt), 975 key.mask); 976 977 b.shader->info.internal = true; 978 b.shader->info.num_ubos = 1; 979 980 nir_variable *coord_var = 981 nir_variable_create(b.shader, nir_var_shader_in, 982 glsl_vector_type(GLSL_TYPE_FLOAT, 3), 983 "coord"); 984 coord_var->data.location = VARYING_SLOT_TEX0; 985 nir_ssa_def *coord = nir_load_var(&b, coord_var); 986 987 coord = nir_f2u32(&b, coord); 988 989 nir_ssa_def *bufptr = 990 panvk_meta_copy_buf2img_get_info_field(&b, buf.ptr); 991 nir_ssa_def *buflinestride = 992 panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.line); 993 nir_ssa_def *bufsurfstride = 994 panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.surf); 995 996 unsigned imgtexelsz = util_format_get_blocksize(key.imgfmt); 997 unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask); 998 unsigned writemask = key.mask; 999 1000 nir_ssa_def *offset = 1001 nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, buftexelsz)); 1002 offset = nir_iadd(&b, offset, 1003 nir_imul(&b, nir_channel(&b, coord, 1), buflinestride)); 1004 offset = nir_iadd(&b, offset, 1005 nir_imul(&b, nir_channel(&b, coord, 2), bufsurfstride)); 1006 bufptr = nir_iadd(&b, bufptr, nir_u2u64(&b, offset)); 1007 1008 unsigned imgcompsz = 1009 (imgtexelsz <= 4 && key.imgfmt != PIPE_FORMAT_R5G6B5_UNORM) ? 1010 1 : MIN2(1 << (ffs(imgtexelsz) - 1), 4); 1011 1012 unsigned nimgcomps = imgtexelsz / imgcompsz; 1013 unsigned bufcompsz = MIN2(buftexelsz, imgcompsz); 1014 unsigned nbufcomps = buftexelsz / bufcompsz; 1015 1016 assert(bufcompsz == 1 || bufcompsz == 2 || bufcompsz == 4); 1017 assert(nbufcomps <= 4 && nimgcomps <= 4); 1018 1019 nir_ssa_def *texel = 1020 nir_load_global(&b, bufptr, bufcompsz, nbufcomps, bufcompsz * 8); 1021 1022 enum glsl_base_type basetype; 1023 if (key.imgfmt == PIPE_FORMAT_R5G6B5_UNORM) { 1024 texel = nir_vec3(&b, 1025 nir_iand_imm(&b, texel, BITFIELD_MASK(5)), 1026 nir_iand_imm(&b, nir_ushr_imm(&b, texel, 5), BITFIELD_MASK(6)), 1027 nir_iand_imm(&b, nir_ushr_imm(&b, texel, 11), BITFIELD_MASK(5))); 1028 texel = nir_fmul(&b, 1029 nir_u2f32(&b, texel), 1030 nir_vec3(&b, 1031 nir_imm_float(&b, 1.0f / 31), 1032 nir_imm_float(&b, 1.0f / 63), 1033 nir_imm_float(&b, 1.0f / 31))); 1034 nimgcomps = 3; 1035 basetype = GLSL_TYPE_FLOAT; 1036 } else if (imgcompsz == 1) { 1037 assert(bufcompsz == 1); 1038 /* Blendable formats are unorm and the fixed-function blend unit 1039 * takes float values. 1040 */ 1041 texel = nir_fmul(&b, nir_u2f32(&b, texel), 1042 nir_imm_float(&b, 1.0f / 255)); 1043 basetype = GLSL_TYPE_FLOAT; 1044 } else { 1045 texel = nir_u2uN(&b, texel, imgcompsz * 8); 1046 basetype = imgcompsz == 2 ? GLSL_TYPE_UINT16 : GLSL_TYPE_UINT; 1047 } 1048 1049 /* We always pass the texel using 32-bit regs for now */ 1050 nir_variable *out = 1051 nir_variable_create(b.shader, nir_var_shader_out, 1052 glsl_vector_type(basetype, nimgcomps), 1053 "out"); 1054 out->data.location = FRAG_RESULT_DATA0; 1055 1056 uint16_t fullmask = (1 << nimgcomps) - 1; 1057 1058 assert(fullmask >= writemask); 1059 1060 if (fullmask != writemask) { 1061 unsigned first_written_comp = ffs(writemask) - 1; 1062 nir_ssa_def *oldtexel = NULL; 1063 if (imgcompsz > 1) 1064 oldtexel = nir_load_var(&b, out); 1065 1066 nir_ssa_def *texel_comps[4]; 1067 for (unsigned i = 0; i < nimgcomps; i++) { 1068 if (writemask & BITFIELD_BIT(i)) 1069 texel_comps[i] = nir_channel(&b, texel, i - first_written_comp); 1070 else if (imgcompsz > 1) 1071 texel_comps[i] = nir_channel(&b, oldtexel, i); 1072 else 1073 texel_comps[i] = nir_imm_intN_t(&b, 0, texel->bit_size); 1074 } 1075 1076 texel = nir_vec(&b, texel_comps, nimgcomps); 1077 } 1078 1079 nir_store_var(&b, out, texel, 0xff); 1080 1081 struct panfrost_compile_inputs inputs = { 1082 .gpu_id = pdev->gpu_id, 1083 .is_blit = true, 1084 }; 1085 1086#if PAN_ARCH >= 6 1087 pan_pack(&inputs.bifrost.rt_conv[0], INTERNAL_CONVERSION, cfg) { 1088 cfg.memory_format = (imgcompsz == 2 ? MALI_RG16UI : MALI_RG32UI) << 12; 1089 cfg.register_format = imgcompsz == 2 ? 1090 MALI_REGISTER_FILE_FORMAT_U16 : 1091 MALI_REGISTER_FILE_FORMAT_U32; 1092 } 1093 inputs.bifrost.static_rt_conv = true; 1094#endif 1095 1096 struct util_dynarray binary; 1097 1098 util_dynarray_init(&binary, NULL); 1099 GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info); 1100 1101 /* Make sure UBO words have been upgraded to push constants */ 1102 assert(shader_info->ubo_count == 1); 1103 1104 mali_ptr shader = 1105 pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 1106 PAN_ARCH >= 6 ? 128 : 64); 1107 1108 util_dynarray_fini(&binary); 1109 ralloc_free(b.shader); 1110 1111 return shader; 1112} 1113 1114static unsigned 1115panvk_meta_copy_buf2img_format_idx(struct panvk_meta_copy_format_info key) 1116{ 1117 for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_buf2img_fmts); i++) { 1118 if (!memcmp(&key, &panvk_meta_copy_buf2img_fmts[i], sizeof(key))) 1119 return i; 1120 } 1121 1122 unreachable("Invalid image format\n"); 1123} 1124 1125static void 1126panvk_meta_copy_buf2img(struct panvk_cmd_buffer *cmdbuf, 1127 const struct panvk_buffer *buf, 1128 const struct panvk_image *img, 1129 const VkBufferImageCopy *region) 1130{ 1131 struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev; 1132 struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info; 1133 unsigned minx = MAX2(region->imageOffset.x, 0); 1134 unsigned miny = MAX2(region->imageOffset.y, 0); 1135 unsigned maxx = MAX2(region->imageOffset.x + region->imageExtent.width - 1, 0); 1136 unsigned maxy = MAX2(region->imageOffset.y + region->imageExtent.height - 1, 0); 1137 1138 mali_ptr vpd = 1139 panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base, 1140 minx, miny, maxx, maxy); 1141 1142 float dst_rect[] = { 1143 minx, miny, 0.0, 1.0, 1144 maxx + 1, miny, 0.0, 1.0, 1145 minx, maxy + 1, 0.0, 1.0, 1146 maxx + 1, maxy + 1, 0.0, 1.0, 1147 }; 1148 mali_ptr dst_coords = 1149 pan_pool_upload_aligned(&cmdbuf->desc_pool.base, dst_rect, 1150 sizeof(dst_rect), 64); 1151 1152 struct panvk_meta_copy_format_info key = { 1153 .imgfmt = panvk_meta_copy_buf2img_format(img->pimage.layout.format), 1154 .mask = panvk_meta_copy_img_mask(img->pimage.layout.format, 1155 region->imageSubresource.aspectMask), 1156 }; 1157 1158 unsigned fmtidx = panvk_meta_copy_buf2img_format_idx(key); 1159 1160 mali_ptr rsd = 1161 cmdbuf->device->physical_device->meta.copy.buf2img[fmtidx].rsd; 1162 const struct panfrost_ubo_push *pushmap = 1163 &cmdbuf->device->physical_device->meta.copy.buf2img[fmtidx].pushmap; 1164 1165 unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask); 1166 struct panvk_meta_copy_buf2img_info info = { 1167 .buf.ptr = buf->bo->ptr.gpu + buf->bo_offset + region->bufferOffset, 1168 .buf.stride.line = (region->bufferRowLength ? : region->imageExtent.width) * buftexelsz, 1169 }; 1170 1171 info.buf.stride.surf = 1172 (region->bufferImageHeight ? : region->imageExtent.height) * info.buf.stride.line; 1173 1174 mali_ptr pushconsts = 1175 panvk_meta_copy_emit_push_constants(pdev, pushmap, &cmdbuf->desc_pool.base, 1176 &info, sizeof(info)); 1177 mali_ptr ubo = 1178 panvk_meta_copy_emit_ubo(pdev, &cmdbuf->desc_pool.base, &info, sizeof(info)); 1179 1180 struct pan_image_view view = { 1181 .format = key.imgfmt, 1182 .dim = MALI_TEXTURE_DIMENSION_2D, 1183 .image = &img->pimage, 1184 .nr_samples = img->pimage.layout.nr_samples, 1185 .first_level = region->imageSubresource.mipLevel, 1186 .last_level = region->imageSubresource.mipLevel, 1187 .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W }, 1188 }; 1189 1190 /* TODO: don't force preloads of dst resources if unneeded */ 1191 cmdbuf->state.fb.crc_valid[0] = false; 1192 *fbinfo = (struct pan_fb_info){ 1193 .width = u_minify(img->pimage.layout.width, region->imageSubresource.mipLevel), 1194 .height = u_minify(img->pimage.layout.height, region->imageSubresource.mipLevel), 1195 .extent.minx = minx, 1196 .extent.maxx = maxx, 1197 .extent.miny = miny, 1198 .extent.maxy = maxy, 1199 .nr_samples = 1, 1200 .rt_count = 1, 1201 .rts[0].view = &view, 1202 .rts[0].preload = true, 1203 .rts[0].crc_valid = &cmdbuf->state.fb.crc_valid[0], 1204 }; 1205 1206 panvk_per_arch(cmd_close_batch)(cmdbuf); 1207 1208 assert(region->imageSubresource.layerCount == 1 || 1209 region->imageExtent.depth == 1); 1210 assert(region->imageOffset.z >= 0); 1211 unsigned first_layer = MAX2(region->imageSubresource.baseArrayLayer, region->imageOffset.z); 1212 unsigned nlayers = MAX2(region->imageSubresource.layerCount, region->imageExtent.depth); 1213 for (unsigned l = 0; l < nlayers; l++) { 1214 float src_rect[] = { 1215 0, 0, l, 1.0, 1216 region->imageExtent.width, 0, l, 1.0, 1217 0, region->imageExtent.height, l, 1.0, 1218 region->imageExtent.width, region->imageExtent.height, l, 1.0, 1219 }; 1220 1221 mali_ptr src_coords = 1222 pan_pool_upload_aligned(&cmdbuf->desc_pool.base, src_rect, 1223 sizeof(src_rect), 64); 1224 1225 struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf); 1226 1227 view.first_layer = view.last_layer = l + first_layer; 1228 batch->blit.src = buf->bo; 1229 batch->blit.dst = img->pimage.data.bo; 1230 panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true); 1231 panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf); 1232 panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf); 1233 1234 mali_ptr tsd, tiler; 1235 1236#if PAN_ARCH >= 6 1237 tsd = batch->tls.gpu; 1238 tiler = batch->tiler.descs.gpu; 1239#else 1240 tsd = batch->fb.desc.gpu; 1241 tiler = 0; 1242#endif 1243 1244 struct panfrost_ptr job; 1245 1246 job = panvk_meta_copy_emit_tiler_job(&cmdbuf->desc_pool.base, 1247 &batch->scoreboard, 1248 src_coords, dst_coords, 1249 0, 0, ubo, pushconsts, 1250 vpd, rsd, tsd, tiler); 1251 1252 util_dynarray_append(&batch->jobs, void *, job.cpu); 1253 panvk_per_arch(cmd_close_batch)(cmdbuf); 1254 } 1255} 1256 1257static void 1258panvk_meta_copy_buf2img_init(struct panvk_physical_device *dev) 1259{ 1260 STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_buf2img_fmts) == PANVK_META_COPY_BUF2IMG_NUM_FORMATS); 1261 1262 for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_buf2img_fmts); i++) { 1263 struct pan_shader_info shader_info; 1264 mali_ptr shader = 1265 panvk_meta_copy_buf2img_shader(&dev->pdev, &dev->meta.bin_pool.base, 1266 panvk_meta_copy_buf2img_fmts[i], 1267 &shader_info); 1268 dev->meta.copy.buf2img[i].pushmap = shader_info.push; 1269 dev->meta.copy.buf2img[i].rsd = 1270 panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base, 1271 shader, &shader_info, 1272 panvk_meta_copy_buf2img_fmts[i].imgfmt, 1273 panvk_meta_copy_buf2img_fmts[i].mask, 1274 false); 1275 } 1276} 1277 1278void 1279panvk_per_arch(CmdCopyBufferToImage)(VkCommandBuffer commandBuffer, 1280 VkBuffer srcBuffer, 1281 VkImage destImage, 1282 VkImageLayout destImageLayout, 1283 uint32_t regionCount, 1284 const VkBufferImageCopy *pRegions) 1285{ 1286 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); 1287 VK_FROM_HANDLE(panvk_buffer, buf, srcBuffer); 1288 VK_FROM_HANDLE(panvk_image, img, destImage); 1289 1290 for (unsigned i = 0; i < regionCount; i++) { 1291 panvk_meta_copy_buf2img(cmdbuf, buf, img, &pRegions[i]); 1292 } 1293} 1294 1295static const struct panvk_meta_copy_format_info panvk_meta_copy_img2buf_fmts[] = { 1296 { PIPE_FORMAT_R8_UINT, 0x1 }, 1297 { PIPE_FORMAT_R8G8_UINT, 0x3 }, 1298 { PIPE_FORMAT_R5G6B5_UNORM, 0x7 }, 1299 { PIPE_FORMAT_R8G8B8A8_UINT, 0xf }, 1300 { PIPE_FORMAT_R16G16B16_UINT, 0x7 }, 1301 { PIPE_FORMAT_R32G32_UINT, 0x3 }, 1302 { PIPE_FORMAT_R32G32B32_UINT, 0x7 }, 1303 { PIPE_FORMAT_R32G32B32A32_UINT, 0xf }, 1304 /* S8 -> Z24S8 */ 1305 { PIPE_FORMAT_R8G8B8A8_UINT, 0x8 }, 1306 /* S8 -> Z32_S8X24 */ 1307 { PIPE_FORMAT_R32G32_UINT, 0x2 }, 1308 /* Z24X8 -> Z24S8 */ 1309 { PIPE_FORMAT_R8G8B8A8_UINT, 0x7 }, 1310 /* Z32 -> Z32_S8X24 */ 1311 { PIPE_FORMAT_R32G32_UINT, 0x1 }, 1312}; 1313 1314static enum pipe_format 1315panvk_meta_copy_img2buf_format(enum pipe_format imgfmt) 1316{ 1317 /* Pick blendable formats when we can, and the FLOAT variant matching the 1318 * texelsize otherwise. 1319 */ 1320 switch (util_format_get_blocksize(imgfmt)) { 1321 case 1: return PIPE_FORMAT_R8_UINT; 1322 /* AFBC stores things differently for RGB565, 1323 * we can't simply map to R8G8 in that case */ 1324 case 2: return (imgfmt == PIPE_FORMAT_R5G6B5_UNORM || 1325 imgfmt == PIPE_FORMAT_B5G6R5_UNORM) ? 1326 PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UINT; 1327 case 4: return PIPE_FORMAT_R8G8B8A8_UINT; 1328 case 6: return PIPE_FORMAT_R16G16B16_UINT; 1329 case 8: return PIPE_FORMAT_R32G32_UINT; 1330 case 12: return PIPE_FORMAT_R32G32B32_UINT; 1331 case 16: return PIPE_FORMAT_R32G32B32A32_UINT; 1332 default: unreachable("Invalid format\n"); 1333 } 1334} 1335 1336struct panvk_meta_copy_img2buf_info { 1337 struct { 1338 mali_ptr ptr; 1339 struct { 1340 unsigned line; 1341 unsigned surf; 1342 } stride; 1343 } buf; 1344 struct { 1345 struct { 1346 unsigned x, y, z; 1347 } offset; 1348 struct { 1349 unsigned minx, miny, maxx, maxy; 1350 } extent; 1351 } img; 1352}; 1353 1354#define panvk_meta_copy_img2buf_get_info_field(b, field) \ 1355 nir_load_ubo((b), 1, \ 1356 sizeof(((struct panvk_meta_copy_img2buf_info *)0)->field) * 8, \ 1357 nir_imm_int(b, 0), \ 1358 nir_imm_int(b, offsetof(struct panvk_meta_copy_img2buf_info, field)), \ 1359 .align_mul = 4, \ 1360 .align_offset = 0, \ 1361 .range_base = 0, \ 1362 .range = ~0) 1363 1364static mali_ptr 1365panvk_meta_copy_img2buf_shader(struct panfrost_device *pdev, 1366 struct pan_pool *bin_pool, 1367 struct panvk_meta_copy_format_info key, 1368 unsigned texdim, unsigned texisarray, 1369 struct pan_shader_info *shader_info) 1370{ 1371 unsigned imgtexelsz = util_format_get_blocksize(key.imgfmt); 1372 unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask); 1373 1374 /* FIXME: Won't work on compute queues, but we can't do that with 1375 * a compute shader if the destination is an AFBC surface. 1376 */ 1377 nir_builder b = 1378 nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, 1379 GENX(pan_shader_get_compiler_options)(), 1380 "panvk_meta_copy_img2buf(dim=%dD%s,imgfmt=%s,mask=%x)", 1381 texdim, texisarray ? "[]" : "", 1382 util_format_name(key.imgfmt), 1383 key.mask); 1384 1385 b.shader->info.internal = true; 1386 b.shader->info.num_ubos = 1; 1387 1388 nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32); 1389 nir_ssa_def *bufptr = 1390 panvk_meta_copy_img2buf_get_info_field(&b, buf.ptr); 1391 nir_ssa_def *buflinestride = 1392 panvk_meta_copy_img2buf_get_info_field(&b, buf.stride.line); 1393 nir_ssa_def *bufsurfstride = 1394 panvk_meta_copy_img2buf_get_info_field(&b, buf.stride.surf); 1395 1396 nir_ssa_def *imgminx = 1397 panvk_meta_copy_img2buf_get_info_field(&b, img.extent.minx); 1398 nir_ssa_def *imgminy = 1399 panvk_meta_copy_img2buf_get_info_field(&b, img.extent.miny); 1400 nir_ssa_def *imgmaxx = 1401 panvk_meta_copy_img2buf_get_info_field(&b, img.extent.maxx); 1402 nir_ssa_def *imgmaxy = 1403 panvk_meta_copy_img2buf_get_info_field(&b, img.extent.maxy); 1404 1405 nir_ssa_def *imgcoords, *inbounds; 1406 1407 switch (texdim + texisarray) { 1408 case 1: 1409 imgcoords = 1410 nir_iadd(&b, 1411 nir_channel(&b, coord, 0), 1412 panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)); 1413 inbounds = 1414 nir_iand(&b, 1415 nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)), 1416 nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx)); 1417 break; 1418 case 2: 1419 imgcoords = 1420 nir_vec2(&b, 1421 nir_iadd(&b, 1422 nir_channel(&b, coord, 0), 1423 panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)), 1424 nir_iadd(&b, 1425 nir_channel(&b, coord, 1), 1426 panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y))); 1427 inbounds = 1428 nir_iand(&b, 1429 nir_iand(&b, 1430 nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)), 1431 nir_uge(&b, imgmaxy, nir_channel(&b, imgcoords, 1))), 1432 nir_iand(&b, 1433 nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx), 1434 nir_uge(&b, nir_channel(&b, imgcoords, 1), imgminy))); 1435 break; 1436 case 3: 1437 imgcoords = 1438 nir_vec3(&b, 1439 nir_iadd(&b, 1440 nir_channel(&b, coord, 0), 1441 panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)), 1442 nir_iadd(&b, 1443 nir_channel(&b, coord, 1), 1444 panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)), 1445 nir_iadd(&b, 1446 nir_channel(&b, coord, 2), 1447 panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y))); 1448 inbounds = 1449 nir_iand(&b, 1450 nir_iand(&b, 1451 nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)), 1452 nir_uge(&b, imgmaxy, nir_channel(&b, imgcoords, 1))), 1453 nir_iand(&b, 1454 nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx), 1455 nir_uge(&b, nir_channel(&b, imgcoords, 1), imgminy))); 1456 break; 1457 default: 1458 unreachable("Invalid texture dimension\n"); 1459 } 1460 1461 nir_push_if(&b, inbounds); 1462 1463 /* FIXME: doesn't work for tiled+compressed formats since blocks are 4x4 1464 * blocks instead of 16x16 texels in that case, and there's nothing we can 1465 * do to force the tile size to 4x4 in the render path. 1466 * This being said, compressed textures are not compatible with AFBC, so we 1467 * could use a compute shader arranging the blocks properly. 1468 */ 1469 nir_ssa_def *offset = 1470 nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, buftexelsz)); 1471 offset = nir_iadd(&b, offset, 1472 nir_imul(&b, nir_channel(&b, coord, 1), buflinestride)); 1473 offset = nir_iadd(&b, offset, 1474 nir_imul(&b, nir_channel(&b, coord, 2), bufsurfstride)); 1475 bufptr = nir_iadd(&b, bufptr, nir_u2u64(&b, offset)); 1476 1477 unsigned imgcompsz = imgtexelsz <= 4 ? 1478 1 : MIN2(1 << (ffs(imgtexelsz) - 1), 4); 1479 unsigned nimgcomps = imgtexelsz / imgcompsz; 1480 assert(nimgcomps <= 4); 1481 1482 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1); 1483 tex->op = nir_texop_txf; 1484 tex->texture_index = 0; 1485 tex->is_array = texisarray; 1486 tex->dest_type = util_format_is_unorm(key.imgfmt) ? 1487 nir_type_float32 : nir_type_uint32; 1488 1489 switch (texdim) { 1490 case 1: tex->sampler_dim = GLSL_SAMPLER_DIM_1D; break; 1491 case 2: tex->sampler_dim = GLSL_SAMPLER_DIM_2D; break; 1492 case 3: tex->sampler_dim = GLSL_SAMPLER_DIM_3D; break; 1493 default: unreachable("Invalid texture dimension"); 1494 } 1495 1496 tex->src[0].src_type = nir_tex_src_coord; 1497 tex->src[0].src = nir_src_for_ssa(imgcoords); 1498 tex->coord_components = texdim + texisarray; 1499 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 1500 nir_alu_type_get_type_size(tex->dest_type), NULL); 1501 nir_builder_instr_insert(&b, &tex->instr); 1502 1503 nir_ssa_def *texel = &tex->dest.ssa; 1504 1505 unsigned fullmask = (1 << util_format_get_nr_components(key.imgfmt)) - 1; 1506 unsigned nbufcomps = util_bitcount(fullmask); 1507 if (key.mask != fullmask) { 1508 nir_ssa_def *bufcomps[4]; 1509 nbufcomps = 0; 1510 for (unsigned i = 0; i < nimgcomps; i++) { 1511 if (key.mask & BITFIELD_BIT(i)) 1512 bufcomps[nbufcomps++] = nir_channel(&b, texel, i); 1513 } 1514 1515 texel = nir_vec(&b, bufcomps, nbufcomps); 1516 } 1517 1518 unsigned bufcompsz = buftexelsz / nbufcomps; 1519 1520 if (key.imgfmt == PIPE_FORMAT_R5G6B5_UNORM) { 1521 texel = nir_fmul(&b, texel, 1522 nir_vec3(&b, 1523 nir_imm_float(&b, 31), 1524 nir_imm_float(&b, 63), 1525 nir_imm_float(&b, 31))); 1526 texel = nir_f2u16(&b, texel); 1527 texel = nir_ior(&b, nir_channel(&b, texel, 0), 1528 nir_ior(&b, 1529 nir_ishl(&b, nir_channel(&b, texel, 1), nir_imm_int(&b, 5)), 1530 nir_ishl(&b, nir_channel(&b, texel, 2), nir_imm_int(&b, 11)))); 1531 imgcompsz = 2; 1532 bufcompsz = 2; 1533 nbufcomps = 1; 1534 nimgcomps = 1; 1535 } else if (imgcompsz == 1) { 1536 nir_ssa_def *packed = nir_channel(&b, texel, 0); 1537 for (unsigned i = 1; i < nbufcomps; i++) { 1538 packed = nir_ior(&b, packed, 1539 nir_ishl(&b, nir_iand_imm(&b, nir_channel(&b, texel, i), 0xff), 1540 nir_imm_int(&b, i * 8))); 1541 } 1542 texel = packed; 1543 1544 bufcompsz = nbufcomps == 3 ? 4 : nbufcomps; 1545 nbufcomps = 1; 1546 } 1547 1548 assert(bufcompsz == 1 || bufcompsz == 2 || bufcompsz == 4); 1549 assert(nbufcomps <= 4 && nimgcomps <= 4); 1550 texel = nir_u2uN(&b, texel, bufcompsz * 8); 1551 1552 nir_store_global(&b, bufptr, bufcompsz, texel, (1 << nbufcomps) - 1); 1553 nir_pop_if(&b, NULL); 1554 1555 struct panfrost_compile_inputs inputs = { 1556 .gpu_id = pdev->gpu_id, 1557 .is_blit = true, 1558 }; 1559 1560 struct util_dynarray binary; 1561 1562 util_dynarray_init(&binary, NULL); 1563 GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info); 1564 1565 /* Make sure UBO words have been upgraded to push constants and everything 1566 * is at the right place. 1567 */ 1568 assert(shader_info->ubo_count == 1); 1569 assert(shader_info->push.count <= (sizeof(struct panvk_meta_copy_img2buf_info) / 4)); 1570 1571 mali_ptr shader = 1572 pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 1573 PAN_ARCH >= 6 ? 128 : 64); 1574 1575 util_dynarray_fini(&binary); 1576 ralloc_free(b.shader); 1577 1578 return shader; 1579} 1580 1581static unsigned 1582panvk_meta_copy_img2buf_format_idx(struct panvk_meta_copy_format_info key) 1583{ 1584 for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2buf_fmts); i++) { 1585 if (!memcmp(&key, &panvk_meta_copy_img2buf_fmts[i], sizeof(key))) 1586 return i; 1587 } 1588 1589 unreachable("Invalid texel size\n"); 1590} 1591 1592static void 1593panvk_meta_copy_img2buf(struct panvk_cmd_buffer *cmdbuf, 1594 const struct panvk_buffer *buf, 1595 const struct panvk_image *img, 1596 const VkBufferImageCopy *region) 1597{ 1598 struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev; 1599 struct panvk_meta_copy_format_info key = { 1600 .imgfmt = panvk_meta_copy_img2buf_format(img->pimage.layout.format), 1601 .mask = panvk_meta_copy_img_mask(img->pimage.layout.format, 1602 region->imageSubresource.aspectMask), 1603 }; 1604 unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask); 1605 unsigned texdimidx = 1606 panvk_meta_copy_tex_type(img->pimage.layout.dim, 1607 img->pimage.layout.array_size > 1); 1608 unsigned fmtidx = panvk_meta_copy_img2buf_format_idx(key); 1609 1610 mali_ptr rsd = 1611 cmdbuf->device->physical_device->meta.copy.img2buf[texdimidx][fmtidx].rsd; 1612 const struct panfrost_ubo_push *pushmap = 1613 &cmdbuf->device->physical_device->meta.copy.img2buf[texdimidx][fmtidx].pushmap; 1614 1615 struct panvk_meta_copy_img2buf_info info = { 1616 .buf.ptr = buf->bo->ptr.gpu + buf->bo_offset + region->bufferOffset, 1617 .buf.stride.line = (region->bufferRowLength ? : region->imageExtent.width) * buftexelsz, 1618 .img.offset.x = MAX2(region->imageOffset.x & ~15, 0), 1619 .img.extent.minx = MAX2(region->imageOffset.x, 0), 1620 .img.extent.maxx = MAX2(region->imageOffset.x + region->imageExtent.width - 1, 0), 1621 }; 1622 1623 if (img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D) { 1624 info.img.extent.maxy = region->imageSubresource.layerCount - 1; 1625 } else { 1626 info.img.offset.y = MAX2(region->imageOffset.y & ~15, 0); 1627 info.img.offset.z = MAX2(region->imageOffset.z, 0); 1628 info.img.extent.miny = MAX2(region->imageOffset.y, 0); 1629 info.img.extent.maxy = MAX2(region->imageOffset.y + region->imageExtent.height - 1, 0); 1630 } 1631 1632 info.buf.stride.surf = (region->bufferImageHeight ? : region->imageExtent.height) * 1633 info.buf.stride.line; 1634 1635 mali_ptr pushconsts = 1636 panvk_meta_copy_emit_push_constants(pdev, pushmap, &cmdbuf->desc_pool.base, 1637 &info, sizeof(info)); 1638 mali_ptr ubo = 1639 panvk_meta_copy_emit_ubo(pdev, &cmdbuf->desc_pool.base, &info, sizeof(info)); 1640 1641 struct pan_image_view view = { 1642 .format = key.imgfmt, 1643 .dim = img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_CUBE ? 1644 MALI_TEXTURE_DIMENSION_2D : img->pimage.layout.dim, 1645 .image = &img->pimage, 1646 .nr_samples = img->pimage.layout.nr_samples, 1647 .first_level = region->imageSubresource.mipLevel, 1648 .last_level = region->imageSubresource.mipLevel, 1649 .first_layer = region->imageSubresource.baseArrayLayer, 1650 .last_layer = region->imageSubresource.baseArrayLayer + region->imageSubresource.layerCount - 1, 1651 .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W }, 1652 }; 1653 1654 mali_ptr texture = 1655 panvk_meta_copy_img_emit_texture(pdev, &cmdbuf->desc_pool.base, &view); 1656 mali_ptr sampler = 1657 panvk_meta_copy_img_emit_sampler(pdev, &cmdbuf->desc_pool.base); 1658 1659 panvk_per_arch(cmd_close_batch)(cmdbuf); 1660 1661 struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf); 1662 1663 struct pan_tls_info tlsinfo = { 0 }; 1664 1665 batch->blit.src = img->pimage.data.bo; 1666 batch->blit.dst = buf->bo; 1667 batch->tls = 1668 pan_pool_alloc_desc(&cmdbuf->desc_pool.base, LOCAL_STORAGE); 1669 GENX(pan_emit_tls)(&tlsinfo, batch->tls.cpu); 1670 1671 mali_ptr tsd = batch->tls.gpu; 1672 1673 struct pan_compute_dim wg_sz = { 1674 16, 1675 img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D ? 1 : 16, 1676 1, 1677 }; 1678 1679 struct pan_compute_dim num_wg = { 1680 (ALIGN_POT(info.img.extent.maxx + 1, 16) - info.img.offset.x) / 16, 1681 img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D ? 1682 region->imageSubresource.layerCount : 1683 (ALIGN_POT(info.img.extent.maxy + 1, 16) - info.img.offset.y) / 16, 1684 img->pimage.layout.dim != MALI_TEXTURE_DIMENSION_1D ? 1685 MAX2(region->imageSubresource.layerCount, region->imageExtent.depth) : 1, 1686 }; 1687 1688 struct panfrost_ptr job = 1689 panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base, 1690 &batch->scoreboard, &num_wg, &wg_sz, 1691 texture, sampler, 1692 ubo, pushconsts, 1693 rsd, tsd); 1694 1695 util_dynarray_append(&batch->jobs, void *, job.cpu); 1696 1697 panvk_per_arch(cmd_close_batch)(cmdbuf); 1698} 1699 1700static void 1701panvk_meta_copy_img2buf_init(struct panvk_physical_device *dev) 1702{ 1703 STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2buf_fmts) == PANVK_META_COPY_IMG2BUF_NUM_FORMATS); 1704 1705 for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2buf_fmts); i++) { 1706 for (unsigned texdim = 1; texdim <= 3; texdim++) { 1707 unsigned texdimidx = panvk_meta_copy_tex_type(texdim, false); 1708 assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2buf)); 1709 1710 struct pan_shader_info shader_info; 1711 mali_ptr shader = 1712 panvk_meta_copy_img2buf_shader(&dev->pdev, &dev->meta.bin_pool.base, 1713 panvk_meta_copy_img2buf_fmts[i], 1714 texdim, false, &shader_info); 1715 dev->meta.copy.img2buf[texdimidx][i].pushmap = shader_info.push; 1716 dev->meta.copy.img2buf[texdimidx][i].rsd = 1717 panvk_meta_copy_to_buf_emit_rsd(&dev->pdev, 1718 &dev->meta.desc_pool.base, 1719 shader, &shader_info, true); 1720 1721 if (texdim == 3) 1722 continue; 1723 1724 memset(&shader_info, 0, sizeof(shader_info)); 1725 texdimidx = panvk_meta_copy_tex_type(texdim, true); 1726 assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2buf)); 1727 shader = 1728 panvk_meta_copy_img2buf_shader(&dev->pdev, &dev->meta.bin_pool.base, 1729 panvk_meta_copy_img2buf_fmts[i], 1730 texdim, true, &shader_info); 1731 dev->meta.copy.img2buf[texdimidx][i].pushmap = shader_info.push; 1732 dev->meta.copy.img2buf[texdimidx][i].rsd = 1733 panvk_meta_copy_to_buf_emit_rsd(&dev->pdev, 1734 &dev->meta.desc_pool.base, 1735 shader, &shader_info, true); 1736 } 1737 } 1738} 1739 1740void 1741panvk_per_arch(CmdCopyImageToBuffer)(VkCommandBuffer commandBuffer, 1742 VkImage srcImage, 1743 VkImageLayout srcImageLayout, 1744 VkBuffer destBuffer, 1745 uint32_t regionCount, 1746 const VkBufferImageCopy *pRegions) 1747{ 1748 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); 1749 VK_FROM_HANDLE(panvk_buffer, buf, destBuffer); 1750 VK_FROM_HANDLE(panvk_image, img, srcImage); 1751 1752 for (unsigned i = 0; i < regionCount; i++) { 1753 panvk_meta_copy_img2buf(cmdbuf, buf, img, &pRegions[i]); 1754 } 1755} 1756 1757struct panvk_meta_copy_buf2buf_info { 1758 mali_ptr src; 1759 mali_ptr dst; 1760}; 1761 1762#define panvk_meta_copy_buf2buf_get_info_field(b, field) \ 1763 nir_load_ubo((b), 1, \ 1764 sizeof(((struct panvk_meta_copy_buf2buf_info *)0)->field) * 8, \ 1765 nir_imm_int(b, 0), \ 1766 nir_imm_int(b, offsetof(struct panvk_meta_copy_buf2buf_info, field)), \ 1767 .align_mul = 4, \ 1768 .align_offset = 0, \ 1769 .range_base = 0, \ 1770 .range = ~0) 1771 1772static mali_ptr 1773panvk_meta_copy_buf2buf_shader(struct panfrost_device *pdev, 1774 struct pan_pool *bin_pool, 1775 unsigned blksz, 1776 struct pan_shader_info *shader_info) 1777{ 1778 /* FIXME: Won't work on compute queues, but we can't do that with 1779 * a compute shader if the destination is an AFBC surface. 1780 */ 1781 nir_builder b = 1782 nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, 1783 GENX(pan_shader_get_compiler_options)(), 1784 "panvk_meta_copy_buf2buf(blksz=%d)", 1785 blksz); 1786 1787 b.shader->info.internal = true; 1788 b.shader->info.num_ubos = 1; 1789 1790 nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32); 1791 1792 nir_ssa_def *offset = 1793 nir_u2u64(&b, nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, blksz))); 1794 nir_ssa_def *srcptr = 1795 nir_iadd(&b, panvk_meta_copy_buf2buf_get_info_field(&b, src), offset); 1796 nir_ssa_def *dstptr = 1797 nir_iadd(&b, panvk_meta_copy_buf2buf_get_info_field(&b, dst), offset); 1798 1799 unsigned compsz = blksz < 4 ? blksz : 4; 1800 unsigned ncomps = blksz / compsz; 1801 nir_store_global(&b, dstptr, blksz, 1802 nir_load_global(&b, srcptr, blksz, ncomps, compsz * 8), 1803 (1 << ncomps) - 1); 1804 1805 struct panfrost_compile_inputs inputs = { 1806 .gpu_id = pdev->gpu_id, 1807 .is_blit = true, 1808 }; 1809 1810 struct util_dynarray binary; 1811 1812 util_dynarray_init(&binary, NULL); 1813 GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info); 1814 1815 /* Make sure UBO words have been upgraded to push constants and everything 1816 * is at the right place. 1817 */ 1818 assert(shader_info->ubo_count == 1); 1819 assert(shader_info->push.count == (sizeof(struct panvk_meta_copy_buf2buf_info) / 4)); 1820 1821 mali_ptr shader = 1822 pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 1823 PAN_ARCH >= 6 ? 128 : 64); 1824 1825 util_dynarray_fini(&binary); 1826 ralloc_free(b.shader); 1827 1828 return shader; 1829} 1830 1831static void 1832panvk_meta_copy_buf2buf_init(struct panvk_physical_device *dev) 1833{ 1834 for (unsigned i = 0; i < ARRAY_SIZE(dev->meta.copy.buf2buf); i++) { 1835 struct pan_shader_info shader_info; 1836 mali_ptr shader = 1837 panvk_meta_copy_buf2buf_shader(&dev->pdev, &dev->meta.bin_pool.base, 1838 1 << i, &shader_info); 1839 dev->meta.copy.buf2buf[i].pushmap = shader_info.push; 1840 dev->meta.copy.buf2buf[i].rsd = 1841 panvk_meta_copy_to_buf_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base, 1842 shader, &shader_info, false); 1843 } 1844} 1845 1846static void 1847panvk_meta_copy_buf2buf(struct panvk_cmd_buffer *cmdbuf, 1848 const struct panvk_buffer *src, 1849 const struct panvk_buffer *dst, 1850 const VkBufferCopy *region) 1851{ 1852 struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev; 1853 1854 struct panvk_meta_copy_buf2buf_info info = { 1855 .src = src->bo->ptr.gpu + src->bo_offset + region->srcOffset, 1856 .dst = dst->bo->ptr.gpu + dst->bo_offset + region->dstOffset, 1857 }; 1858 1859 unsigned alignment = ffs((info.src | info.dst | region->size) & 15); 1860 unsigned log2blksz = alignment ? alignment - 1 : 4; 1861 1862 assert(log2blksz < ARRAY_SIZE(cmdbuf->device->physical_device->meta.copy.buf2buf)); 1863 mali_ptr rsd = 1864 cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].rsd; 1865 const struct panfrost_ubo_push *pushmap = 1866 &cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].pushmap; 1867 1868 mali_ptr pushconsts = 1869 panvk_meta_copy_emit_push_constants(pdev, pushmap, &cmdbuf->desc_pool.base, 1870 &info, sizeof(info)); 1871 mali_ptr ubo = 1872 panvk_meta_copy_emit_ubo(pdev, &cmdbuf->desc_pool.base, &info, sizeof(info)); 1873 1874 panvk_per_arch(cmd_close_batch)(cmdbuf); 1875 1876 struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf); 1877 1878 panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false); 1879 1880 mali_ptr tsd = batch->tls.gpu; 1881 1882 unsigned nblocks = region->size >> log2blksz; 1883 struct pan_compute_dim num_wg = { nblocks, 1, 1 }; 1884 struct pan_compute_dim wg_sz = { 1, 1, 1}; 1885 struct panfrost_ptr job = 1886 panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base, 1887 &batch->scoreboard, 1888 &num_wg, &wg_sz, 1889 0, 0, ubo, pushconsts, rsd, tsd); 1890 1891 util_dynarray_append(&batch->jobs, void *, job.cpu); 1892 1893 batch->blit.src = src->bo; 1894 batch->blit.dst = dst->bo; 1895 panvk_per_arch(cmd_close_batch)(cmdbuf); 1896} 1897 1898void 1899panvk_per_arch(CmdCopyBuffer)(VkCommandBuffer commandBuffer, 1900 VkBuffer srcBuffer, 1901 VkBuffer destBuffer, 1902 uint32_t regionCount, 1903 const VkBufferCopy *pRegions) 1904{ 1905 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); 1906 VK_FROM_HANDLE(panvk_buffer, src, srcBuffer); 1907 VK_FROM_HANDLE(panvk_buffer, dst, destBuffer); 1908 1909 for (unsigned i = 0; i < regionCount; i++) { 1910 panvk_meta_copy_buf2buf(cmdbuf, src, dst, &pRegions[i]); 1911 } 1912} 1913 1914struct panvk_meta_fill_buf_info { 1915 mali_ptr start; 1916 uint32_t val; 1917}; 1918 1919#define panvk_meta_fill_buf_get_info_field(b, field) \ 1920 nir_load_ubo((b), 1, \ 1921 sizeof(((struct panvk_meta_fill_buf_info *)0)->field) * 8, \ 1922 nir_imm_int(b, 0), \ 1923 nir_imm_int(b, offsetof(struct panvk_meta_fill_buf_info, field)), \ 1924 .align_mul = 4, \ 1925 .align_offset = 0, \ 1926 .range_base = 0, \ 1927 .range = ~0) 1928 1929static mali_ptr 1930panvk_meta_fill_buf_shader(struct panfrost_device *pdev, 1931 struct pan_pool *bin_pool, 1932 struct pan_shader_info *shader_info) 1933{ 1934 /* FIXME: Won't work on compute queues, but we can't do that with 1935 * a compute shader if the destination is an AFBC surface. 1936 */ 1937 nir_builder b = 1938 nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, 1939 GENX(pan_shader_get_compiler_options)(), 1940 "panvk_meta_fill_buf()"); 1941 1942 b.shader->info.internal = true; 1943 b.shader->info.num_ubos = 1; 1944 1945 nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32); 1946 1947 nir_ssa_def *offset = 1948 nir_u2u64(&b, nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, sizeof(uint32_t)))); 1949 nir_ssa_def *ptr = 1950 nir_iadd(&b, panvk_meta_fill_buf_get_info_field(&b, start), offset); 1951 nir_ssa_def *val = panvk_meta_fill_buf_get_info_field(&b, val); 1952 1953 nir_store_global(&b, ptr, sizeof(uint32_t), val, 1); 1954 1955 struct panfrost_compile_inputs inputs = { 1956 .gpu_id = pdev->gpu_id, 1957 .is_blit = true, 1958 }; 1959 1960 struct util_dynarray binary; 1961 1962 util_dynarray_init(&binary, NULL); 1963 GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info); 1964 1965 /* Make sure UBO words have been upgraded to push constants and everything 1966 * is at the right place. 1967 */ 1968 assert(shader_info->ubo_count == 1); 1969 assert(shader_info->push.count == 3); 1970 1971 mali_ptr shader = 1972 pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 1973 PAN_ARCH >= 6 ? 128 : 64); 1974 1975 util_dynarray_fini(&binary); 1976 ralloc_free(b.shader); 1977 1978 return shader; 1979} 1980 1981static mali_ptr 1982panvk_meta_fill_buf_emit_rsd(struct panfrost_device *pdev, 1983 struct pan_pool *bin_pool, 1984 struct pan_pool *desc_pool, 1985 struct panfrost_ubo_push *pushmap) 1986{ 1987 struct pan_shader_info shader_info; 1988 1989 mali_ptr shader = 1990 panvk_meta_fill_buf_shader(pdev, bin_pool, &shader_info); 1991 1992 struct panfrost_ptr rsd_ptr = 1993 pan_pool_alloc_desc_aggregate(desc_pool, 1994 PAN_DESC(RENDERER_STATE)); 1995 1996 pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) { 1997 pan_shader_prepare_rsd(&shader_info, shader, &cfg); 1998 } 1999 2000 *pushmap = shader_info.push; 2001 return rsd_ptr.gpu; 2002} 2003 2004static void 2005panvk_meta_fill_buf_init(struct panvk_physical_device *dev) 2006{ 2007 dev->meta.copy.fillbuf.rsd = 2008 panvk_meta_fill_buf_emit_rsd(&dev->pdev, &dev->meta.bin_pool.base, 2009 &dev->meta.desc_pool.base, 2010 &dev->meta.copy.fillbuf.pushmap); 2011} 2012 2013static void 2014panvk_meta_fill_buf(struct panvk_cmd_buffer *cmdbuf, 2015 const struct panvk_buffer *dst, 2016 VkDeviceSize size, VkDeviceSize offset, 2017 uint32_t val) 2018{ 2019 struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev; 2020 2021 if (size == VK_WHOLE_SIZE) 2022 size = (dst->size - offset) & ~3ULL; 2023 2024 struct panvk_meta_fill_buf_info info = { 2025 .start = dst->bo->ptr.gpu + dst->bo_offset + offset, 2026 .val = val, 2027 }; 2028 2029 assert(!(offset & 3) && !(size & 3)); 2030 2031 unsigned nwords = size / sizeof(uint32_t); 2032 mali_ptr rsd = 2033 cmdbuf->device->physical_device->meta.copy.fillbuf.rsd; 2034 const struct panfrost_ubo_push *pushmap = 2035 &cmdbuf->device->physical_device->meta.copy.fillbuf.pushmap; 2036 2037 mali_ptr pushconsts = 2038 panvk_meta_copy_emit_push_constants(pdev, pushmap, &cmdbuf->desc_pool.base, 2039 &info, sizeof(info)); 2040 mali_ptr ubo = 2041 panvk_meta_copy_emit_ubo(pdev, &cmdbuf->desc_pool.base, &info, sizeof(info)); 2042 2043 panvk_per_arch(cmd_close_batch)(cmdbuf); 2044 2045 struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf); 2046 2047 panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false); 2048 2049 mali_ptr tsd = batch->tls.gpu; 2050 2051 struct pan_compute_dim num_wg = { nwords, 1, 1 }; 2052 struct pan_compute_dim wg_sz = { 1, 1, 1}; 2053 struct panfrost_ptr job = 2054 panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base, 2055 &batch->scoreboard, 2056 &num_wg, &wg_sz, 2057 0, 0, ubo, pushconsts, rsd, tsd); 2058 2059 util_dynarray_append(&batch->jobs, void *, job.cpu); 2060 2061 batch->blit.dst = dst->bo; 2062 panvk_per_arch(cmd_close_batch)(cmdbuf); 2063} 2064 2065void 2066panvk_per_arch(CmdFillBuffer)(VkCommandBuffer commandBuffer, 2067 VkBuffer dstBuffer, 2068 VkDeviceSize dstOffset, 2069 VkDeviceSize fillSize, 2070 uint32_t data) 2071{ 2072 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); 2073 VK_FROM_HANDLE(panvk_buffer, dst, dstBuffer); 2074 2075 panvk_meta_fill_buf(cmdbuf, dst, fillSize, dstOffset, data); 2076} 2077 2078static void 2079panvk_meta_update_buf(struct panvk_cmd_buffer *cmdbuf, 2080 const struct panvk_buffer *dst, VkDeviceSize offset, 2081 VkDeviceSize size, const void *data) 2082{ 2083 struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev; 2084 2085 struct panvk_meta_copy_buf2buf_info info = { 2086 .src = pan_pool_upload_aligned(&cmdbuf->desc_pool.base, data, size, 4), 2087 .dst = dst->bo->ptr.gpu + dst->bo_offset + offset, 2088 }; 2089 2090 unsigned log2blksz = ffs(sizeof(uint32_t)) - 1; 2091 2092 mali_ptr rsd = 2093 cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].rsd; 2094 const struct panfrost_ubo_push *pushmap = 2095 &cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].pushmap; 2096 2097 mali_ptr pushconsts = 2098 panvk_meta_copy_emit_push_constants(pdev, pushmap, &cmdbuf->desc_pool.base, 2099 &info, sizeof(info)); 2100 mali_ptr ubo = 2101 panvk_meta_copy_emit_ubo(pdev, &cmdbuf->desc_pool.base, &info, sizeof(info)); 2102 2103 panvk_per_arch(cmd_close_batch)(cmdbuf); 2104 2105 struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf); 2106 2107 panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false); 2108 2109 mali_ptr tsd = batch->tls.gpu; 2110 2111 unsigned nblocks = size >> log2blksz; 2112 struct pan_compute_dim num_wg = { nblocks, 1, 1 }; 2113 struct pan_compute_dim wg_sz = { 1, 1, 1}; 2114 struct panfrost_ptr job = 2115 panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base, 2116 &batch->scoreboard, 2117 &num_wg, &wg_sz, 2118 0, 0, ubo, pushconsts, rsd, tsd); 2119 2120 util_dynarray_append(&batch->jobs, void *, job.cpu); 2121 2122 batch->blit.dst = dst->bo; 2123 panvk_per_arch(cmd_close_batch)(cmdbuf); 2124} 2125 2126void 2127panvk_per_arch(CmdUpdateBuffer)(VkCommandBuffer commandBuffer, 2128 VkBuffer dstBuffer, 2129 VkDeviceSize dstOffset, 2130 VkDeviceSize dataSize, 2131 const void *pData) 2132{ 2133 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); 2134 VK_FROM_HANDLE(panvk_buffer, dst, dstBuffer); 2135 2136 panvk_meta_update_buf(cmdbuf, dst, dstOffset, dataSize, pData); 2137} 2138 2139void 2140panvk_per_arch(meta_copy_init)(struct panvk_physical_device *dev) 2141{ 2142 panvk_meta_copy_img2img_init(dev, false); 2143 panvk_meta_copy_img2img_init(dev, true); 2144 panvk_meta_copy_buf2img_init(dev); 2145 panvk_meta_copy_img2buf_init(dev); 2146 panvk_meta_copy_buf2buf_init(dev); 2147 panvk_meta_fill_buf_init(dev); 2148} 2149