pan_blitter.c revision 7ec681f3
1/* 2 * Copyright (C) 2020-2021 Collabora, Ltd. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: 24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> 25 * Boris Brezillon <boris.brezillon@collabora.com> 26 */ 27 28#include <math.h> 29#include <stdio.h> 30#include "pan_blend.h" 31#include "pan_blitter.h" 32#include "pan_cs.h" 33#include "pan_encoder.h" 34#include "pan_pool.h" 35#include "pan_shader.h" 36#include "pan_scoreboard.h" 37#include "pan_texture.h" 38#include "panfrost-quirks.h" 39#include "compiler/nir/nir_builder.h" 40#include "util/u_math.h" 41 42#if PAN_ARCH >= 6 43/* On Midgard, the native blit infrastructure (via MFBD preloads) is broken or 44 * missing in many cases. We instead use software paths as fallbacks to 45 * implement blits, which are done as TILER jobs. No vertex shader is 46 * necessary since we can supply screen-space coordinates directly. 47 * 48 * This is primarily designed as a fallback for preloads but could be extended 49 * for other clears/blits if needed in the future. */ 50 51static enum mali_register_file_format 52blit_type_to_reg_fmt(nir_alu_type in) 53{ 54 switch (in) { 55 case nir_type_float32: 56 return MALI_REGISTER_FILE_FORMAT_F32; 57 case nir_type_int32: 58 return MALI_REGISTER_FILE_FORMAT_I32; 59 case nir_type_uint32: 60 return MALI_REGISTER_FILE_FORMAT_U32; 61 default: 62 unreachable("Invalid blit type"); 63 } 64} 65#endif 66 67struct pan_blit_surface { 68 gl_frag_result loc : 4; 69 nir_alu_type type : 8; 70 enum mali_texture_dimension dim : 2; 71 bool array : 1; 72 unsigned src_samples: 5; 73 unsigned dst_samples: 5; 74}; 75 76struct pan_blit_shader_key { 77 struct pan_blit_surface surfaces[8]; 78}; 79 80struct pan_blit_shader_data { 81 struct pan_blit_shader_key key; 82 mali_ptr address; 83 unsigned blend_ret_offsets[8]; 84 nir_alu_type blend_types[8]; 85}; 86 87struct pan_blit_blend_shader_key { 88 enum pipe_format format; 89 nir_alu_type type; 90 unsigned rt : 3; 91 unsigned nr_samples : 5; 92 unsigned pad : 24; 93}; 94 95struct pan_blit_blend_shader_data { 96 struct pan_blit_blend_shader_key key; 97 mali_ptr address; 98}; 99 100struct pan_blit_rsd_key { 101 struct { 102 enum pipe_format format; 103 nir_alu_type type : 8; 104 unsigned src_samples : 5; 105 unsigned dst_samples : 5; 106 enum mali_texture_dimension dim : 2; 107 bool array : 1; 108 } rts[8], z, s; 109}; 110 111struct pan_blit_rsd_data { 112 struct pan_blit_rsd_key key; 113 mali_ptr address; 114}; 115 116#if PAN_ARCH >= 5 117static void 118pan_blitter_emit_blend(const struct panfrost_device *dev, 119 unsigned rt, 120 const struct pan_image_view *iview, 121 const struct pan_blit_shader_data *blit_shader, 122 mali_ptr blend_shader, 123 void *out) 124{ 125 pan_pack(out, BLEND, cfg) { 126 if (!iview) { 127 cfg.enable = false; 128#if PAN_ARCH >= 6 129 cfg.internal.mode = MALI_BLEND_MODE_OFF; 130#endif 131 continue; 132 } 133 134 cfg.round_to_fb_precision = true; 135 cfg.srgb = util_format_is_srgb(iview->format); 136 137#if PAN_ARCH >= 6 138 cfg.internal.mode = blend_shader ? 139 MALI_BLEND_MODE_SHADER : 140 MALI_BLEND_MODE_OPAQUE; 141#endif 142 143 if (!blend_shader) { 144 cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC; 145 cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC; 146 cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO; 147 cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC; 148 cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC; 149 cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO; 150 cfg.equation.color_mask = 0xf; 151 152#if PAN_ARCH >= 6 153 nir_alu_type type = blit_shader->key.surfaces[rt].type; 154 155 cfg.internal.fixed_function.num_comps = 4; 156 cfg.internal.fixed_function.conversion.memory_format = 157 panfrost_format_to_bifrost_blend(dev, iview->format, false); 158 cfg.internal.fixed_function.conversion.register_format = 159 blit_type_to_reg_fmt(type); 160 161 cfg.internal.fixed_function.rt = rt; 162#endif 163 } else { 164#if PAN_ARCH >= 6 165 cfg.internal.shader.pc = blend_shader; 166 if (blit_shader->blend_ret_offsets[rt]) { 167 cfg.internal.shader.return_value = 168 blit_shader->address + 169 blit_shader->blend_ret_offsets[rt]; 170 } 171#else 172 cfg.blend_shader = true; 173 cfg.shader_pc = blend_shader; 174#endif 175 } 176 } 177} 178#endif 179 180static void 181pan_blitter_emit_rsd(const struct panfrost_device *dev, 182 const struct pan_blit_shader_data *blit_shader, 183 unsigned rt_count, 184 const struct pan_image_view **rts, 185 mali_ptr *blend_shaders, 186 const struct pan_image_view *z, 187 const struct pan_image_view *s, 188 void *out) 189{ 190 unsigned tex_count = 0; 191 bool zs = (z || s); 192 bool ms = false; 193 194 for (unsigned i = 0; i < rt_count; i++) { 195 if (rts[i]) { 196 tex_count++; 197 if (rts[i]->nr_samples > 1) 198 ms = true; 199 } 200 } 201 202 if (z) { 203 if (z->image->layout.nr_samples > 1) 204 ms = true; 205 tex_count++; 206 } 207 208 if (s) { 209 if (s->image->layout.nr_samples > 1) 210 ms = true; 211 tex_count++; 212 } 213 214 pan_pack(out, RENDERER_STATE, cfg) { 215 assert(blit_shader->address); 216 cfg.shader.shader = blit_shader->address; 217 cfg.shader.varying_count = 1; 218 cfg.shader.texture_count = tex_count; 219 cfg.shader.sampler_count = 1; 220 221 cfg.properties.stencil_from_shader = s != NULL; 222 cfg.properties.depth_source = 223 z ? 224 MALI_DEPTH_SOURCE_SHADER : 225 MALI_DEPTH_SOURCE_FIXED_FUNCTION; 226 227 cfg.multisample_misc.sample_mask = 0xFFFF; 228 cfg.multisample_misc.multisample_enable = ms; 229 cfg.multisample_misc.evaluate_per_sample = ms; 230 cfg.multisample_misc.depth_write_mask = z != NULL; 231 cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS; 232 233 cfg.stencil_mask_misc.stencil_enable = s != NULL; 234 cfg.stencil_mask_misc.stencil_mask_front = 0xFF; 235 cfg.stencil_mask_misc.stencil_mask_back = 0xFF; 236 cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS; 237 cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE; 238 cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE; 239 cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE; 240 cfg.stencil_front.mask = 0xFF; 241 cfg.stencil_back = cfg.stencil_front; 242 243#if PAN_ARCH >= 6 244 if (zs) { 245 cfg.properties.zs_update_operation = 246 MALI_PIXEL_KILL_FORCE_LATE; 247 cfg.properties.pixel_kill_operation = 248 MALI_PIXEL_KILL_FORCE_LATE; 249 } else { 250 cfg.properties.zs_update_operation = 251 MALI_PIXEL_KILL_STRONG_EARLY; 252 cfg.properties.pixel_kill_operation = 253 MALI_PIXEL_KILL_FORCE_EARLY; 254 } 255 256 /* We can only allow blit shader fragments to kill if they write all 257 * colour outputs. This is true for our colour (non-Z/S) blit shaders, 258 * but obviously not true for Z/S shaders. However, blit shaders 259 * otherwise lack side effects, so other fragments may kill them. 260 * However, while shaders writing Z/S can normally be killed, on v6 261 * for frame shaders it can cause GPU timeouts, so only allow colour 262 * blit shaders to be killed. */ 263 264 cfg.properties.allow_forward_pixel_to_kill = !zs; 265 cfg.properties.allow_forward_pixel_to_be_killed = (dev->arch >= 7) || !zs; 266 267 cfg.preload.fragment.coverage = true; 268 cfg.preload.fragment.sample_mask_id = ms; 269#else 270 mali_ptr blend_shader = blend_shaders ? 271 panfrost_last_nonnull(blend_shaders, rt_count) : 0; 272 273 cfg.properties.work_register_count = 4; 274 cfg.properties.force_early_z = !zs; 275 cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS; 276 277 /* Set even on v5 for erratum workaround */ 278#if PAN_ARCH == 5 279 cfg.legacy_blend_shader = blend_shader; 280#else 281 cfg.blend_shader = blend_shader; 282 cfg.stencil_mask_misc.write_enable = true; 283 cfg.stencil_mask_misc.dither_disable = true; 284 cfg.multisample_misc.blend_shader = !!blend_shader; 285 cfg.blend_shader = blend_shader; 286 if (!cfg.multisample_misc.blend_shader) { 287 cfg.blend_equation.rgb.a = MALI_BLEND_OPERAND_A_SRC; 288 cfg.blend_equation.rgb.b = MALI_BLEND_OPERAND_B_SRC; 289 cfg.blend_equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO; 290 cfg.blend_equation.alpha.a = MALI_BLEND_OPERAND_A_SRC; 291 cfg.blend_equation.alpha.b = MALI_BLEND_OPERAND_B_SRC; 292 cfg.blend_equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO; 293 cfg.blend_constant = 0; 294 295 if (rts && rts[0]) { 296 cfg.stencil_mask_misc.srgb = 297 util_format_is_srgb(rts[0]->format); 298 cfg.blend_equation.color_mask = 0xf; 299 } 300 } 301#endif 302#endif 303 } 304 305#if PAN_ARCH >= 5 306 for (unsigned i = 0; i < MAX2(rt_count, 1); ++i) { 307 void *dest = out + pan_size(RENDERER_STATE) + pan_size(BLEND) * i; 308 const struct pan_image_view *rt_view = rts ? rts[i] : NULL; 309 mali_ptr blend_shader = blend_shaders ? blend_shaders[i] : 0; 310 311 pan_blitter_emit_blend(dev, i, rt_view, blit_shader, 312 blend_shader, dest); 313 } 314#endif 315} 316 317static void 318pan_blitter_get_blend_shaders(struct panfrost_device *dev, 319 unsigned rt_count, 320 const struct pan_image_view **rts, 321 const struct pan_blit_shader_data *blit_shader, 322 mali_ptr *blend_shaders) 323{ 324 if (!rt_count) 325 return; 326 327 struct pan_blend_state blend_state = { 328 .rt_count = rt_count, 329 }; 330 331 for (unsigned i = 0; i < rt_count; i++) { 332 if (!rts[i] || panfrost_blendable_formats_v7[rts[i]->format].internal) 333 continue; 334 335 struct pan_blit_blend_shader_key key = { 336 .format = rts[i]->format, 337 .rt = i, 338 .nr_samples = rts[i]->image->layout.nr_samples, 339 .type = blit_shader->blend_types[i], 340 }; 341 342 pthread_mutex_lock(&dev->blitter.shaders.lock); 343 struct hash_entry *he = 344 _mesa_hash_table_search(dev->blitter.shaders.blend, &key); 345 struct pan_blit_blend_shader_data *blend_shader = he ? he->data : NULL; 346 if (blend_shader) { 347 blend_shaders[i] = blend_shader->address; 348 pthread_mutex_unlock(&dev->blitter.shaders.lock); 349 continue; 350 } 351 352 blend_shader = rzalloc(dev->blitter.shaders.blend, 353 struct pan_blit_blend_shader_data); 354 blend_shader->key = key; 355 356 blend_state.rts[i] = (struct pan_blend_rt_state) { 357 .format = rts[i]->format, 358 .nr_samples = rts[i]->image->layout.nr_samples, 359 .equation = { 360 .blend_enable = true, 361 .rgb_src_factor = BLEND_FACTOR_ZERO, 362 .rgb_invert_src_factor = true, 363 .rgb_dst_factor = BLEND_FACTOR_ZERO, 364 .rgb_func = BLEND_FUNC_ADD, 365 .alpha_src_factor = BLEND_FACTOR_ZERO, 366 .alpha_invert_src_factor = true, 367 .alpha_dst_factor = BLEND_FACTOR_ZERO, 368 .alpha_func = BLEND_FUNC_ADD, 369 .color_mask = 0xf, 370 }, 371 }; 372 373 pthread_mutex_lock(&dev->blend_shaders.lock); 374 struct pan_blend_shader_variant *b = 375 GENX(pan_blend_get_shader_locked)(dev, &blend_state, 376 blit_shader->blend_types[i], 377 nir_type_float32, /* unused */ 378 i); 379 380 ASSERTED unsigned full_threads = 381 (dev->arch >= 7) ? 32 : ((dev->arch == 6) ? 64 : 4); 382 assert(b->work_reg_count <= full_threads); 383 struct panfrost_ptr bin = 384 pan_pool_alloc_aligned(dev->blitter.shaders.pool, 385 b->binary.size, 386 PAN_ARCH >= 6 ? 128 : 64); 387 memcpy(bin.cpu, b->binary.data, b->binary.size); 388 389 blend_shader->address = bin.gpu | b->first_tag; 390 pthread_mutex_unlock(&dev->blend_shaders.lock); 391 _mesa_hash_table_insert(dev->blitter.shaders.blend, 392 &blend_shader->key, blend_shader); 393 pthread_mutex_unlock(&dev->blitter.shaders.lock); 394 blend_shaders[i] = blend_shader->address; 395 } 396} 397 398static const struct pan_blit_shader_data * 399pan_blitter_get_blit_shader(struct panfrost_device *dev, 400 const struct pan_blit_shader_key *key) 401{ 402 pthread_mutex_lock(&dev->blitter.shaders.lock); 403 struct hash_entry *he = _mesa_hash_table_search(dev->blitter.shaders.blit, key); 404 struct pan_blit_shader_data *shader = he ? he->data : NULL; 405 406 if (shader) 407 goto out; 408 409 unsigned coord_comps = 0; 410 unsigned sig_offset = 0; 411 char sig[256]; 412 bool first = true; 413 for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) { 414 const char *type_str, *dim_str; 415 if (key->surfaces[i].type == nir_type_invalid) 416 continue; 417 418 switch (key->surfaces[i].type) { 419 case nir_type_float32: type_str = "float"; break; 420 case nir_type_uint32: type_str = "uint"; break; 421 case nir_type_int32: type_str = "int"; break; 422 default: unreachable("Invalid type\n"); 423 } 424 425 switch (key->surfaces[i].dim) { 426 case MALI_TEXTURE_DIMENSION_CUBE: dim_str = "cube"; break; 427 case MALI_TEXTURE_DIMENSION_1D: dim_str = "1D"; break; 428 case MALI_TEXTURE_DIMENSION_2D: dim_str = "2D"; break; 429 case MALI_TEXTURE_DIMENSION_3D: dim_str = "3D"; break; 430 default: unreachable("Invalid dim\n"); 431 } 432 433 coord_comps = MAX2(coord_comps, 434 (key->surfaces[i].dim ? : 3) + 435 (key->surfaces[i].array ? 1 : 0)); 436 first = false; 437 438 if (sig_offset >= sizeof(sig)) 439 continue; 440 441 sig_offset += snprintf(sig + sig_offset, sizeof(sig) - sig_offset, 442 "%s[%s;%s;%s%s;src_samples=%d,dst_samples=%d]", 443 first ? "" : ",", 444 gl_frag_result_name(key->surfaces[i].loc), 445 type_str, dim_str, 446 key->surfaces[i].array ? "[]" : "", 447 key->surfaces[i].src_samples, 448 key->surfaces[i].dst_samples); 449 } 450 451 nir_builder b = 452 nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, 453 GENX(pan_shader_get_compiler_options)(), 454 "pan_blit(%s)", sig); 455 b.shader->info.internal = true; 456 457 nir_variable *coord_var = 458 nir_variable_create(b.shader, nir_var_shader_in, 459 glsl_vector_type(GLSL_TYPE_FLOAT, coord_comps), 460 "coord"); 461 coord_var->data.location = VARYING_SLOT_TEX0; 462 463 nir_ssa_def *coord = nir_load_var(&b, coord_var); 464 465 unsigned active_count = 0; 466 for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) { 467 if (key->surfaces[i].type == nir_type_invalid) 468 continue; 469 470 /* Resolve operations only work for N -> 1 samples. */ 471 assert(key->surfaces[i].dst_samples == 1 || 472 key->surfaces[i].src_samples == key->surfaces[i].dst_samples); 473 474 static const char *out_names[] = { 475 "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7", 476 }; 477 478 unsigned ncomps = key->surfaces[i].loc >= FRAG_RESULT_DATA0 ? 4 : 1; 479 nir_variable *out = 480 nir_variable_create(b.shader, nir_var_shader_out, 481 glsl_vector_type(GLSL_TYPE_FLOAT, ncomps), 482 out_names[active_count]); 483 out->data.location = key->surfaces[i].loc; 484 out->data.driver_location = active_count; 485 486 bool resolve = key->surfaces[i].src_samples > key->surfaces[i].dst_samples; 487 bool ms = key->surfaces[i].src_samples > 1; 488 enum glsl_sampler_dim sampler_dim; 489 490 switch (key->surfaces[i].dim) { 491 case MALI_TEXTURE_DIMENSION_1D: 492 sampler_dim = GLSL_SAMPLER_DIM_1D; 493 break; 494 case MALI_TEXTURE_DIMENSION_2D: 495 sampler_dim = ms ? 496 GLSL_SAMPLER_DIM_MS : 497 GLSL_SAMPLER_DIM_2D; 498 break; 499 case MALI_TEXTURE_DIMENSION_3D: 500 sampler_dim = GLSL_SAMPLER_DIM_3D; 501 break; 502 case MALI_TEXTURE_DIMENSION_CUBE: 503 sampler_dim = GLSL_SAMPLER_DIM_CUBE; 504 break; 505 } 506 507 nir_ssa_def *res = NULL; 508 509 if (resolve) { 510 /* When resolving a float type, we need to calculate 511 * the average of all samples. For integer resolve, GL 512 * and Vulkan say that one sample should be chosen 513 * without telling which. Let's just pick the first one 514 * in that case. 515 */ 516 nir_alu_type base_type = 517 nir_alu_type_get_base_type(key->surfaces[i].type); 518 unsigned nsamples = base_type == nir_type_float ? 519 key->surfaces[i].src_samples : 1; 520 521 for (unsigned s = 0; s < nsamples; s++) { 522 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3); 523 524 tex->op = nir_texop_txf_ms; 525 tex->dest_type = key->surfaces[i].type; 526 tex->texture_index = active_count; 527 tex->is_array = key->surfaces[i].array; 528 tex->sampler_dim = sampler_dim; 529 530 tex->src[0].src_type = nir_tex_src_coord; 531 tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord)); 532 tex->coord_components = coord_comps; 533 534 tex->src[1].src_type = nir_tex_src_ms_index; 535 tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, s)); 536 537 tex->src[2].src_type = nir_tex_src_lod; 538 tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0)); 539 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL); 540 nir_builder_instr_insert(&b, &tex->instr); 541 542 res = res ? nir_fadd(&b, res, &tex->dest.ssa) : &tex->dest.ssa; 543 } 544 545 if (base_type == nir_type_float) { 546 unsigned type_sz = 547 nir_alu_type_get_type_size(key->surfaces[i].type); 548 res = nir_fmul(&b, res, 549 nir_imm_floatN_t(&b, 1.0f / nsamples, type_sz)); 550 } 551 } else { 552 nir_tex_instr *tex = 553 nir_tex_instr_create(b.shader, ms ? 3 : 1); 554 555 tex->dest_type = key->surfaces[i].type; 556 tex->texture_index = active_count; 557 tex->is_array = key->surfaces[i].array; 558 tex->sampler_dim = sampler_dim; 559 560 if (ms) { 561 tex->op = nir_texop_txf_ms; 562 563 tex->src[0].src_type = nir_tex_src_coord; 564 tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord)); 565 tex->coord_components = coord_comps; 566 567 tex->src[1].src_type = nir_tex_src_ms_index; 568 tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(&b)); 569 570 tex->src[2].src_type = nir_tex_src_lod; 571 tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0)); 572 } else { 573 tex->op = nir_texop_tex; 574 575 tex->src[0].src_type = nir_tex_src_coord; 576 tex->src[0].src = nir_src_for_ssa(coord); 577 tex->coord_components = coord_comps; 578 } 579 580 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL); 581 nir_builder_instr_insert(&b, &tex->instr); 582 res = &tex->dest.ssa; 583 } 584 585 assert(res); 586 587 if (key->surfaces[i].loc >= FRAG_RESULT_DATA0) { 588 nir_store_var(&b, out, res, 0xFF); 589 } else { 590 unsigned c = key->surfaces[i].loc == FRAG_RESULT_STENCIL ? 1 : 0; 591 nir_store_var(&b, out, nir_channel(&b, res, c), 0xFF); 592 } 593 active_count++; 594 } 595 596 struct panfrost_compile_inputs inputs = { 597 .gpu_id = dev->gpu_id, 598 .is_blit = true, 599 }; 600 struct util_dynarray binary; 601 struct pan_shader_info info; 602 603 util_dynarray_init(&binary, NULL); 604 605 GENX(pan_shader_compile)(b.shader, &inputs, &binary, &info); 606 607 shader = rzalloc(dev->blitter.shaders.blit, 608 struct pan_blit_shader_data); 609 shader->key = *key; 610 shader->address = 611 pan_pool_upload_aligned(dev->blitter.shaders.pool, 612 binary.data, binary.size, 613 PAN_ARCH >= 6 ? 128 : 64); 614 615 util_dynarray_fini(&binary); 616 ralloc_free(b.shader); 617 618#if PAN_ARCH <= 5 619 shader->address |= info.midgard.first_tag; 620#else 621 for (unsigned i = 0; i < ARRAY_SIZE(shader->blend_ret_offsets); i++) { 622 shader->blend_ret_offsets[i] = info.bifrost.blend[i].return_offset; 623 shader->blend_types[i] = info.bifrost.blend[i].type; 624 } 625#endif 626 627 _mesa_hash_table_insert(dev->blitter.shaders.blit, &shader->key, shader); 628 629out: 630 pthread_mutex_unlock(&dev->blitter.shaders.lock); 631 return shader; 632} 633 634static mali_ptr 635pan_blitter_get_rsd(struct panfrost_device *dev, 636 unsigned rt_count, 637 const struct pan_image_view **src_rts, 638 const struct pan_image_view **dst_rts, 639 const struct pan_image_view *src_z, 640 const struct pan_image_view *dst_z, 641 const struct pan_image_view *src_s, 642 const struct pan_image_view *dst_s) 643{ 644 struct pan_blit_rsd_key rsd_key = { 0 }; 645 646 assert(!rt_count || (!src_z && !src_s)); 647 648 struct pan_blit_shader_key blit_key = { 0 }; 649 650 if (src_z) { 651 assert(dst_z); 652 rsd_key.z.format = dst_z->format; 653 blit_key.surfaces[0].loc = FRAG_RESULT_DEPTH; 654 rsd_key.z.type = blit_key.surfaces[0].type = nir_type_float32; 655 rsd_key.z.src_samples = blit_key.surfaces[0].src_samples = src_z->image->layout.nr_samples; 656 rsd_key.z.dst_samples = blit_key.surfaces[0].dst_samples = dst_z->image->layout.nr_samples; 657 rsd_key.z.dim = blit_key.surfaces[0].dim = src_z->dim; 658 rsd_key.z.array = blit_key.surfaces[0].array = src_z->first_layer != src_z->last_layer; 659 } 660 661 if (src_s) { 662 assert(dst_s); 663 rsd_key.s.format = dst_s->format; 664 blit_key.surfaces[1].loc = FRAG_RESULT_STENCIL; 665 rsd_key.s.type = blit_key.surfaces[1].type = nir_type_uint32; 666 rsd_key.s.src_samples = blit_key.surfaces[1].src_samples = src_s->image->layout.nr_samples; 667 rsd_key.s.dst_samples = blit_key.surfaces[1].dst_samples = dst_s->image->layout.nr_samples; 668 rsd_key.s.dim = blit_key.surfaces[1].dim = src_s->dim; 669 rsd_key.s.array = blit_key.surfaces[1].array = src_s->first_layer != src_s->last_layer; 670 } 671 672 for (unsigned i = 0; i < rt_count; i++) { 673 if (!src_rts[i]) 674 continue; 675 676 assert(dst_rts[i]); 677 rsd_key.rts[i].format = dst_rts[i]->format; 678 blit_key.surfaces[i].loc = FRAG_RESULT_DATA0 + i; 679 rsd_key.rts[i].type = blit_key.surfaces[i].type = 680 util_format_is_pure_uint(src_rts[i]->format) ? nir_type_uint32 : 681 util_format_is_pure_sint(src_rts[i]->format) ? nir_type_int32 : 682 nir_type_float32; 683 rsd_key.rts[i].src_samples = blit_key.surfaces[i].src_samples = src_rts[i]->image->layout.nr_samples; 684 rsd_key.rts[i].dst_samples = blit_key.surfaces[i].dst_samples = dst_rts[i]->image->layout.nr_samples; 685 rsd_key.rts[i].dim = blit_key.surfaces[i].dim = src_rts[i]->dim; 686 rsd_key.rts[i].array = blit_key.surfaces[i].array = src_rts[i]->first_layer != src_rts[i]->last_layer; 687 } 688 689 pthread_mutex_lock(&dev->blitter.rsds.lock); 690 struct hash_entry *he = 691 _mesa_hash_table_search(dev->blitter.rsds.rsds, &rsd_key); 692 struct pan_blit_rsd_data *rsd = he ? he->data : NULL; 693 if (rsd) 694 goto out; 695 696 rsd = rzalloc(dev->blitter.rsds.rsds, struct pan_blit_rsd_data); 697 rsd->key = rsd_key; 698 699 unsigned bd_count = PAN_ARCH >= 5 ? MAX2(rt_count, 1) : 0; 700 struct panfrost_ptr rsd_ptr = 701 pan_pool_alloc_desc_aggregate(dev->blitter.rsds.pool, 702 PAN_DESC(RENDERER_STATE), 703 PAN_DESC_ARRAY(bd_count, BLEND)); 704 705 mali_ptr blend_shaders[8] = { 0 }; 706 707 const struct pan_blit_shader_data *blit_shader = 708 pan_blitter_get_blit_shader(dev, &blit_key); 709 710 pan_blitter_get_blend_shaders(dev, rt_count, dst_rts, 711 blit_shader, blend_shaders); 712 713 pan_blitter_emit_rsd(dev, blit_shader, 714 MAX2(rt_count, 1), dst_rts, blend_shaders, 715 dst_z, dst_s, rsd_ptr.cpu); 716 rsd->address = rsd_ptr.gpu; 717 _mesa_hash_table_insert(dev->blitter.rsds.rsds, &rsd->key, rsd); 718 719out: 720 pthread_mutex_unlock(&dev->blitter.rsds.lock); 721 return rsd->address; 722} 723 724static mali_ptr 725pan_preload_get_rsd(struct panfrost_device *dev, 726 const struct pan_fb_info *fb, 727 bool zs) 728{ 729 const struct pan_image_view *rts[8] = { NULL }; 730 const struct pan_image_view *z = NULL, *s = NULL; 731 struct pan_image_view patched_s_view; 732 unsigned rt_count = 0; 733 734 if (zs) { 735 if (fb->zs.preload.z) 736 z = fb->zs.view.zs; 737 738 if (fb->zs.preload.s) { 739 const struct pan_image_view *view = fb->zs.view.s ? : fb->zs.view.zs; 740 enum pipe_format fmt = util_format_get_depth_only(view->format); 741 742 switch (view->format) { 743 case PIPE_FORMAT_Z24_UNORM_S8_UINT: fmt = PIPE_FORMAT_X24S8_UINT; break; 744 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: fmt = PIPE_FORMAT_X32_S8X24_UINT; break; 745 default: fmt = view->format; break; 746 } 747 748 if (fmt != view->format) { 749 patched_s_view = *view; 750 patched_s_view.format = fmt; 751 s = &patched_s_view; 752 } else { 753 s = view; 754 } 755 } 756 } else { 757 for (unsigned i = 0; i < fb->rt_count; i++) { 758 if (fb->rts[i].preload) 759 rts[i] = fb->rts[i].view; 760 } 761 762 rt_count = fb->rt_count; 763 } 764 765 return pan_blitter_get_rsd(dev, rt_count, rts, rts, z, z, s, s); 766} 767 768static mali_ptr 769pan_blit_get_rsd(struct panfrost_device *dev, 770 const struct pan_image_view *src_views, 771 const struct pan_image_view *dst_view) 772{ 773 const struct util_format_description *desc = 774 util_format_description(src_views[0].format); 775 const struct pan_image_view *src_rt = NULL, *dst_rt = NULL; 776 const struct pan_image_view *src_z = NULL, *dst_z = NULL; 777 const struct pan_image_view *src_s = NULL, *dst_s = NULL; 778 779 if (util_format_has_depth(desc)) { 780 src_z = &src_views[0]; 781 dst_z = dst_view; 782 } 783 784 if (src_views[1].format) { 785 src_s = &src_views[1]; 786 dst_s = dst_view; 787 } else if (util_format_has_stencil(desc)) { 788 src_s = &src_views[0]; 789 dst_s = dst_view; 790 } 791 792 if (!src_z && !src_s) { 793 src_rt = &src_views[0]; 794 dst_rt = dst_view; 795 } 796 797 return pan_blitter_get_rsd(dev, src_rt ? 1 : 0, &src_rt, &dst_rt, 798 src_z, dst_z, src_s, dst_s); 799} 800 801static bool 802pan_preload_needed(const struct pan_fb_info *fb, bool zs) 803{ 804 if (zs) { 805 if (fb->zs.preload.z || fb->zs.preload.s) 806 return true; 807 } else { 808 for (unsigned i = 0; i < fb->rt_count; i++) { 809 if (fb->rts[i].preload) 810 return true; 811 } 812 } 813 814 return false; 815} 816 817static void 818pan_blitter_emit_varying(struct pan_pool *pool, 819 mali_ptr coordinates, 820 struct MALI_DRAW *draw) 821{ 822 /* Bifrost needs an empty desc to mark end of prefetching */ 823 bool padding_buffer = PAN_ARCH >= 6; 824 825 struct panfrost_ptr varying = 826 pan_pool_alloc_desc(pool, ATTRIBUTE); 827 struct panfrost_ptr varying_buffer = 828 pan_pool_alloc_desc_array(pool, (padding_buffer ? 2 : 1), 829 ATTRIBUTE_BUFFER); 830 831 pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) { 832 cfg.pointer = coordinates; 833 cfg.stride = 4 * sizeof(float); 834 cfg.size = cfg.stride * 4; 835 } 836 837 if (padding_buffer) { 838 pan_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER), 839 ATTRIBUTE_BUFFER, cfg); 840 } 841 842 pan_pack(varying.cpu, ATTRIBUTE, cfg) { 843 cfg.buffer_index = 0; 844 cfg.offset_enable = PAN_ARCH <= 5; 845 cfg.format = pool->dev->formats[PIPE_FORMAT_R32G32B32_FLOAT].hw; 846 } 847 848 draw->varyings = varying.gpu; 849 draw->varying_buffers = varying_buffer.gpu; 850} 851 852static mali_ptr 853pan_blitter_emit_sampler(struct pan_pool *pool, 854 bool nearest_filter) 855{ 856 struct panfrost_ptr sampler = 857 pan_pool_alloc_desc(pool, SAMPLER); 858 859 pan_pack(sampler.cpu, SAMPLER, cfg) { 860 cfg.seamless_cube_map = false; 861 cfg.normalized_coordinates = false; 862 cfg.minify_nearest = nearest_filter; 863 cfg.magnify_nearest = nearest_filter; 864 } 865 866 return sampler.gpu; 867} 868 869static mali_ptr 870pan_blitter_emit_textures(struct pan_pool *pool, 871 unsigned tex_count, 872 const struct pan_image_view **views) 873{ 874#if PAN_ARCH >= 6 875 struct panfrost_ptr textures = 876 pan_pool_alloc_desc_array(pool, tex_count, TEXTURE); 877 878 for (unsigned i = 0; i < tex_count; i++) { 879 void *texture = textures.cpu + (pan_size(TEXTURE) * i); 880 size_t payload_size = 881 GENX(panfrost_estimate_texture_payload_size)(views[i]); 882 struct panfrost_ptr surfaces = 883 pan_pool_alloc_aligned(pool, payload_size, 884 pan_alignment(SURFACE_WITH_STRIDE)); 885 886 GENX(panfrost_new_texture)(pool->dev, views[i], texture, &surfaces); 887 } 888 889 return textures.gpu; 890#else 891 mali_ptr textures[8] = { 0 }; 892 893 for (unsigned i = 0; i < tex_count; i++) { 894 size_t sz = pan_size(TEXTURE) + 895 GENX(panfrost_estimate_texture_payload_size)(views[i]); 896 struct panfrost_ptr texture = 897 pan_pool_alloc_aligned(pool, sz, pan_alignment(TEXTURE)); 898 struct panfrost_ptr surfaces = { 899 .cpu = texture.cpu + pan_size(TEXTURE), 900 .gpu = texture.gpu + pan_size(TEXTURE), 901 }; 902 903 GENX(panfrost_new_texture)(pool->dev, views[i], texture.cpu, &surfaces); 904 textures[i] = texture.gpu; 905 } 906 907 return pan_pool_upload_aligned(pool, textures, 908 tex_count * sizeof(mali_ptr), 909 sizeof(mali_ptr)); 910#endif 911} 912 913static void 914pan_preload_emit_textures(struct pan_pool *pool, 915 const struct pan_fb_info *fb, bool zs, 916 struct MALI_DRAW *draw) 917{ 918 const struct pan_image_view *views[8]; 919 struct pan_image_view patched_s_view; 920 unsigned tex_count = 0; 921 922 if (zs) { 923 if (fb->zs.preload.z) 924 views[tex_count++] = fb->zs.view.zs; 925 926 if (fb->zs.preload.s) { 927 const struct pan_image_view *view = fb->zs.view.s ? : fb->zs.view.zs; 928 enum pipe_format fmt = util_format_get_depth_only(view->format); 929 930 switch (view->format) { 931 case PIPE_FORMAT_Z24_UNORM_S8_UINT: fmt = PIPE_FORMAT_X24S8_UINT; break; 932 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: fmt = PIPE_FORMAT_X32_S8X24_UINT; break; 933 default: fmt = view->format; break; 934 } 935 936 if (fmt != view->format) { 937 patched_s_view = *view; 938 patched_s_view.format = fmt; 939 view = &patched_s_view; 940 } 941 views[tex_count++] = view; 942 } 943 } else { 944 for (unsigned i = 0; i < fb->rt_count; i++) { 945 if (fb->rts[i].preload) 946 views[tex_count++] = fb->rts[i].view; 947 } 948 949 } 950 951 draw->textures = pan_blitter_emit_textures(pool, tex_count, views); 952} 953 954static mali_ptr 955pan_blitter_emit_viewport(struct pan_pool *pool, 956 uint16_t minx, uint16_t miny, 957 uint16_t maxx, uint16_t maxy) 958{ 959 struct panfrost_ptr vp = pan_pool_alloc_desc(pool, VIEWPORT); 960 961 pan_pack(vp.cpu, VIEWPORT, cfg) { 962 cfg.scissor_minimum_x = minx; 963 cfg.scissor_minimum_y = miny; 964 cfg.scissor_maximum_x = maxx; 965 cfg.scissor_maximum_y = maxy; 966 } 967 968 return vp.gpu; 969} 970 971static void 972pan_preload_emit_dcd(struct pan_pool *pool, 973 struct pan_fb_info *fb, bool zs, 974 mali_ptr coordinates, 975 mali_ptr tsd, mali_ptr rsd, 976 void *out, bool always_write) 977{ 978 pan_pack(out, DRAW, cfg) { 979 cfg.four_components_per_vertex = true; 980 cfg.draw_descriptor_is_64b = true; 981 cfg.thread_storage = tsd; 982 cfg.state = rsd; 983 984 cfg.position = coordinates; 985 pan_blitter_emit_varying(pool, coordinates, &cfg); 986 uint16_t minx = 0, miny = 0, maxx, maxy; 987 988#if PAN_ARCH == 4 989 maxx = fb->width - 1; 990 maxy = fb->height - 1; 991#else 992 /* Align on 32x32 tiles */ 993 minx = fb->extent.minx & ~31; 994 miny = fb->extent.miny & ~31; 995 maxx = MIN2(ALIGN_POT(fb->extent.maxx + 1, 32), fb->width) - 1; 996 maxy = MIN2(ALIGN_POT(fb->extent.maxy + 1, 32), fb->height) - 1; 997#endif 998 999 cfg.viewport = 1000 pan_blitter_emit_viewport(pool, minx, miny, maxx, maxy); 1001 1002 pan_preload_emit_textures(pool, fb, zs, &cfg); 1003 1004 cfg.samplers = pan_blitter_emit_sampler(pool, true); 1005 1006#if PAN_ARCH >= 6 1007 /* Tiles updated by blit shaders are still considered 1008 * clean (separate for colour and Z/S), allowing us to 1009 * suppress unnecessary writeback */ 1010 cfg.clean_fragment_write = !always_write; 1011#endif 1012 } 1013} 1014 1015static void 1016pan_blit_emit_dcd(struct pan_pool *pool, 1017 mali_ptr src_coords, mali_ptr dst_coords, 1018 mali_ptr textures, mali_ptr samplers, 1019 mali_ptr vpd, mali_ptr tsd, mali_ptr rsd, 1020 void *out) 1021{ 1022 pan_pack(out, DRAW, cfg) { 1023 cfg.four_components_per_vertex = true; 1024 cfg.draw_descriptor_is_64b = true; 1025 cfg.thread_storage = tsd; 1026 cfg.state = rsd; 1027 1028 cfg.position = dst_coords; 1029 pan_blitter_emit_varying(pool, src_coords, &cfg); 1030 cfg.viewport = vpd; 1031 cfg.textures = textures; 1032 cfg.samplers = samplers; 1033 } 1034} 1035 1036static struct panfrost_ptr 1037pan_blit_emit_tiler_job(struct pan_pool *desc_pool, 1038 struct pan_scoreboard *scoreboard, 1039 mali_ptr src_coords, mali_ptr dst_coords, 1040 mali_ptr textures, mali_ptr samplers, 1041 mali_ptr vpd, mali_ptr rsd, mali_ptr tsd, 1042 mali_ptr tiler) 1043{ 1044 struct panfrost_ptr job = 1045 pan_pool_alloc_desc(desc_pool, TILER_JOB); 1046 1047 pan_blit_emit_dcd(desc_pool, 1048 src_coords, dst_coords, textures, samplers, 1049 vpd, tsd, rsd, 1050 pan_section_ptr(job.cpu, TILER_JOB, DRAW)); 1051 1052 pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) { 1053 cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP; 1054 cfg.index_count = 4; 1055 cfg.job_task_split = 6; 1056 } 1057 1058 pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) { 1059 cfg.constant = 1.0f; 1060 } 1061 1062 void *invoc = pan_section_ptr(job.cpu, 1063 TILER_JOB, 1064 INVOCATION); 1065 panfrost_pack_work_groups_compute(invoc, 1, 4, 1066 1, 1, 1, 1, true, false); 1067 1068#if PAN_ARCH >= 6 1069 pan_section_pack(job.cpu, TILER_JOB, PADDING, cfg); 1070 pan_section_pack(job.cpu, TILER_JOB, TILER, cfg) { 1071 cfg.address = tiler; 1072 } 1073#endif 1074 1075 panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER, 1076 false, false, 0, 0, &job, false); 1077 return job; 1078} 1079 1080#if PAN_ARCH >= 6 1081static void 1082pan_preload_fb_alloc_pre_post_dcds(struct pan_pool *desc_pool, 1083 struct pan_fb_info *fb) 1084{ 1085 if (fb->bifrost.pre_post.dcds.gpu) 1086 return; 1087 1088 fb->bifrost.pre_post.dcds = 1089 pan_pool_alloc_desc_array(desc_pool, 3, DRAW); 1090} 1091 1092static void 1093pan_preload_emit_pre_frame_dcd(struct pan_pool *desc_pool, 1094 struct pan_fb_info *fb, bool zs, 1095 mali_ptr coords, mali_ptr rsd, 1096 mali_ptr tsd) 1097{ 1098 unsigned dcd_idx = zs ? 0 : 1; 1099 pan_preload_fb_alloc_pre_post_dcds(desc_pool, fb); 1100 assert(fb->bifrost.pre_post.dcds.cpu); 1101 void *dcd = fb->bifrost.pre_post.dcds.cpu + 1102 (dcd_idx * pan_size(DRAW)); 1103 1104 int crc_rt = GENX(pan_select_crc_rt)(fb); 1105 1106 bool always_write = false; 1107 1108 /* If CRC data is currently invalid and this batch will make it valid, 1109 * write even clean tiles to make sure CRC data is updated. */ 1110 if (crc_rt >= 0) { 1111 bool *valid = fb->rts[crc_rt].crc_valid; 1112 bool full = !fb->extent.minx && !fb->extent.miny && 1113 fb->extent.maxx == (fb->width - 1) && 1114 fb->extent.maxy == (fb->height - 1); 1115 1116 if (full && !(*valid)) 1117 always_write = true; 1118 } 1119 1120 pan_preload_emit_dcd(desc_pool, fb, zs, coords, tsd, rsd, dcd, always_write); 1121 if (zs) { 1122 enum pipe_format fmt = fb->zs.view.zs ? 1123 fb->zs.view.zs->image->layout.format : 1124 fb->zs.view.s->image->layout.format; 1125 bool always = false; 1126 1127 /* If we're dealing with a combined ZS resource and only one 1128 * component is cleared, we need to reload the whole surface 1129 * because the zs_clean_pixel_write_enable flag is set in that 1130 * case. 1131 */ 1132 if (util_format_is_depth_and_stencil(fmt) && 1133 fb->zs.clear.z != fb->zs.clear.s) 1134 always = true; 1135 1136 /* We could use INTERSECT on Bifrost v7 too, but 1137 * EARLY_ZS_ALWAYS has the advantage of reloading the ZS tile 1138 * buffer one or more tiles ahead, making ZS data immediately 1139 * available for any ZS tests taking place in other shaders. 1140 * Thing's haven't been benchmarked to determine what's 1141 * preferable (saving bandwidth vs having ZS preloaded 1142 * earlier), so let's leave it like that for now. 1143 */ 1144 fb->bifrost.pre_post.modes[dcd_idx] = 1145 desc_pool->dev->arch > 6 ? 1146 MALI_PRE_POST_FRAME_SHADER_MODE_EARLY_ZS_ALWAYS : 1147 always ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS : 1148 MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT; 1149 } else { 1150 fb->bifrost.pre_post.modes[dcd_idx] = 1151 always_write ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS : 1152 MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT; 1153 } 1154} 1155#else 1156static struct panfrost_ptr 1157pan_preload_emit_tiler_job(struct pan_pool *desc_pool, 1158 struct pan_scoreboard *scoreboard, 1159 struct pan_fb_info *fb, bool zs, 1160 mali_ptr coords, mali_ptr rsd, mali_ptr tsd) 1161{ 1162 struct panfrost_ptr job = 1163 pan_pool_alloc_desc(desc_pool, TILER_JOB); 1164 1165 pan_preload_emit_dcd(desc_pool, fb, zs, coords, tsd, rsd, 1166 pan_section_ptr(job.cpu, TILER_JOB, DRAW), 1167 false); 1168 1169 pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) { 1170 cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP; 1171 cfg.index_count = 4; 1172 cfg.job_task_split = 6; 1173 } 1174 1175 pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) { 1176 cfg.constant = 1.0f; 1177 } 1178 1179 void *invoc = pan_section_ptr(job.cpu, 1180 TILER_JOB, 1181 INVOCATION); 1182 panfrost_pack_work_groups_compute(invoc, 1, 4, 1183 1, 1, 1, 1, true, false); 1184 1185 panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER, 1186 false, false, 0, 0, &job, true); 1187 return job; 1188} 1189#endif 1190 1191static struct panfrost_ptr 1192pan_preload_fb_part(struct pan_pool *pool, 1193 struct pan_scoreboard *scoreboard, 1194 struct pan_fb_info *fb, bool zs, 1195 mali_ptr coords, mali_ptr tsd, mali_ptr tiler) 1196{ 1197 struct panfrost_device *dev = pool->dev; 1198 mali_ptr rsd = pan_preload_get_rsd(dev, fb, zs); 1199 struct panfrost_ptr job = { 0 }; 1200 1201#if PAN_ARCH >= 6 1202 pan_preload_emit_pre_frame_dcd(pool, fb, zs, 1203 coords, rsd, tsd); 1204#else 1205 job = pan_preload_emit_tiler_job(pool, scoreboard, 1206 fb, zs, coords, rsd, tsd); 1207#endif 1208 return job; 1209} 1210 1211unsigned 1212GENX(pan_preload_fb)(struct pan_pool *pool, 1213 struct pan_scoreboard *scoreboard, 1214 struct pan_fb_info *fb, 1215 mali_ptr tsd, mali_ptr tiler, 1216 struct panfrost_ptr *jobs) 1217{ 1218 bool preload_zs = pan_preload_needed(fb, true); 1219 bool preload_rts = pan_preload_needed(fb, false); 1220 mali_ptr coords; 1221 1222 if (!preload_zs && !preload_rts) 1223 return 0; 1224 1225 float rect[] = { 1226 0.0, 0.0, 0.0, 1.0, 1227 fb->width, 0.0, 0.0, 1.0, 1228 0.0, fb->height, 0.0, 1.0, 1229 fb->width, fb->height, 0.0, 1.0, 1230 }; 1231 1232 coords = pan_pool_upload_aligned(pool, rect, 1233 sizeof(rect), 64); 1234 1235 unsigned njobs = 0; 1236 if (preload_zs) { 1237 struct panfrost_ptr job = 1238 pan_preload_fb_part(pool, scoreboard, fb, true, 1239 coords, tsd, tiler); 1240 if (jobs && job.cpu) 1241 jobs[njobs++] = job; 1242 } 1243 1244 if (preload_rts) { 1245 struct panfrost_ptr job = 1246 pan_preload_fb_part(pool, scoreboard, fb, false, 1247 coords, tsd, tiler); 1248 if (jobs && job.cpu) 1249 jobs[njobs++] = job; 1250 } 1251 1252 return njobs; 1253} 1254 1255void 1256GENX(pan_blit_ctx_init)(struct panfrost_device *dev, 1257 const struct pan_blit_info *info, 1258 struct pan_pool *blit_pool, 1259 struct pan_blit_context *ctx) 1260{ 1261 memset(ctx, 0, sizeof(*ctx)); 1262 1263 struct pan_image_view sviews[2] = { 1264 { 1265 .format = info->src.planes[0].format, 1266 .image = info->src.planes[0].image, 1267 .dim = info->src.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_CUBE ? 1268 MALI_TEXTURE_DIMENSION_2D : info->src.planes[0].image->layout.dim, 1269 .first_level = info->src.level, 1270 .last_level = info->src.level, 1271 .first_layer = info->src.start.layer, 1272 .last_layer = info->src.end.layer, 1273 .swizzle = { 1274 PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, 1275 PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W, 1276 }, 1277 }, 1278 }; 1279 1280 struct pan_image_view dview = { 1281 .format = info->dst.planes[0].format, 1282 .image = info->dst.planes[0].image, 1283 .dim = info->dst.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_1D ? 1284 MALI_TEXTURE_DIMENSION_1D : MALI_TEXTURE_DIMENSION_2D, 1285 .first_level = info->dst.level, 1286 .last_level = info->dst.level, 1287 .first_layer = info->dst.start.layer, 1288 .last_layer = info->dst.start.layer, 1289 .swizzle = { 1290 PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, 1291 PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W, 1292 }, 1293 }; 1294 1295 ctx->src.start.x = info->src.start.x; 1296 ctx->src.start.y = info->src.start.y; 1297 ctx->src.end.x = info->src.end.x; 1298 ctx->src.end.y = info->src.end.y; 1299 ctx->src.dim = sviews[0].dim; 1300 1301 if (info->dst.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_3D) { 1302 unsigned max_z = u_minify(info->dst.planes[0].image->layout.depth, info->dst.level) - 1; 1303 1304 ctx->z_scale = (float)(info->src.end.z - info->src.start.z) / 1305 (info->dst.end.z - info->dst.start.z); 1306 assert(info->dst.start.z != info->dst.end.z); 1307 if (info->dst.start.z > info->dst.end.z) { 1308 ctx->dst.cur_layer = info->dst.start.z - 1; 1309 ctx->dst.last_layer = info->dst.end.z; 1310 } else { 1311 ctx->dst.cur_layer = info->dst.start.z; 1312 ctx->dst.last_layer = info->dst.end.z - 1; 1313 } 1314 ctx->dst.cur_layer = MIN2(MAX2(ctx->dst.cur_layer, 0), max_z); 1315 ctx->dst.last_layer = MIN2(MAX2(ctx->dst.last_layer, 0), max_z); 1316 ctx->dst.layer_offset = ctx->dst.cur_layer; 1317 } else { 1318 unsigned max_layer = info->dst.planes[0].image->layout.array_size - 1; 1319 ctx->dst.layer_offset = info->dst.start.layer; 1320 ctx->dst.cur_layer = info->dst.start.layer; 1321 ctx->dst.last_layer = MIN2(info->dst.end.layer, max_layer); 1322 ctx->z_scale = 1; 1323 } 1324 1325 if (sviews[0].dim == MALI_TEXTURE_DIMENSION_3D) { 1326 if (info->src.start.z < info->src.end.z) 1327 ctx->src.z_offset = info->src.start.z + fabs(ctx->z_scale * 0.5f); 1328 else 1329 ctx->src.z_offset = info->src.start.z - fabs(ctx->z_scale * 0.5f); 1330 } else { 1331 ctx->src.layer_offset = info->src.start.layer; 1332 } 1333 1334 /* Split depth and stencil */ 1335 if (util_format_is_depth_and_stencil(sviews[0].format)) { 1336 sviews[1] = sviews[0]; 1337 sviews[0].format = util_format_get_depth_only(sviews[0].format); 1338 sviews[1].format = util_format_stencil_only(sviews[1].format); 1339 } else if (info->src.planes[1].format) { 1340 sviews[1] = sviews[0]; 1341 sviews[1].format = info->src.planes[1].format; 1342 sviews[1].image = info->src.planes[1].image; 1343 } 1344 1345 ctx->rsd = pan_blit_get_rsd(dev, sviews, &dview); 1346 1347 ASSERTED unsigned nlayers = info->src.end.layer - info->src.start.layer + 1; 1348 1349 assert(nlayers == (info->dst.end.layer - info->dst.start.layer + 1)); 1350 1351 unsigned dst_w = u_minify(info->dst.planes[0].image->layout.width, info->dst.level); 1352 unsigned dst_h = u_minify(info->dst.planes[0].image->layout.height, info->dst.level); 1353 unsigned maxx = MIN2(MAX2(info->dst.start.x, info->dst.end.x), dst_w - 1); 1354 unsigned maxy = MIN2(MAX2(info->dst.start.y, info->dst.end.y), dst_h - 1); 1355 unsigned minx = MAX2(MIN3(info->dst.start.x, info->dst.end.x, maxx), 0); 1356 unsigned miny = MAX2(MIN3(info->dst.start.y, info->dst.end.y, maxy), 0); 1357 1358 if (info->scissor.enable) { 1359 minx = MAX2(minx, info->scissor.minx); 1360 miny = MAX2(miny, info->scissor.miny); 1361 maxx = MIN2(maxx, info->scissor.maxx); 1362 maxy = MIN2(maxy, info->scissor.maxy); 1363 } 1364 1365 const struct pan_image_view *sview_ptrs[] = { &sviews[0], &sviews[1] }; 1366 unsigned nviews = sviews[1].format ? 2 : 1; 1367 1368 ctx->textures = pan_blitter_emit_textures(blit_pool, nviews, sview_ptrs); 1369 ctx->samplers = pan_blitter_emit_sampler(blit_pool, info->nearest); 1370 1371 ctx->vpd = pan_blitter_emit_viewport(blit_pool, 1372 minx, miny, maxx, maxy); 1373 1374 float dst_rect[] = { 1375 info->dst.start.x, info->dst.start.y, 0.0, 1.0, 1376 info->dst.end.x, info->dst.start.y, 0.0, 1.0, 1377 info->dst.start.x, info->dst.end.y, 0.0, 1.0, 1378 info->dst.end.x, info->dst.end.y, 0.0, 1.0, 1379 }; 1380 1381 ctx->position = 1382 pan_pool_upload_aligned(blit_pool, dst_rect, 1383 sizeof(dst_rect), 64); 1384} 1385 1386struct panfrost_ptr 1387GENX(pan_blit)(struct pan_blit_context *ctx, 1388 struct pan_pool *pool, 1389 struct pan_scoreboard *scoreboard, 1390 mali_ptr tsd, mali_ptr tiler) 1391{ 1392 if (ctx->dst.cur_layer < 0 || 1393 (ctx->dst.last_layer >= ctx->dst.layer_offset && 1394 ctx->dst.cur_layer > ctx->dst.last_layer) || 1395 (ctx->dst.last_layer < ctx->dst.layer_offset && 1396 ctx->dst.cur_layer < ctx->dst.last_layer)) 1397 return (struct panfrost_ptr){ 0 }; 1398 1399 int32_t layer = ctx->dst.cur_layer - ctx->dst.layer_offset; 1400 float src_z; 1401 if (ctx->src.dim == MALI_TEXTURE_DIMENSION_3D) 1402 src_z = (ctx->z_scale * layer) + ctx->src.z_offset; 1403 else 1404 src_z = ctx->src.layer_offset + layer; 1405 1406 float src_rect[] = { 1407 ctx->src.start.x, ctx->src.start.y, src_z, 1.0, 1408 ctx->src.end.x, ctx->src.start.y, src_z, 1.0, 1409 ctx->src.start.x, ctx->src.end.y, src_z, 1.0, 1410 ctx->src.end.x, ctx->src.end.y, src_z, 1.0, 1411 }; 1412 1413 mali_ptr src_coords = 1414 pan_pool_upload_aligned(pool, src_rect, 1415 sizeof(src_rect), 64); 1416 1417 return pan_blit_emit_tiler_job(pool, scoreboard, 1418 src_coords, ctx->position, 1419 ctx->textures, ctx->samplers, 1420 ctx->vpd, ctx->rsd, tsd, tiler); 1421} 1422 1423static uint32_t pan_blit_shader_key_hash(const void *key) 1424{ 1425 return _mesa_hash_data(key, sizeof(struct pan_blit_shader_key)); 1426} 1427 1428static bool pan_blit_shader_key_equal(const void *a, const void *b) 1429{ 1430 return !memcmp(a, b, sizeof(struct pan_blit_shader_key)); 1431} 1432 1433static uint32_t pan_blit_blend_shader_key_hash(const void *key) 1434{ 1435 return _mesa_hash_data(key, sizeof(struct pan_blit_blend_shader_key)); 1436} 1437 1438static bool pan_blit_blend_shader_key_equal(const void *a, const void *b) 1439{ 1440 return !memcmp(a, b, sizeof(struct pan_blit_blend_shader_key)); 1441} 1442 1443static uint32_t pan_blit_rsd_key_hash(const void *key) 1444{ 1445 return _mesa_hash_data(key, sizeof(struct pan_blit_rsd_key)); 1446} 1447 1448static bool pan_blit_rsd_key_equal(const void *a, const void *b) 1449{ 1450 return !memcmp(a, b, sizeof(struct pan_blit_rsd_key)); 1451} 1452 1453static void 1454pan_blitter_prefill_blit_shader_cache(struct panfrost_device *dev) 1455{ 1456 static const struct pan_blit_shader_key prefill[] = { 1457 { 1458 .surfaces[0] = { 1459 .loc = FRAG_RESULT_DEPTH, 1460 .type = nir_type_float32, 1461 .dim = MALI_TEXTURE_DIMENSION_2D, 1462 .src_samples = 1, 1463 .dst_samples = 1, 1464 }, 1465 }, 1466 { 1467 .surfaces[1] = { 1468 .loc = FRAG_RESULT_STENCIL, 1469 .type = nir_type_uint32, 1470 .dim = MALI_TEXTURE_DIMENSION_2D, 1471 .src_samples = 1, 1472 .dst_samples = 1, 1473 }, 1474 }, 1475 { 1476 .surfaces[0] = { 1477 .loc = FRAG_RESULT_DATA0, 1478 .type = nir_type_float32, 1479 .dim = MALI_TEXTURE_DIMENSION_2D, 1480 .src_samples = 1, 1481 .dst_samples = 1, 1482 }, 1483 }, 1484 }; 1485 1486 for (unsigned i = 0; i < ARRAY_SIZE(prefill); i++) 1487 pan_blitter_get_blit_shader(dev, &prefill[i]); 1488} 1489 1490void 1491GENX(pan_blitter_init)(struct panfrost_device *dev, 1492 struct pan_pool *bin_pool, 1493 struct pan_pool *desc_pool) 1494{ 1495 dev->blitter.shaders.blit = 1496 _mesa_hash_table_create(NULL, pan_blit_shader_key_hash, 1497 pan_blit_shader_key_equal); 1498 dev->blitter.shaders.blend = 1499 _mesa_hash_table_create(NULL, pan_blit_blend_shader_key_hash, 1500 pan_blit_blend_shader_key_equal); 1501 dev->blitter.shaders.pool = bin_pool; 1502 pthread_mutex_init(&dev->blitter.shaders.lock, NULL); 1503 pan_blitter_prefill_blit_shader_cache(dev); 1504 1505 dev->blitter.rsds.pool = desc_pool; 1506 dev->blitter.rsds.rsds = 1507 _mesa_hash_table_create(NULL, pan_blit_rsd_key_hash, 1508 pan_blit_rsd_key_equal); 1509 pthread_mutex_init(&dev->blitter.rsds.lock, NULL); 1510} 1511 1512void 1513GENX(pan_blitter_cleanup)(struct panfrost_device *dev) 1514{ 1515 _mesa_hash_table_destroy(dev->blitter.shaders.blit, NULL); 1516 _mesa_hash_table_destroy(dev->blitter.shaders.blend, NULL); 1517 pthread_mutex_destroy(&dev->blitter.shaders.lock); 1518 _mesa_hash_table_destroy(dev->blitter.rsds.rsds, NULL); 1519 pthread_mutex_destroy(&dev->blitter.rsds.lock); 1520} 1521