pan_cs.c revision 7ec681f3
1/* 2 * Copyright (C) 2021 Collabora, Ltd. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: 24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> 25 * Boris Brezillon <boris.brezillon@collabora.com> 26 */ 27 28#include "util/macros.h" 29 30#include "panfrost-quirks.h" 31 32#include "pan_cs.h" 33#include "pan_encoder.h" 34#include "pan_texture.h" 35 36static unsigned 37mod_to_block_fmt(uint64_t mod) 38{ 39 switch (mod) { 40 case DRM_FORMAT_MOD_LINEAR: 41 return MALI_BLOCK_FORMAT_LINEAR; 42 case DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED: 43 return MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED; 44 default: 45#if PAN_ARCH >= 5 46 if (drm_is_afbc(mod)) 47 return MALI_BLOCK_FORMAT_AFBC; 48#endif 49 50 unreachable("Unsupported modifer"); 51 } 52} 53 54static enum mali_msaa 55mali_sampling_mode(const struct pan_image_view *view) 56{ 57 if (view->image->layout.nr_samples > 1) { 58 assert(view->nr_samples == view->image->layout.nr_samples); 59 assert(view->image->layout.slices[0].surface_stride != 0); 60 return MALI_MSAA_LAYERED; 61 } 62 63 if (view->nr_samples > view->image->layout.nr_samples) { 64 assert(view->image->layout.nr_samples == 1); 65 return MALI_MSAA_AVERAGE; 66 } 67 68 assert(view->nr_samples == view->image->layout.nr_samples); 69 assert(view->nr_samples == 1); 70 71 return MALI_MSAA_SINGLE; 72} 73 74static inline enum mali_sample_pattern 75pan_sample_pattern(unsigned samples) 76{ 77 switch (samples) { 78 case 1: return MALI_SAMPLE_PATTERN_SINGLE_SAMPLED; 79 case 4: return MALI_SAMPLE_PATTERN_ROTATED_4X_GRID; 80 case 8: return MALI_SAMPLE_PATTERN_D3D_8X_GRID; 81 case 16: return MALI_SAMPLE_PATTERN_D3D_16X_GRID; 82 default: unreachable("Unsupported sample count"); 83 } 84} 85 86int 87GENX(pan_select_crc_rt)(const struct pan_fb_info *fb) 88{ 89#if PAN_ARCH <= 6 90 if (fb->rt_count == 1 && fb->rts[0].view && !fb->rts[0].discard && 91 fb->rts[0].view->image->layout.crc_mode != PAN_IMAGE_CRC_NONE) 92 return 0; 93 94 return -1; 95#else 96 bool best_rt_valid = false; 97 int best_rt = -1; 98 99 for (unsigned i = 0; i < fb->rt_count; i++) { 100 if (!fb->rts[i].view || fb->rts[0].discard || 101 fb->rts[i].view->image->layout.crc_mode == PAN_IMAGE_CRC_NONE) 102 continue; 103 104 bool valid = *(fb->rts[i].crc_valid); 105 bool full = !fb->extent.minx && !fb->extent.miny && 106 fb->extent.maxx == (fb->width - 1) && 107 fb->extent.maxy == (fb->height - 1); 108 if (!full && !valid) 109 continue; 110 111 if (best_rt < 0 || (valid && !best_rt_valid)) { 112 best_rt = i; 113 best_rt_valid = valid; 114 } 115 116 if (valid) 117 break; 118 } 119 120 return best_rt; 121#endif 122} 123 124static enum mali_zs_format 125translate_zs_format(enum pipe_format in) 126{ 127 switch (in) { 128 case PIPE_FORMAT_Z16_UNORM: return MALI_ZS_FORMAT_D16; 129 case PIPE_FORMAT_Z24_UNORM_S8_UINT: return MALI_ZS_FORMAT_D24S8; 130 case PIPE_FORMAT_Z24X8_UNORM: return MALI_ZS_FORMAT_D24X8; 131 case PIPE_FORMAT_Z32_FLOAT: return MALI_ZS_FORMAT_D32; 132 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: return MALI_ZS_FORMAT_D32_S8X24; 133 default: unreachable("Unsupported depth/stencil format."); 134 } 135} 136 137#if PAN_ARCH >= 5 138static enum mali_s_format 139translate_s_format(enum pipe_format in) 140{ 141 switch (in) { 142 case PIPE_FORMAT_S8_UINT: return MALI_S_FORMAT_S8; 143 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 144 case PIPE_FORMAT_S8X24_UINT: 145 return MALI_S_FORMAT_S8X24; 146 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 147 case PIPE_FORMAT_X24S8_UINT: 148 return MALI_S_FORMAT_X24S8; 149 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 150 return MALI_S_FORMAT_X32_S8X24; 151 default: 152 unreachable("Unsupported stencil format."); 153 } 154} 155 156static void 157pan_prepare_s(const struct pan_fb_info *fb, 158 struct MALI_ZS_CRC_EXTENSION *ext) 159{ 160 const struct pan_image_view *s = fb->zs.view.s; 161 162 if (!s) 163 return; 164 165 unsigned level = s->first_level; 166 167 ext->s_msaa = mali_sampling_mode(s); 168 169 struct pan_surface surf; 170 pan_iview_get_surface(s, 0, 0, 0, &surf); 171 172 assert(s->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED || 173 s->image->layout.modifier == DRM_FORMAT_MOD_LINEAR); 174 ext->s_writeback_base = surf.data; 175 ext->s_writeback_row_stride = s->image->layout.slices[level].row_stride; 176 ext->s_writeback_surface_stride = 177 (s->image->layout.nr_samples > 1) ? 178 s->image->layout.slices[level].surface_stride : 0; 179 ext->s_block_format = mod_to_block_fmt(s->image->layout.modifier); 180 ext->s_write_format = translate_s_format(s->format); 181} 182 183static void 184pan_prepare_zs(const struct pan_fb_info *fb, 185 struct MALI_ZS_CRC_EXTENSION *ext) 186{ 187 const struct pan_image_view *zs = fb->zs.view.zs; 188 189 if (!zs) 190 return; 191 192 unsigned level = zs->first_level; 193 194 ext->zs_msaa = mali_sampling_mode(zs); 195 196 struct pan_surface surf; 197 pan_iview_get_surface(zs, 0, 0, 0, &surf); 198 199 if (drm_is_afbc(zs->image->layout.modifier)) { 200#if PAN_ARCH >= 6 201 const struct pan_image_slice_layout *slice = &zs->image->layout.slices[level]; 202 203 ext->zs_afbc_row_stride = slice->afbc.row_stride / 204 AFBC_HEADER_BYTES_PER_TILE; 205#else 206 ext->zs_block_format = MALI_BLOCK_FORMAT_AFBC; 207 ext->zs_afbc_body_size = 0x1000; 208 ext->zs_afbc_chunk_size = 9; 209 ext->zs_afbc_sparse = true; 210#endif 211 212 ext->zs_afbc_header = surf.afbc.header; 213 ext->zs_afbc_body = surf.afbc.body; 214 } else { 215 assert(zs->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED || 216 zs->image->layout.modifier == DRM_FORMAT_MOD_LINEAR); 217 218 /* TODO: Z32F(S8) support, which is always linear */ 219 220 ext->zs_writeback_base = surf.data; 221 ext->zs_writeback_row_stride = 222 zs->image->layout.slices[level].row_stride; 223 ext->zs_writeback_surface_stride = 224 (zs->image->layout.nr_samples > 1) ? 225 zs->image->layout.slices[level].surface_stride : 0; 226 } 227 228 ext->zs_block_format = mod_to_block_fmt(zs->image->layout.modifier); 229 ext->zs_write_format = translate_zs_format(zs->format); 230 if (ext->zs_write_format == MALI_ZS_FORMAT_D24S8) 231 ext->s_writeback_base = ext->zs_writeback_base; 232} 233 234static void 235pan_prepare_crc(const struct pan_fb_info *fb, int rt_crc, 236 struct MALI_ZS_CRC_EXTENSION *ext) 237{ 238 if (rt_crc < 0) 239 return; 240 241 assert(rt_crc < fb->rt_count); 242 243 const struct pan_image_view *rt = fb->rts[rt_crc].view; 244 const struct pan_image_slice_layout *slice = &rt->image->layout.slices[rt->first_level]; 245 ext->crc_base = (rt->image->layout.crc_mode == PAN_IMAGE_CRC_INBAND ? 246 (rt->image->data.bo->ptr.gpu + rt->image->data.offset) : 247 (rt->image->crc.bo->ptr.gpu + rt->image->crc.offset)) + 248 slice->crc.offset; 249 ext->crc_row_stride = slice->crc.stride; 250 251#if PAN_ARCH >= 7 252 ext->crc_render_target = rt_crc; 253 254 if (fb->rts[rt_crc].clear) { 255 uint32_t clear_val = fb->rts[rt_crc].clear_value[0]; 256 ext->crc_clear_color = clear_val | 0xc000000000000000 | 257 (((uint64_t)clear_val & 0xffff) << 32); 258 } 259#endif 260} 261 262static void 263pan_emit_zs_crc_ext(const struct pan_fb_info *fb, int rt_crc, 264 void *zs_crc_ext) 265{ 266 pan_pack(zs_crc_ext, ZS_CRC_EXTENSION, cfg) { 267 pan_prepare_crc(fb, rt_crc, &cfg); 268 cfg.zs_clean_pixel_write_enable = fb->zs.clear.z || fb->zs.clear.s; 269 pan_prepare_zs(fb, &cfg); 270 pan_prepare_s(fb, &cfg); 271 } 272} 273 274/* Measure format as it appears in the tile buffer */ 275 276static unsigned 277pan_bytes_per_pixel_tib(enum pipe_format format) 278{ 279 if (panfrost_blendable_formats_v7[format].internal) { 280 /* Blendable formats are always 32-bits in the tile buffer, 281 * extra bits are used as padding or to dither */ 282 return 4; 283 } else { 284 /* Non-blendable formats are raw, rounded up to the nearest 285 * power-of-two size */ 286 unsigned bytes = util_format_get_blocksize(format); 287 return util_next_power_of_two(bytes); 288 } 289} 290 291static unsigned 292pan_internal_cbuf_size(const struct pan_fb_info *fb, 293 unsigned *tile_size) 294{ 295 unsigned total_size = 0; 296 297 *tile_size = 16 * 16; 298 for (int cb = 0; cb < fb->rt_count; ++cb) { 299 const struct pan_image_view *rt = fb->rts[cb].view; 300 301 if (!rt) 302 continue; 303 304 total_size += pan_bytes_per_pixel_tib(rt->format) * 305 rt->nr_samples * (*tile_size); 306 } 307 308 /* We have a 4KB budget, let's reduce the tile size until it fits. */ 309 while (total_size > 4096) { 310 total_size >>= 1; 311 *tile_size >>= 1; 312 } 313 314 /* Align on 1k. */ 315 total_size = ALIGN_POT(total_size, 1024); 316 317 /* Minimum tile size is 4x4. */ 318 assert(*tile_size >= 4 * 4); 319 return total_size; 320} 321 322static enum mali_color_format 323pan_mfbd_raw_format(unsigned bits) 324{ 325 switch (bits) { 326 case 8: return MALI_COLOR_FORMAT_RAW8; 327 case 16: return MALI_COLOR_FORMAT_RAW16; 328 case 24: return MALI_COLOR_FORMAT_RAW24; 329 case 32: return MALI_COLOR_FORMAT_RAW32; 330 case 48: return MALI_COLOR_FORMAT_RAW48; 331 case 64: return MALI_COLOR_FORMAT_RAW64; 332 case 96: return MALI_COLOR_FORMAT_RAW96; 333 case 128: return MALI_COLOR_FORMAT_RAW128; 334 case 192: return MALI_COLOR_FORMAT_RAW192; 335 case 256: return MALI_COLOR_FORMAT_RAW256; 336 case 384: return MALI_COLOR_FORMAT_RAW384; 337 case 512: return MALI_COLOR_FORMAT_RAW512; 338 case 768: return MALI_COLOR_FORMAT_RAW768; 339 case 1024: return MALI_COLOR_FORMAT_RAW1024; 340 case 1536: return MALI_COLOR_FORMAT_RAW1536; 341 case 2048: return MALI_COLOR_FORMAT_RAW2048; 342 default: unreachable("invalid raw bpp"); 343 } 344} 345 346static void 347pan_rt_init_format(const struct pan_image_view *rt, 348 struct MALI_RENDER_TARGET *cfg) 349{ 350 /* Explode details on the format */ 351 352 const struct util_format_description *desc = 353 util_format_description(rt->format); 354 355 /* The swizzle for rendering is inverted from texturing */ 356 357 unsigned char swizzle[4] = { 358 PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W, 359 }; 360 361 /* Fill in accordingly, defaulting to 8-bit UNORM */ 362 363 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) 364 cfg->srgb = true; 365 366 struct pan_blendable_format fmt = panfrost_blendable_formats_v7[rt->format]; 367 368 if (fmt.internal) { 369 cfg->internal_format = fmt.internal; 370 cfg->writeback_format = fmt.writeback; 371 panfrost_invert_swizzle(desc->swizzle, swizzle); 372 } else { 373 /* Construct RAW internal/writeback, where internal is 374 * specified logarithmically (round to next power-of-two). 375 * Offset specified from RAW8, where 8 = 2^3 */ 376 377 unsigned bits = desc->block.bits; 378 unsigned offset = util_logbase2_ceil(bits) - 3; 379 assert(offset <= 4); 380 381 cfg->internal_format = 382 MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW8 + offset; 383 384 cfg->writeback_format = pan_mfbd_raw_format(bits); 385 } 386 387 cfg->swizzle = panfrost_translate_swizzle_4(swizzle); 388} 389 390static void 391pan_prepare_rt(const struct pan_fb_info *fb, unsigned idx, 392 unsigned cbuf_offset, 393 struct MALI_RENDER_TARGET *cfg) 394{ 395 cfg->clean_pixel_write_enable = fb->rts[idx].clear; 396 cfg->internal_buffer_offset = cbuf_offset; 397 if (fb->rts[idx].clear) { 398 cfg->clear.color_0 = fb->rts[idx].clear_value[0]; 399 cfg->clear.color_1 = fb->rts[idx].clear_value[1]; 400 cfg->clear.color_2 = fb->rts[idx].clear_value[2]; 401 cfg->clear.color_3 = fb->rts[idx].clear_value[3]; 402 } 403 404 const struct pan_image_view *rt = fb->rts[idx].view; 405 if (!rt || fb->rts[idx].discard) { 406 cfg->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A8; 407 cfg->internal_buffer_offset = cbuf_offset; 408#if PAN_ARCH >= 7 409 cfg->writeback_block_format = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED; 410 cfg->dithering_enable = true; 411#endif 412 return; 413 } 414 415 cfg->write_enable = true; 416 cfg->dithering_enable = true; 417 418 unsigned level = rt->first_level; 419 assert(rt->last_level == rt->first_level); 420 assert(rt->last_layer == rt->first_layer); 421 422 int row_stride = rt->image->layout.slices[level].row_stride; 423 424 /* Only set layer_stride for layered MSAA rendering */ 425 426 unsigned layer_stride = 427 (rt->image->layout.nr_samples > 1) ? 428 rt->image->layout.slices[level].surface_stride : 0; 429 430 cfg->writeback_msaa = mali_sampling_mode(rt); 431 432 pan_rt_init_format(rt, cfg); 433 434#if PAN_ARCH <= 5 435 cfg->writeback_block_format = mod_to_block_fmt(rt->image->layout.modifier); 436#else 437 cfg->writeback_block_format = mod_to_block_fmt(rt->image->layout.modifier); 438#endif 439 440 struct pan_surface surf; 441 pan_iview_get_surface(rt, 0, 0, 0, &surf); 442 443 if (drm_is_afbc(rt->image->layout.modifier)) { 444 const struct pan_image_slice_layout *slice = &rt->image->layout.slices[level]; 445 446#if PAN_ARCH >= 6 447 cfg->afbc.row_stride = slice->afbc.row_stride / 448 AFBC_HEADER_BYTES_PER_TILE; 449 cfg->afbc.afbc_wide_block_enable = 450 panfrost_block_dim(rt->image->layout.modifier, true, 0) > 16; 451#else 452 cfg->afbc.chunk_size = 9; 453 cfg->afbc.sparse = true; 454 cfg->afbc.body_size = slice->afbc.body_size; 455#endif 456 457 cfg->afbc.header = surf.afbc.header; 458 cfg->afbc.body = surf.afbc.body; 459 460 if (rt->image->layout.modifier & AFBC_FORMAT_MOD_YTR) 461 cfg->afbc.yuv_transform_enable = true; 462 } else { 463 assert(rt->image->layout.modifier == DRM_FORMAT_MOD_LINEAR || 464 rt->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED); 465 cfg->rgb.base = surf.data; 466 cfg->rgb.row_stride = row_stride; 467 cfg->rgb.surface_stride = layer_stride; 468 } 469} 470#endif 471 472void 473GENX(pan_emit_tls)(const struct pan_tls_info *info, 474 void *out) 475{ 476 pan_pack(out, LOCAL_STORAGE, cfg) { 477 if (info->tls.size) { 478 unsigned shift = 479 panfrost_get_stack_shift(info->tls.size); 480 481 cfg.tls_size = shift; 482 cfg.tls_base_pointer = info->tls.ptr; 483 } 484 485 if (info->wls.size) { 486 assert(!(info->wls.ptr & 4095)); 487 assert((info->wls.ptr & 0xffffffff00000000ULL) == ((info->wls.ptr + info->wls.size - 1) & 0xffffffff00000000ULL)); 488 cfg.wls_base_pointer = info->wls.ptr; 489 unsigned wls_size = pan_wls_adjust_size(info->wls.size); 490 cfg.wls_instances = pan_wls_instances(&info->wls.dim); 491 cfg.wls_size_scale = util_logbase2(wls_size) + 1; 492 } else { 493 cfg.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM; 494 } 495 } 496} 497 498#if PAN_ARCH <= 5 499static void 500pan_emit_midgard_tiler(const struct panfrost_device *dev, 501 const struct pan_fb_info *fb, 502 const struct pan_tiler_context *tiler_ctx, 503 void *out) 504{ 505 bool hierarchy = !(dev->quirks & MIDGARD_NO_HIER_TILING); 506 507 assert(tiler_ctx->midgard.polygon_list->ptr.gpu); 508 509 pan_pack(out, TILER_CONTEXT, cfg) { 510 unsigned header_size; 511 512 if (tiler_ctx->midgard.disable) { 513 cfg.hierarchy_mask = 514 hierarchy ? 515 MALI_MIDGARD_TILER_DISABLED : 516 MALI_MIDGARD_TILER_USER; 517 header_size = MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE; 518 cfg.polygon_list_size = header_size + (hierarchy ? 0 : 4); 519 cfg.heap_start = tiler_ctx->midgard.polygon_list->ptr.gpu; 520 cfg.heap_end = tiler_ctx->midgard.polygon_list->ptr.gpu; 521 } else { 522 cfg.hierarchy_mask = 523 panfrost_choose_hierarchy_mask(fb->width, 524 fb->height, 525 1, hierarchy); 526 header_size = panfrost_tiler_header_size(fb->width, 527 fb->height, 528 cfg.hierarchy_mask, 529 hierarchy); 530 cfg.polygon_list_size = 531 panfrost_tiler_full_size(fb->width, fb->height, 532 cfg.hierarchy_mask, 533 hierarchy); 534 cfg.heap_start = dev->tiler_heap->ptr.gpu; 535 cfg.heap_end = dev->tiler_heap->ptr.gpu + dev->tiler_heap->size; 536 } 537 538 cfg.polygon_list = tiler_ctx->midgard.polygon_list->ptr.gpu; 539 cfg.polygon_list_body = cfg.polygon_list + header_size; 540 } 541} 542#endif 543 544#if PAN_ARCH >= 5 545static void 546pan_emit_rt(const struct pan_fb_info *fb, 547 unsigned idx, unsigned cbuf_offset, void *out) 548{ 549 pan_pack(out, RENDER_TARGET, cfg) { 550 pan_prepare_rt(fb, idx, cbuf_offset, &cfg); 551 } 552} 553 554#if PAN_ARCH >= 6 555/* All Bifrost and Valhall GPUs are affected by issue TSIX-2033: 556 * 557 * Forcing clean_tile_writes breaks INTERSECT readbacks 558 * 559 * To workaround, use the frame shader mode ALWAYS instead of INTERSECT if 560 * clean tile writes is forced. Since INTERSECT is a hint that the hardware may 561 * ignore, this cannot affect correctness, only performance */ 562 563static enum mali_pre_post_frame_shader_mode 564pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode, bool force_clean_tile) 565{ 566 if (force_clean_tile && mode == MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT) 567 return MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS; 568 else 569 return mode; 570} 571 572/* Regardless of clean_tile_write_enable, the hardware writes clean tiles if 573 * the effective tile size differs from the superblock size of any enabled AFBC 574 * render target. Check this condition. */ 575 576static bool 577pan_force_clean_write_rt(const struct pan_image_view *rt, unsigned tile_size) 578{ 579 if (!drm_is_afbc(rt->image->layout.modifier)) 580 return false; 581 582 unsigned superblock = panfrost_block_dim(rt->image->layout.modifier, true, 0); 583 584 assert(superblock >= 16); 585 assert(tile_size <= 16*16); 586 587 /* Tile size and superblock differ unless they are both 16x16 */ 588 return !(superblock == 16 && tile_size == 16*16); 589} 590 591static bool 592pan_force_clean_write(const struct pan_fb_info *fb, unsigned tile_size) 593{ 594 /* Maximum tile size */ 595 assert(tile_size <= 16*16); 596 597 for (unsigned i = 0; i < fb->rt_count; ++i) { 598 if (fb->rts[i].view && !fb->rts[i].discard && 599 pan_force_clean_write_rt(fb->rts[i].view, tile_size)) 600 return true; 601 } 602 603 if (fb->zs.view.zs && !fb->zs.discard.z && 604 pan_force_clean_write_rt(fb->zs.view.zs, tile_size)) 605 return true; 606 607 if (fb->zs.view.s && !fb->zs.discard.s && 608 pan_force_clean_write_rt(fb->zs.view.s, tile_size)) 609 return true; 610 611 return false; 612} 613 614#endif 615 616static unsigned 617pan_emit_mfbd(const struct panfrost_device *dev, 618 const struct pan_fb_info *fb, 619 const struct pan_tls_info *tls, 620 const struct pan_tiler_context *tiler_ctx, 621 void *out) 622{ 623 unsigned tags = MALI_FBD_TAG_IS_MFBD; 624 void *fbd = out; 625 void *rtd = out + pan_size(FRAMEBUFFER); 626 627#if PAN_ARCH <= 5 628 GENX(pan_emit_tls)(tls, 629 pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE)); 630#endif 631 632 unsigned tile_size; 633 unsigned internal_cbuf_size = pan_internal_cbuf_size(fb, &tile_size); 634 int crc_rt = GENX(pan_select_crc_rt)(fb); 635 bool has_zs_crc_ext = pan_fbd_has_zs_crc_ext(fb); 636 637 pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) { 638#if PAN_ARCH >= 6 639 bool force_clean_write = pan_force_clean_write(fb, tile_size); 640 641 cfg.sample_locations = 642 panfrost_sample_positions(dev, pan_sample_pattern(fb->nr_samples)); 643 cfg.pre_frame_0 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[0], force_clean_write); 644 cfg.pre_frame_1 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[1], force_clean_write); 645 cfg.post_frame = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[2], force_clean_write); 646 cfg.frame_shader_dcds = fb->bifrost.pre_post.dcds.gpu; 647 cfg.tiler = tiler_ctx->bifrost; 648#endif 649 cfg.width = fb->width; 650 cfg.height = fb->height; 651 cfg.bound_max_x = fb->width - 1; 652 cfg.bound_max_y = fb->height - 1; 653 654 cfg.effective_tile_size = tile_size; 655 cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT; 656 cfg.render_target_count = MAX2(fb->rt_count, 1); 657 658 /* Default to 24 bit depth if there's no surface. */ 659 cfg.z_internal_format = 660 fb->zs.view.zs ? 661 panfrost_get_z_internal_format(fb->zs.view.zs->format) : 662 MALI_Z_INTERNAL_FORMAT_D24; 663 664 cfg.z_clear = fb->zs.clear_value.depth; 665 cfg.s_clear = fb->zs.clear_value.stencil; 666 cfg.color_buffer_allocation = internal_cbuf_size; 667 cfg.sample_count = fb->nr_samples; 668 cfg.sample_pattern = pan_sample_pattern(fb->nr_samples); 669 cfg.z_write_enable = (fb->zs.view.zs && !fb->zs.discard.z); 670 cfg.s_write_enable = (fb->zs.view.s && !fb->zs.discard.s); 671 cfg.has_zs_crc_extension = has_zs_crc_ext; 672 673 if (crc_rt >= 0) { 674 bool *valid = fb->rts[crc_rt].crc_valid; 675 bool full = !fb->extent.minx && !fb->extent.miny && 676 fb->extent.maxx == (fb->width - 1) && 677 fb->extent.maxy == (fb->height - 1); 678 679 cfg.crc_read_enable = *valid; 680 681 /* If the data is currently invalid, still write CRC 682 * data if we are doing a full write, so that it is 683 * valid for next time. */ 684 cfg.crc_write_enable = *valid || full; 685 686 *valid |= full; 687 } 688 } 689 690#if PAN_ARCH >= 6 691 pan_section_pack(fbd, FRAMEBUFFER, PADDING, padding); 692#else 693 pan_emit_midgard_tiler(dev, fb, tiler_ctx, 694 pan_section_ptr(fbd, FRAMEBUFFER, TILER)); 695 696 /* All weights set to 0, nothing to do here */ 697 pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w); 698#endif 699 700 if (has_zs_crc_ext) { 701 pan_emit_zs_crc_ext(fb, crc_rt, 702 out + pan_size(FRAMEBUFFER)); 703 rtd += pan_size(ZS_CRC_EXTENSION); 704 tags |= MALI_FBD_TAG_HAS_ZS_RT; 705 } 706 707 unsigned rt_count = MAX2(fb->rt_count, 1); 708 unsigned cbuf_offset = 0; 709 for (unsigned i = 0; i < rt_count; i++) { 710 pan_emit_rt(fb, i, cbuf_offset, rtd); 711 rtd += pan_size(RENDER_TARGET); 712 if (!fb->rts[i].view) 713 continue; 714 715 cbuf_offset += pan_bytes_per_pixel_tib(fb->rts[i].view->format) * 716 tile_size * fb->rts[i].view->image->layout.nr_samples; 717 718 if (i != crc_rt) 719 *(fb->rts[i].crc_valid) = false; 720 } 721 tags |= MALI_POSITIVE(MAX2(fb->rt_count, 1)) << 2; 722 723 return tags; 724} 725#else /* PAN_ARCH == 4 */ 726static void 727pan_emit_sfbd_tiler(const struct panfrost_device *dev, 728 const struct pan_fb_info *fb, 729 const struct pan_tiler_context *ctx, 730 void *fbd) 731{ 732 pan_emit_midgard_tiler(dev, fb, ctx, 733 pan_section_ptr(fbd, FRAMEBUFFER, TILER)); 734 735 /* All weights set to 0, nothing to do here */ 736 pan_section_pack(fbd, FRAMEBUFFER, PADDING_1, padding); 737 pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w); 738} 739 740static void 741pan_emit_sfbd(const struct panfrost_device *dev, 742 const struct pan_fb_info *fb, 743 const struct pan_tls_info *tls, 744 const struct pan_tiler_context *tiler_ctx, 745 void *fbd) 746{ 747 GENX(pan_emit_tls)(tls, 748 pan_section_ptr(fbd, FRAMEBUFFER, 749 LOCAL_STORAGE)); 750 pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) { 751 cfg.bound_max_x = fb->width - 1; 752 cfg.bound_max_y = fb->height - 1; 753 cfg.dithering_enable = true; 754 cfg.clean_pixel_write_enable = true; 755 cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT; 756 if (fb->rts[0].clear) { 757 cfg.clear_color_0 = fb->rts[0].clear_value[0]; 758 cfg.clear_color_1 = fb->rts[0].clear_value[1]; 759 cfg.clear_color_2 = fb->rts[0].clear_value[2]; 760 cfg.clear_color_3 = fb->rts[0].clear_value[3]; 761 } 762 763 if (fb->zs.clear.z) 764 cfg.z_clear = fb->zs.clear_value.depth; 765 766 if (fb->zs.clear.s) 767 cfg.s_clear = fb->zs.clear_value.stencil; 768 769 if (fb->rt_count && fb->rts[0].view) { 770 const struct pan_image_view *rt = fb->rts[0].view; 771 772 const struct util_format_description *desc = 773 util_format_description(rt->format); 774 775 /* The swizzle for rendering is inverted from texturing */ 776 unsigned char swizzle[4]; 777 panfrost_invert_swizzle(desc->swizzle, swizzle); 778 cfg.swizzle = panfrost_translate_swizzle_4(swizzle); 779 780 struct pan_blendable_format fmt = panfrost_blendable_formats_v7[rt->format]; 781 if (fmt.internal) { 782 cfg.internal_format = fmt.internal; 783 cfg.color_writeback_format = fmt.writeback; 784 } else { 785 unreachable("raw formats not finished for SFBD"); 786 } 787 788 unsigned level = rt->first_level; 789 struct pan_surface surf; 790 791 pan_iview_get_surface(rt, 0, 0, 0, &surf); 792 793 cfg.color_write_enable = !fb->rts[0].discard; 794 cfg.color_writeback.base = surf.data; 795 cfg.color_writeback.row_stride = 796 rt->image->layout.slices[level].row_stride; 797 798 cfg.color_block_format = mod_to_block_fmt(rt->image->layout.modifier); 799 assert(cfg.color_block_format == MALI_BLOCK_FORMAT_LINEAR || 800 cfg.color_block_format == MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED); 801 802 if (rt->image->layout.crc_mode != PAN_IMAGE_CRC_NONE) { 803 const struct pan_image_slice_layout *slice = 804 &rt->image->layout.slices[level]; 805 806 cfg.crc_buffer.row_stride = slice->crc.stride; 807 if (rt->image->layout.crc_mode == PAN_IMAGE_CRC_INBAND) { 808 cfg.crc_buffer.base = rt->image->data.bo->ptr.gpu + 809 rt->image->data.offset + 810 slice->crc.offset; 811 } else { 812 cfg.crc_buffer.base = rt->image->crc.bo->ptr.gpu + 813 rt->image->crc.offset + 814 slice->crc.offset; 815 } 816 } 817 } 818 819 if (fb->zs.view.zs) { 820 const struct pan_image_view *zs = fb->zs.view.zs; 821 unsigned level = zs->first_level; 822 struct pan_surface surf; 823 824 pan_iview_get_surface(zs, 0, 0, 0, &surf); 825 826 cfg.zs_write_enable = !fb->zs.discard.z; 827 cfg.zs_writeback.base = surf.data; 828 cfg.zs_writeback.row_stride = 829 zs->image->layout.slices[level].row_stride; 830 cfg.zs_block_format = mod_to_block_fmt(zs->image->layout.modifier); 831 assert(cfg.zs_block_format == MALI_BLOCK_FORMAT_LINEAR || 832 cfg.zs_block_format == MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED); 833 834 cfg.zs_format = translate_zs_format(zs->format); 835 } 836 837 cfg.sample_count = fb->nr_samples; 838 839 if (fb->rt_count) 840 cfg.msaa = mali_sampling_mode(fb->rts[0].view); 841 } 842 pan_emit_sfbd_tiler(dev, fb, tiler_ctx, fbd); 843 pan_section_pack(fbd, FRAMEBUFFER, PADDING_2, padding); 844} 845#endif 846 847unsigned 848GENX(pan_emit_fbd)(const struct panfrost_device *dev, 849 const struct pan_fb_info *fb, 850 const struct pan_tls_info *tls, 851 const struct pan_tiler_context *tiler_ctx, 852 void *out) 853{ 854#if PAN_ARCH == 4 855 assert(fb->rt_count <= 1); 856 pan_emit_sfbd(dev, fb, tls, tiler_ctx, out); 857 return 0; 858#else 859 return pan_emit_mfbd(dev, fb, tls, tiler_ctx, out); 860#endif 861} 862 863#if PAN_ARCH >= 6 864void 865GENX(pan_emit_tiler_heap)(const struct panfrost_device *dev, 866 void *out) 867{ 868 pan_pack(out, TILER_HEAP, heap) { 869 heap.size = dev->tiler_heap->size; 870 heap.base = dev->tiler_heap->ptr.gpu; 871 heap.bottom = dev->tiler_heap->ptr.gpu; 872 heap.top = dev->tiler_heap->ptr.gpu + dev->tiler_heap->size; 873 } 874} 875 876void 877GENX(pan_emit_tiler_ctx)(const struct panfrost_device *dev, 878 unsigned fb_width, unsigned fb_height, 879 unsigned nr_samples, 880 mali_ptr heap, 881 void *out) 882{ 883 unsigned max_levels = dev->tiler_features.max_levels; 884 assert(max_levels >= 2); 885 886 pan_pack(out, TILER_CONTEXT, tiler) { 887 /* TODO: Select hierarchy mask more effectively */ 888 tiler.hierarchy_mask = (max_levels >= 8) ? 0xFF : 0x28; 889 tiler.fb_width = fb_width; 890 tiler.fb_height = fb_height; 891 tiler.heap = heap; 892 tiler.sample_pattern = pan_sample_pattern(nr_samples); 893 } 894} 895#endif 896 897void 898GENX(pan_emit_fragment_job)(const struct pan_fb_info *fb, 899 mali_ptr fbd, 900 void *out) 901{ 902 pan_section_pack(out, FRAGMENT_JOB, HEADER, header) { 903 header.type = MALI_JOB_TYPE_FRAGMENT; 904 header.index = 1; 905 } 906 907 pan_section_pack(out, FRAGMENT_JOB, PAYLOAD, payload) { 908 payload.bound_min_x = fb->extent.minx >> MALI_TILE_SHIFT; 909 payload.bound_min_y = fb->extent.miny >> MALI_TILE_SHIFT; 910 payload.bound_max_x = fb->extent.maxx >> MALI_TILE_SHIFT; 911 payload.bound_max_y = fb->extent.maxy >> MALI_TILE_SHIFT; 912 payload.framebuffer = fbd; 913 914#if PAN_ARCH >= 5 915 if (fb->tile_map.base) { 916 payload.has_tile_enable_map = true; 917 payload.tile_enable_map = fb->tile_map.base; 918 payload.tile_enable_map_row_stride = fb->tile_map.stride; 919 } 920#endif 921 } 922} 923