v3d_blit.c revision 7ec681f3
1/* 2 * Copyright © 2015-2017 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "util/format/u_format.h" 25#include "util/u_surface.h" 26#include "util/u_blitter.h" 27#include "compiler/nir/nir_builder.h" 28#include "v3d_context.h" 29#include "broadcom/common/v3d_tiling.h" 30 31void 32v3d_blitter_save(struct v3d_context *v3d) 33{ 34 util_blitter_save_fragment_constant_buffer_slot(v3d->blitter, 35 v3d->constbuf[PIPE_SHADER_FRAGMENT].cb); 36 util_blitter_save_vertex_buffer_slot(v3d->blitter, v3d->vertexbuf.vb); 37 util_blitter_save_vertex_elements(v3d->blitter, v3d->vtx); 38 util_blitter_save_vertex_shader(v3d->blitter, v3d->prog.bind_vs); 39 util_blitter_save_geometry_shader(v3d->blitter, v3d->prog.bind_gs); 40 util_blitter_save_so_targets(v3d->blitter, v3d->streamout.num_targets, 41 v3d->streamout.targets); 42 util_blitter_save_rasterizer(v3d->blitter, v3d->rasterizer); 43 util_blitter_save_viewport(v3d->blitter, &v3d->viewport); 44 util_blitter_save_scissor(v3d->blitter, &v3d->scissor); 45 util_blitter_save_fragment_shader(v3d->blitter, v3d->prog.bind_fs); 46 util_blitter_save_blend(v3d->blitter, v3d->blend); 47 util_blitter_save_depth_stencil_alpha(v3d->blitter, v3d->zsa); 48 util_blitter_save_stencil_ref(v3d->blitter, &v3d->stencil_ref); 49 util_blitter_save_sample_mask(v3d->blitter, v3d->sample_mask); 50 util_blitter_save_framebuffer(v3d->blitter, &v3d->framebuffer); 51 util_blitter_save_fragment_sampler_states(v3d->blitter, 52 v3d->tex[PIPE_SHADER_FRAGMENT].num_samplers, 53 (void **)v3d->tex[PIPE_SHADER_FRAGMENT].samplers); 54 util_blitter_save_fragment_sampler_views(v3d->blitter, 55 v3d->tex[PIPE_SHADER_FRAGMENT].num_textures, 56 v3d->tex[PIPE_SHADER_FRAGMENT].textures); 57 util_blitter_save_so_targets(v3d->blitter, v3d->streamout.num_targets, 58 v3d->streamout.targets); 59} 60 61static void 62v3d_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info) 63{ 64 struct v3d_context *v3d = v3d_context(ctx); 65 struct v3d_resource *src = v3d_resource(info->src.resource); 66 struct pipe_resource *tiled = NULL; 67 68 if (!info->mask) 69 return; 70 71 if (!src->tiled) { 72 struct pipe_box box = { 73 .x = 0, 74 .y = 0, 75 .width = u_minify(info->src.resource->width0, 76 info->src.level), 77 .height = u_minify(info->src.resource->height0, 78 info->src.level), 79 .depth = 1, 80 }; 81 struct pipe_resource tmpl = { 82 .target = info->src.resource->target, 83 .format = info->src.resource->format, 84 .width0 = box.width, 85 .height0 = box.height, 86 .depth0 = 1, 87 .array_size = 1, 88 }; 89 tiled = ctx->screen->resource_create(ctx->screen, &tmpl); 90 if (!tiled) { 91 fprintf(stderr, "Failed to create tiled blit temp\n"); 92 return; 93 } 94 ctx->resource_copy_region(ctx, 95 tiled, 0, 96 0, 0, 0, 97 info->src.resource, info->src.level, 98 &box); 99 info->src.level = 0; 100 info->src.resource = tiled; 101 } 102 103 if (!util_blitter_is_blit_supported(v3d->blitter, info)) { 104 fprintf(stderr, "blit unsupported %s -> %s\n", 105 util_format_short_name(info->src.resource->format), 106 util_format_short_name(info->dst.resource->format)); 107 return; 108 } 109 110 v3d_blitter_save(v3d); 111 util_blitter_blit(v3d->blitter, info); 112 113 pipe_resource_reference(&tiled, NULL); 114 info->mask = 0; 115} 116 117/* Implement stencil blits by reinterpreting the stencil data as an RGBA8888 118 * or R8 texture. 119 */ 120static void 121v3d_stencil_blit(struct pipe_context *ctx, struct pipe_blit_info *info) 122{ 123 struct v3d_context *v3d = v3d_context(ctx); 124 struct v3d_resource *src = v3d_resource(info->src.resource); 125 struct v3d_resource *dst = v3d_resource(info->dst.resource); 126 enum pipe_format src_format, dst_format; 127 128 if ((info->mask & PIPE_MASK_S) == 0) 129 return; 130 131 if (src->separate_stencil) { 132 src = src->separate_stencil; 133 src_format = PIPE_FORMAT_R8_UINT; 134 } else { 135 src_format = PIPE_FORMAT_RGBA8888_UINT; 136 } 137 138 if (dst->separate_stencil) { 139 dst = dst->separate_stencil; 140 dst_format = PIPE_FORMAT_R8_UINT; 141 } else { 142 dst_format = PIPE_FORMAT_RGBA8888_UINT; 143 } 144 145 /* Initialize the surface. */ 146 struct pipe_surface dst_tmpl = { 147 .u.tex = { 148 .level = info->dst.level, 149 .first_layer = info->dst.box.z, 150 .last_layer = info->dst.box.z, 151 }, 152 .format = dst_format, 153 }; 154 struct pipe_surface *dst_surf = 155 ctx->create_surface(ctx, &dst->base, &dst_tmpl); 156 157 /* Initialize the sampler view. */ 158 struct pipe_sampler_view src_tmpl = { 159 .target = src->base.target, 160 .format = src_format, 161 .u.tex = { 162 .first_level = info->src.level, 163 .last_level = info->src.level, 164 .first_layer = 0, 165 .last_layer = (PIPE_TEXTURE_3D ? 166 u_minify(src->base.depth0, 167 info->src.level) - 1 : 168 src->base.array_size - 1), 169 }, 170 .swizzle_r = PIPE_SWIZZLE_X, 171 .swizzle_g = PIPE_SWIZZLE_Y, 172 .swizzle_b = PIPE_SWIZZLE_Z, 173 .swizzle_a = PIPE_SWIZZLE_W, 174 }; 175 struct pipe_sampler_view *src_view = 176 ctx->create_sampler_view(ctx, &src->base, &src_tmpl); 177 178 v3d_blitter_save(v3d); 179 util_blitter_blit_generic(v3d->blitter, dst_surf, &info->dst.box, 180 src_view, &info->src.box, 181 src->base.width0, src->base.height0, 182 PIPE_MASK_R, 183 PIPE_TEX_FILTER_NEAREST, 184 info->scissor_enable ? &info->scissor : NULL, 185 info->alpha_blend, false); 186 187 pipe_surface_reference(&dst_surf, NULL); 188 pipe_sampler_view_reference(&src_view, NULL); 189 190 info->mask &= ~PIPE_MASK_S; 191} 192 193/* Disable level 0 write, just write following mipmaps */ 194#define V3D_TFU_IOA_DIMTW (1 << 0) 195#define V3D_TFU_IOA_FORMAT_SHIFT 3 196#define V3D_TFU_IOA_FORMAT_LINEARTILE 3 197#define V3D_TFU_IOA_FORMAT_UBLINEAR_1_COLUMN 4 198#define V3D_TFU_IOA_FORMAT_UBLINEAR_2_COLUMN 5 199#define V3D_TFU_IOA_FORMAT_UIF_NO_XOR 6 200#define V3D_TFU_IOA_FORMAT_UIF_XOR 7 201 202#define V3D_TFU_ICFG_NUMMM_SHIFT 5 203#define V3D_TFU_ICFG_TTYPE_SHIFT 9 204 205#define V3D_TFU_ICFG_OPAD_SHIFT 22 206 207#define V3D_TFU_ICFG_FORMAT_SHIFT 18 208#define V3D_TFU_ICFG_FORMAT_RASTER 0 209#define V3D_TFU_ICFG_FORMAT_SAND_128 1 210#define V3D_TFU_ICFG_FORMAT_SAND_256 2 211#define V3D_TFU_ICFG_FORMAT_LINEARTILE 11 212#define V3D_TFU_ICFG_FORMAT_UBLINEAR_1_COLUMN 12 213#define V3D_TFU_ICFG_FORMAT_UBLINEAR_2_COLUMN 13 214#define V3D_TFU_ICFG_FORMAT_UIF_NO_XOR 14 215#define V3D_TFU_ICFG_FORMAT_UIF_XOR 15 216 217static bool 218v3d_tfu(struct pipe_context *pctx, 219 struct pipe_resource *pdst, 220 struct pipe_resource *psrc, 221 unsigned int src_level, 222 unsigned int base_level, 223 unsigned int last_level, 224 unsigned int src_layer, 225 unsigned int dst_layer, 226 bool for_mipmap) 227{ 228 struct v3d_context *v3d = v3d_context(pctx); 229 struct v3d_screen *screen = v3d->screen; 230 struct v3d_resource *src = v3d_resource(psrc); 231 struct v3d_resource *dst = v3d_resource(pdst); 232 struct v3d_resource_slice *src_base_slice = &src->slices[src_level]; 233 struct v3d_resource_slice *dst_base_slice = &dst->slices[base_level]; 234 int msaa_scale = pdst->nr_samples > 1 ? 2 : 1; 235 int width = u_minify(pdst->width0, base_level) * msaa_scale; 236 int height = u_minify(pdst->height0, base_level) * msaa_scale; 237 enum pipe_format pformat; 238 239 if (psrc->format != pdst->format) 240 return false; 241 if (psrc->nr_samples != pdst->nr_samples) 242 return false; 243 244 if (pdst->target != PIPE_TEXTURE_2D || psrc->target != PIPE_TEXTURE_2D) 245 return false; 246 247 /* Can't write to raster. */ 248 if (dst_base_slice->tiling == V3D_TILING_RASTER) 249 return false; 250 251 /* When using TFU for blit, we are doing exact copies (both input and 252 * output format must be the same, no scaling, etc), so there is no 253 * pixel format conversions. Thus we can rewrite the format to use one 254 * that is TFU compatible based on its texel size. 255 */ 256 if (for_mipmap) { 257 pformat = pdst->format; 258 } else { 259 switch (dst->cpp) { 260 case 16: pformat = PIPE_FORMAT_R32G32B32A32_FLOAT; break; 261 case 8: pformat = PIPE_FORMAT_R16G16B16A16_FLOAT; break; 262 case 4: pformat = PIPE_FORMAT_R32_FLOAT; break; 263 case 2: pformat = PIPE_FORMAT_R16_FLOAT; break; 264 case 1: pformat = PIPE_FORMAT_R8_UNORM; break; 265 default: unreachable("unsupported format bit-size"); break; 266 }; 267 } 268 269 uint32_t tex_format = v3d_get_tex_format(&screen->devinfo, pformat); 270 271 if (!v3d_tfu_supports_tex_format(&screen->devinfo, tex_format, for_mipmap)) { 272 assert(for_mipmap); 273 return false; 274 } 275 276 v3d_flush_jobs_writing_resource(v3d, psrc, V3D_FLUSH_DEFAULT, false); 277 v3d_flush_jobs_reading_resource(v3d, pdst, V3D_FLUSH_DEFAULT, false); 278 279 struct drm_v3d_submit_tfu tfu = { 280 .ios = (height << 16) | width, 281 .bo_handles = { 282 dst->bo->handle, 283 src != dst ? src->bo->handle : 0 284 }, 285 .in_sync = v3d->out_sync, 286 .out_sync = v3d->out_sync, 287 }; 288 uint32_t src_offset = (src->bo->offset + 289 v3d_layer_offset(psrc, src_level, src_layer)); 290 tfu.iia |= src_offset; 291 if (src_base_slice->tiling == V3D_TILING_RASTER) { 292 tfu.icfg |= (V3D_TFU_ICFG_FORMAT_RASTER << 293 V3D_TFU_ICFG_FORMAT_SHIFT); 294 } else { 295 tfu.icfg |= ((V3D_TFU_ICFG_FORMAT_LINEARTILE + 296 (src_base_slice->tiling - V3D_TILING_LINEARTILE)) << 297 V3D_TFU_ICFG_FORMAT_SHIFT); 298 } 299 300 uint32_t dst_offset = (dst->bo->offset + 301 v3d_layer_offset(pdst, base_level, dst_layer)); 302 tfu.ioa |= dst_offset; 303 if (last_level != base_level) 304 tfu.ioa |= V3D_TFU_IOA_DIMTW; 305 tfu.ioa |= ((V3D_TFU_IOA_FORMAT_LINEARTILE + 306 (dst_base_slice->tiling - V3D_TILING_LINEARTILE)) << 307 V3D_TFU_IOA_FORMAT_SHIFT); 308 309 tfu.icfg |= tex_format << V3D_TFU_ICFG_TTYPE_SHIFT; 310 tfu.icfg |= (last_level - base_level) << V3D_TFU_ICFG_NUMMM_SHIFT; 311 312 switch (src_base_slice->tiling) { 313 case V3D_TILING_UIF_NO_XOR: 314 case V3D_TILING_UIF_XOR: 315 tfu.iis |= (src_base_slice->padded_height / 316 (2 * v3d_utile_height(src->cpp))); 317 break; 318 case V3D_TILING_RASTER: 319 tfu.iis |= src_base_slice->stride / src->cpp; 320 break; 321 case V3D_TILING_LINEARTILE: 322 case V3D_TILING_UBLINEAR_1_COLUMN: 323 case V3D_TILING_UBLINEAR_2_COLUMN: 324 break; 325 } 326 327 /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the 328 * OPAD field for the destination (how many extra UIF blocks beyond 329 * those necessary to cover the height). When filling mipmaps, the 330 * miplevel 1+ tiling state is inferred. 331 */ 332 if (dst_base_slice->tiling == V3D_TILING_UIF_NO_XOR || 333 dst_base_slice->tiling == V3D_TILING_UIF_XOR) { 334 int uif_block_h = 2 * v3d_utile_height(dst->cpp); 335 int implicit_padded_height = align(height, uif_block_h); 336 337 tfu.icfg |= (((dst_base_slice->padded_height - 338 implicit_padded_height) / uif_block_h) << 339 V3D_TFU_ICFG_OPAD_SHIFT); 340 } 341 342 int ret = v3d_ioctl(screen->fd, DRM_IOCTL_V3D_SUBMIT_TFU, &tfu); 343 if (ret != 0) { 344 fprintf(stderr, "Failed to submit TFU job: %d\n", ret); 345 return false; 346 } 347 348 dst->writes++; 349 350 return true; 351} 352 353bool 354v3d_generate_mipmap(struct pipe_context *pctx, 355 struct pipe_resource *prsc, 356 enum pipe_format format, 357 unsigned int base_level, 358 unsigned int last_level, 359 unsigned int first_layer, 360 unsigned int last_layer) 361{ 362 if (format != prsc->format) 363 return false; 364 365 /* We could maybe support looping over layers for array textures, but 366 * we definitely don't support 3D. 367 */ 368 if (first_layer != last_layer) 369 return false; 370 371 return v3d_tfu(pctx, 372 prsc, prsc, 373 base_level, 374 base_level, last_level, 375 first_layer, first_layer, 376 true); 377} 378 379static void 380v3d_tfu_blit(struct pipe_context *pctx, struct pipe_blit_info *info) 381{ 382 int dst_width = u_minify(info->dst.resource->width0, info->dst.level); 383 int dst_height = u_minify(info->dst.resource->height0, info->dst.level); 384 385 if ((info->mask & PIPE_MASK_RGBA) == 0) 386 return; 387 388 if (info->scissor_enable || 389 info->dst.box.x != 0 || 390 info->dst.box.y != 0 || 391 info->dst.box.width != dst_width || 392 info->dst.box.height != dst_height || 393 info->src.box.x != 0 || 394 info->src.box.y != 0 || 395 info->src.box.width != info->dst.box.width || 396 info->src.box.height != info->dst.box.height) { 397 return; 398 } 399 400 if (info->dst.format != info->src.format) 401 return; 402 403 if (v3d_tfu(pctx, info->dst.resource, info->src.resource, 404 info->src.level, 405 info->dst.level, info->dst.level, 406 info->src.box.z, info->dst.box.z, 407 false)) { 408 info->mask &= ~PIPE_MASK_RGBA; 409 } 410} 411 412static struct pipe_surface * 413v3d_get_blit_surface(struct pipe_context *pctx, 414 struct pipe_resource *prsc, 415 unsigned level, 416 int16_t layer) 417{ 418 struct pipe_surface tmpl; 419 420 tmpl.format = prsc->format; 421 tmpl.u.tex.level = level; 422 tmpl.u.tex.first_layer = layer; 423 tmpl.u.tex.last_layer = layer; 424 425 return pctx->create_surface(pctx, prsc, &tmpl); 426} 427 428static bool 429is_tile_unaligned(unsigned size, unsigned tile_size) 430{ 431 return size & (tile_size - 1); 432} 433 434static void 435v3d_tlb_blit(struct pipe_context *pctx, struct pipe_blit_info *info) 436{ 437 struct v3d_context *v3d = v3d_context(pctx); 438 struct v3d_screen *screen = v3d->screen; 439 440 if (screen->devinfo.ver < 40 || !info->mask) 441 return; 442 443 bool is_color_blit = info->mask & PIPE_MASK_RGBA; 444 bool is_depth_blit = info->mask & PIPE_MASK_Z; 445 bool is_stencil_blit = info->mask & PIPE_MASK_S; 446 447 /* We should receive either a depth/stencil blit, or color blit, but 448 * not both. 449 */ 450 assert ((is_color_blit && !is_depth_blit && !is_stencil_blit) || 451 (!is_color_blit && (is_depth_blit || is_stencil_blit))); 452 453 if (info->scissor_enable) 454 return; 455 456 if (info->src.box.x != info->dst.box.x || 457 info->src.box.y != info->dst.box.y || 458 info->src.box.width != info->dst.box.width || 459 info->src.box.height != info->dst.box.height) 460 return; 461 462 if (is_color_blit && 463 util_format_is_depth_or_stencil(info->dst.resource->format)) 464 return; 465 466 if (!v3d_rt_format_supported(&screen->devinfo, info->src.resource->format)) 467 return; 468 469 if (v3d_get_rt_format(&screen->devinfo, info->src.resource->format) != 470 v3d_get_rt_format(&screen->devinfo, info->dst.resource->format)) 471 return; 472 473 bool msaa = (info->src.resource->nr_samples > 1 || 474 info->dst.resource->nr_samples > 1); 475 bool is_msaa_resolve = (info->src.resource->nr_samples > 1 && 476 info->dst.resource->nr_samples < 2); 477 478 if (is_msaa_resolve && 479 !v3d_format_supports_tlb_msaa_resolve(&screen->devinfo, info->src.resource->format)) 480 return; 481 482 v3d_flush_jobs_writing_resource(v3d, info->src.resource, V3D_FLUSH_DEFAULT, false); 483 484 struct pipe_surface *dst_surf = 485 v3d_get_blit_surface(pctx, info->dst.resource, info->dst.level, info->dst.box.z); 486 struct pipe_surface *src_surf = 487 v3d_get_blit_surface(pctx, info->src.resource, info->src.level, info->src.box.z); 488 489 struct pipe_surface *surfaces[V3D_MAX_DRAW_BUFFERS] = { 0 }; 490 if (is_color_blit) 491 surfaces[0] = dst_surf; 492 493 uint32_t tile_width, tile_height, max_bpp; 494 v3d_get_tile_buffer_size(msaa, is_color_blit ? 1 : 0, surfaces, src_surf, &tile_width, &tile_height, &max_bpp); 495 496 int dst_surface_width = u_minify(info->dst.resource->width0, 497 info->dst.level); 498 int dst_surface_height = u_minify(info->dst.resource->height0, 499 info->dst.level); 500 if (is_tile_unaligned(info->dst.box.x, tile_width) || 501 is_tile_unaligned(info->dst.box.y, tile_height) || 502 (is_tile_unaligned(info->dst.box.width, tile_width) && 503 info->dst.box.x + info->dst.box.width != dst_surface_width) || 504 (is_tile_unaligned(info->dst.box.height, tile_height) && 505 info->dst.box.y + info->dst.box.height != dst_surface_height)) { 506 pipe_surface_reference(&dst_surf, NULL); 507 pipe_surface_reference(&src_surf, NULL); 508 return; 509 } 510 511 struct v3d_job *job = v3d_get_job(v3d, 512 is_color_blit ? 1u : 0u, 513 surfaces, 514 is_color_blit ? NULL : dst_surf, 515 src_surf); 516 job->msaa = msaa; 517 job->tile_width = tile_width; 518 job->tile_height = tile_height; 519 job->internal_bpp = max_bpp; 520 job->draw_min_x = info->dst.box.x; 521 job->draw_min_y = info->dst.box.y; 522 job->draw_max_x = info->dst.box.x + info->dst.box.width; 523 job->draw_max_y = info->dst.box.y + info->dst.box.height; 524 job->scissor.disabled = false; 525 526 /* The simulator complains if we do a TLB load from a source with a 527 * stride that is smaller than the destination's, so we program the 528 * 'frame region' to match the smallest dimensions of the two surfaces. 529 * This should be fine because we only get here if the src and dst boxes 530 * match, so we know the blit involves the same tiles on both surfaces. 531 */ 532 job->draw_width = MIN2(dst_surf->width, src_surf->width); 533 job->draw_height = MIN2(dst_surf->height, src_surf->height); 534 job->draw_tiles_x = DIV_ROUND_UP(job->draw_width, 535 job->tile_width); 536 job->draw_tiles_y = DIV_ROUND_UP(job->draw_height, 537 job->tile_height); 538 539 job->needs_flush = true; 540 job->num_layers = info->dst.box.depth; 541 542 job->store = 0; 543 if (is_color_blit) { 544 job->store |= PIPE_CLEAR_COLOR0; 545 info->mask &= ~PIPE_MASK_RGBA; 546 } 547 if (is_depth_blit) { 548 job->store |= PIPE_CLEAR_DEPTH; 549 info->mask &= ~PIPE_MASK_Z; 550 } 551 if (is_stencil_blit){ 552 job->store |= PIPE_CLEAR_STENCIL; 553 info->mask &= ~PIPE_MASK_S; 554 } 555 556 v3d41_start_binning(v3d, job); 557 558 v3d_job_submit(v3d, job); 559 560 pipe_surface_reference(&dst_surf, NULL); 561 pipe_surface_reference(&src_surf, NULL); 562} 563 564/** 565 * Creates the VS of the custom blit shader to convert YUV plane from 566 * the NV12 format with BROADCOM_SAND_COL128 modifier to UIF tiled format. 567 * This vertex shader is mostly a pass-through VS. 568 */ 569static void * 570v3d_get_sand8_vs(struct pipe_context *pctx) 571{ 572 struct v3d_context *v3d = v3d_context(pctx); 573 struct pipe_screen *pscreen = pctx->screen; 574 575 if (v3d->sand8_blit_vs) 576 return v3d->sand8_blit_vs; 577 578 const struct nir_shader_compiler_options *options = 579 pscreen->get_compiler_options(pscreen, 580 PIPE_SHADER_IR_NIR, 581 PIPE_SHADER_VERTEX); 582 583 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, 584 options, 585 "sand8_blit_vs"); 586 587 const struct glsl_type *vec4 = glsl_vec4_type(); 588 nir_variable *pos_in = nir_variable_create(b.shader, 589 nir_var_shader_in, 590 vec4, "pos"); 591 592 nir_variable *pos_out = nir_variable_create(b.shader, 593 nir_var_shader_out, 594 vec4, "gl_Position"); 595 pos_out->data.location = VARYING_SLOT_POS; 596 nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf); 597 598 struct pipe_shader_state shader_tmpl = { 599 .type = PIPE_SHADER_IR_NIR, 600 .ir.nir = b.shader, 601 }; 602 603 v3d->sand8_blit_vs = pctx->create_vs_state(pctx, &shader_tmpl); 604 605 return v3d->sand8_blit_vs; 606} 607/** 608 * Creates the FS of the custom blit shader to convert YUV plane from 609 * the NV12 format with BROADCOM_SAND_COL128 modifier to UIF tiled format. 610 * The result texture is equivalent to a chroma (cpp=2) or luma (cpp=1) 611 * plane for a NV12 format without the SAND modifier. 612 */ 613static void * 614v3d_get_sand8_fs(struct pipe_context *pctx, int cpp) 615{ 616 struct v3d_context *v3d = v3d_context(pctx); 617 struct pipe_screen *pscreen = pctx->screen; 618 struct pipe_shader_state **cached_shader; 619 const char *name; 620 621 if (cpp == 1) { 622 cached_shader = &v3d->sand8_blit_fs_luma; 623 name = "sand8_blit_fs_luma"; 624 } else { 625 cached_shader = &v3d->sand8_blit_fs_chroma; 626 name = "sand8_blit_fs_chroma"; 627 } 628 629 if (*cached_shader) 630 return *cached_shader; 631 632 const struct nir_shader_compiler_options *options = 633 pscreen->get_compiler_options(pscreen, 634 PIPE_SHADER_IR_NIR, 635 PIPE_SHADER_FRAGMENT); 636 637 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, 638 options, "%s", name); 639 const struct glsl_type *vec4 = glsl_vec4_type(); 640 641 const struct glsl_type *glsl_int = glsl_int_type(); 642 643 nir_variable *color_out = 644 nir_variable_create(b.shader, nir_var_shader_out, 645 vec4, "f_color"); 646 color_out->data.location = FRAG_RESULT_COLOR; 647 648 nir_variable *pos_in = 649 nir_variable_create(b.shader, nir_var_shader_in, vec4, "pos"); 650 pos_in->data.location = VARYING_SLOT_POS; 651 nir_ssa_def *pos = nir_load_var(&b, pos_in); 652 653 nir_ssa_def *zero = nir_imm_int(&b, 0); 654 nir_ssa_def *one = nir_imm_int(&b, 1); 655 nir_ssa_def *two = nir_imm_int(&b, 2); 656 nir_ssa_def *six = nir_imm_int(&b, 6); 657 nir_ssa_def *seven = nir_imm_int(&b, 7); 658 nir_ssa_def *eight = nir_imm_int(&b, 8); 659 660 nir_ssa_def *x = nir_f2i32(&b, nir_channel(&b, pos, 0)); 661 nir_ssa_def *y = nir_f2i32(&b, nir_channel(&b, pos, 1)); 662 663 nir_variable *stride_in = 664 nir_variable_create(b.shader, nir_var_uniform, glsl_int, 665 "sand8_stride"); 666 nir_ssa_def *stride = 667 nir_load_uniform(&b, 1, 32, zero, 668 .base = stride_in->data.driver_location, 669 .range = 1, 670 .dest_type = nir_type_int32); 671 672 nir_ssa_def *x_offset; 673 nir_ssa_def *y_offset; 674 675 /* UIF tiled format is composed by UIF blocks, Each block has 676 * four 64 byte microtiles. Inside each microtile pixels are stored 677 * in raster format. But microtiles have different dimensions 678 * based in the bits per pixel of the image. 679 * 680 * 8bpp microtile dimensions are 8x8 681 * 16bpp microtile dimensions are 8x4 682 * 32bpp microtile dimensions are 4x4 683 * 684 * As we are reading and writing with 32bpp to optimize 685 * the number of texture operations during the blit. We need 686 * to adjust the offsets were we read and write as data will 687 * be later read using 8bpp (luma) and 16bpp (chroma). 688 * 689 * For chroma 8x4 16bpp raster order is compatible with 4x4 690 * 32bpp. In both layouts each line has 8*2 == 4*4 == 16 bytes. 691 * But luma 8x8 8bpp raster order is not compatible 692 * with 4x4 32bpp. 8bpp has 8 bytes per line, and 32bpp has 693 * 16 bytes per line. So if we read a 8bpp texture that was 694 * written as 32bpp texture. Bytes would be misplaced. 695 * 696 * inter/intra_utile_x_offests takes care of mapping the offsets 697 * between microtiles to deal with this issue for luma planes. 698 */ 699 if (cpp == 1) { 700 nir_ssa_def *intra_utile_x_offset = 701 nir_ishl(&b, nir_iand_imm(&b, x, 1), two); 702 nir_ssa_def *inter_utile_x_offset = 703 nir_ishl(&b, nir_iand_imm(&b, x, 60), one); 704 nir_ssa_def *stripe_offset= 705 nir_ishl(&b,nir_imul(&b,nir_ishr_imm(&b, x, 6), 706 stride), 707 seven); 708 709 x_offset = nir_iadd(&b, stripe_offset, 710 nir_iadd(&b, intra_utile_x_offset, 711 inter_utile_x_offset)); 712 y_offset = nir_iadd(&b, 713 nir_ishl(&b, nir_iand_imm(&b, x, 2), six), 714 nir_ishl(&b, y, eight)); 715 } else { 716 nir_ssa_def *stripe_offset= 717 nir_ishl(&b,nir_imul(&b,nir_ishr_imm(&b, x, 5), 718 stride), 719 seven); 720 x_offset = nir_iadd(&b, stripe_offset, 721 nir_ishl(&b, nir_iand_imm(&b, x, 31), two)); 722 y_offset = nir_ishl(&b, y, seven); 723 } 724 nir_ssa_def *ubo_offset = nir_iadd(&b, x_offset, y_offset); 725 nir_ssa_def *load = 726 nir_load_ubo(&b, 1, 32, one, ubo_offset, 727 .align_mul = 4, 728 .align_offset = 0, 729 .range_base = 0, 730 .range = ~0); 731 732 nir_ssa_def *output = nir_unpack_unorm_4x8(&b, load); 733 734 nir_store_var(&b, color_out, 735 output, 736 0xF); 737 738 struct pipe_shader_state shader_tmpl = { 739 .type = PIPE_SHADER_IR_NIR, 740 .ir.nir = b.shader, 741 }; 742 743 *cached_shader = pctx->create_fs_state(pctx, &shader_tmpl); 744 745 return *cached_shader; 746} 747 748/** 749 * Turns NV12 with SAND8 format modifier from raster-order with interleaved 750 * luma and chroma 128-byte-wide-columns to tiled format for luma and chroma. 751 * 752 * This implementation is based on vc4_yuv_blit. 753 */ 754static void 755v3d_sand8_blit(struct pipe_context *pctx, struct pipe_blit_info *info) 756{ 757 struct v3d_context *v3d = v3d_context(pctx); 758 struct v3d_resource *src = v3d_resource(info->src.resource); 759 ASSERTED struct v3d_resource *dst = v3d_resource(info->dst.resource); 760 761 if (!src->sand_col128_stride) 762 return; 763 if (src->tiled) 764 return; 765 if (src->base.format != PIPE_FORMAT_R8_UNORM && 766 src->base.format != PIPE_FORMAT_R8G8_UNORM) 767 return; 768 if (!(info->mask & PIPE_MASK_RGBA)) 769 return; 770 771 assert(dst->base.format == src->base.format); 772 assert(dst->tiled); 773 774 assert(info->src.box.x == 0 && info->dst.box.x == 0); 775 assert(info->src.box.y == 0 && info->dst.box.y == 0); 776 assert(info->src.box.width == info->dst.box.width); 777 assert(info->src.box.height == info->dst.box.height); 778 779 v3d_blitter_save(v3d); 780 781 struct pipe_surface dst_tmpl; 782 util_blitter_default_dst_texture(&dst_tmpl, info->dst.resource, 783 info->dst.level, info->dst.box.z); 784 /* Although the src textures are cpp=1 or cpp=2, the dst texture 785 * uses a cpp=4 dst texture. So, all read/write texture ops will 786 * be done using 32-bit read and writes. 787 */ 788 dst_tmpl.format = PIPE_FORMAT_R8G8B8A8_UNORM; 789 struct pipe_surface *dst_surf = 790 pctx->create_surface(pctx, info->dst.resource, &dst_tmpl); 791 if (!dst_surf) { 792 fprintf(stderr, "Failed to create YUV dst surface\n"); 793 util_blitter_unset_running_flag(v3d->blitter); 794 return; 795 } 796 797 uint32_t sand8_stride = src->sand_col128_stride; 798 799 /* Adjust the dimensions of dst luma/chroma to match src 800 * size now we are using a cpp=4 format. Next dimension take into 801 * account the UIF microtile layouts. 802 */ 803 dst_surf->width = align(dst_surf->width, 8) / 2; 804 if (src->cpp == 1) 805 dst_surf->height /= 2; 806 807 /* Set the constant buffer. */ 808 struct pipe_constant_buffer cb_uniforms = { 809 .user_buffer = &sand8_stride, 810 .buffer_size = sizeof(sand8_stride), 811 }; 812 813 pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, false, 814 &cb_uniforms); 815 struct pipe_constant_buffer cb_src = { 816 .buffer = info->src.resource, 817 .buffer_offset = src->slices[info->src.level].offset, 818 .buffer_size = (src->bo->size - 819 src->slices[info->src.level].offset), 820 }; 821 pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 2, false, 822 &cb_src); 823 /* Unbind the textures, to make sure we don't try to recurse into the 824 * shadow blit. 825 */ 826 pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 0, 0, false, NULL); 827 pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL); 828 829 util_blitter_custom_shader(v3d->blitter, dst_surf, 830 v3d_get_sand8_vs(pctx), 831 v3d_get_sand8_fs(pctx, src->cpp)); 832 833 util_blitter_restore_textures(v3d->blitter); 834 util_blitter_restore_constant_buffer_state(v3d->blitter); 835 836 /* Restore cb1 (util_blitter doesn't handle this one). */ 837 struct pipe_constant_buffer cb_disabled = { 0 }; 838 pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, false, 839 &cb_disabled); 840 841 pipe_surface_reference(&dst_surf, NULL); 842 843 info->mask &= ~PIPE_MASK_RGBA; 844 return; 845} 846 847 848/* Optimal hardware path for blitting pixels. 849 * Scaling, format conversion, up- and downsampling (resolve) are allowed. 850 */ 851void 852v3d_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) 853{ 854 struct v3d_context *v3d = v3d_context(pctx); 855 struct pipe_blit_info info = *blit_info; 856 857 v3d_sand8_blit(pctx, &info); 858 859 v3d_tfu_blit(pctx, &info); 860 861 v3d_tlb_blit(pctx, &info); 862 863 v3d_stencil_blit(pctx, &info); 864 865 v3d_render_blit(pctx, &info); 866 867 /* Flush our blit jobs immediately. They're unlikely to get reused by 868 * normal drawing or other blits, and without flushing we can easily 869 * run into unexpected OOMs when blits are used for a large series of 870 * texture uploads before using the textures. 871 */ 872 v3d_flush_jobs_writing_resource(v3d, info.dst.resource, 873 V3D_FLUSH_DEFAULT, false); 874} 875