/*
 * Copyright © 2015-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
2201e04c3fSmrg */ 2301e04c3fSmrg 247ec681f3Smrg#include "util/format/u_format.h" 2501e04c3fSmrg#include "util/u_surface.h" 2601e04c3fSmrg#include "util/u_blitter.h" 277ec681f3Smrg#include "compiler/nir/nir_builder.h" 2801e04c3fSmrg#include "v3d_context.h" 297ec681f3Smrg#include "broadcom/common/v3d_tiling.h" 3001e04c3fSmrg 3101e04c3fSmrgvoid 3201e04c3fSmrgv3d_blitter_save(struct v3d_context *v3d) 3301e04c3fSmrg{ 3401e04c3fSmrg util_blitter_save_fragment_constant_buffer_slot(v3d->blitter, 3501e04c3fSmrg v3d->constbuf[PIPE_SHADER_FRAGMENT].cb); 3601e04c3fSmrg util_blitter_save_vertex_buffer_slot(v3d->blitter, v3d->vertexbuf.vb); 3701e04c3fSmrg util_blitter_save_vertex_elements(v3d->blitter, v3d->vtx); 3801e04c3fSmrg util_blitter_save_vertex_shader(v3d->blitter, v3d->prog.bind_vs); 397ec681f3Smrg util_blitter_save_geometry_shader(v3d->blitter, v3d->prog.bind_gs); 4001e04c3fSmrg util_blitter_save_so_targets(v3d->blitter, v3d->streamout.num_targets, 4101e04c3fSmrg v3d->streamout.targets); 4201e04c3fSmrg util_blitter_save_rasterizer(v3d->blitter, v3d->rasterizer); 4301e04c3fSmrg util_blitter_save_viewport(v3d->blitter, &v3d->viewport); 4401e04c3fSmrg util_blitter_save_scissor(v3d->blitter, &v3d->scissor); 4501e04c3fSmrg util_blitter_save_fragment_shader(v3d->blitter, v3d->prog.bind_fs); 4601e04c3fSmrg util_blitter_save_blend(v3d->blitter, v3d->blend); 4701e04c3fSmrg util_blitter_save_depth_stencil_alpha(v3d->blitter, v3d->zsa); 4801e04c3fSmrg util_blitter_save_stencil_ref(v3d->blitter, &v3d->stencil_ref); 4901e04c3fSmrg util_blitter_save_sample_mask(v3d->blitter, v3d->sample_mask); 5001e04c3fSmrg util_blitter_save_framebuffer(v3d->blitter, &v3d->framebuffer); 5101e04c3fSmrg util_blitter_save_fragment_sampler_states(v3d->blitter, 529f464c52Smaya v3d->tex[PIPE_SHADER_FRAGMENT].num_samplers, 539f464c52Smaya (void **)v3d->tex[PIPE_SHADER_FRAGMENT].samplers); 5401e04c3fSmrg util_blitter_save_fragment_sampler_views(v3d->blitter, 559f464c52Smaya 
v3d->tex[PIPE_SHADER_FRAGMENT].num_textures, 569f464c52Smaya v3d->tex[PIPE_SHADER_FRAGMENT].textures); 5701e04c3fSmrg util_blitter_save_so_targets(v3d->blitter, v3d->streamout.num_targets, 5801e04c3fSmrg v3d->streamout.targets); 5901e04c3fSmrg} 6001e04c3fSmrg 617ec681f3Smrgstatic void 6201e04c3fSmrgv3d_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info) 6301e04c3fSmrg{ 6401e04c3fSmrg struct v3d_context *v3d = v3d_context(ctx); 6501e04c3fSmrg struct v3d_resource *src = v3d_resource(info->src.resource); 6601e04c3fSmrg struct pipe_resource *tiled = NULL; 6701e04c3fSmrg 687ec681f3Smrg if (!info->mask) 697ec681f3Smrg return; 707ec681f3Smrg 7101e04c3fSmrg if (!src->tiled) { 7201e04c3fSmrg struct pipe_box box = { 7301e04c3fSmrg .x = 0, 7401e04c3fSmrg .y = 0, 7501e04c3fSmrg .width = u_minify(info->src.resource->width0, 7601e04c3fSmrg info->src.level), 7701e04c3fSmrg .height = u_minify(info->src.resource->height0, 7801e04c3fSmrg info->src.level), 7901e04c3fSmrg .depth = 1, 8001e04c3fSmrg }; 8101e04c3fSmrg struct pipe_resource tmpl = { 8201e04c3fSmrg .target = info->src.resource->target, 8301e04c3fSmrg .format = info->src.resource->format, 8401e04c3fSmrg .width0 = box.width, 8501e04c3fSmrg .height0 = box.height, 8601e04c3fSmrg .depth0 = 1, 8701e04c3fSmrg .array_size = 1, 8801e04c3fSmrg }; 8901e04c3fSmrg tiled = ctx->screen->resource_create(ctx->screen, &tmpl); 9001e04c3fSmrg if (!tiled) { 9101e04c3fSmrg fprintf(stderr, "Failed to create tiled blit temp\n"); 927ec681f3Smrg return; 9301e04c3fSmrg } 9401e04c3fSmrg ctx->resource_copy_region(ctx, 9501e04c3fSmrg tiled, 0, 9601e04c3fSmrg 0, 0, 0, 9701e04c3fSmrg info->src.resource, info->src.level, 9801e04c3fSmrg &box); 9901e04c3fSmrg info->src.level = 0; 10001e04c3fSmrg info->src.resource = tiled; 10101e04c3fSmrg } 10201e04c3fSmrg 10301e04c3fSmrg if (!util_blitter_is_blit_supported(v3d->blitter, info)) { 10401e04c3fSmrg fprintf(stderr, "blit unsupported %s -> %s\n", 10501e04c3fSmrg 
util_format_short_name(info->src.resource->format), 10601e04c3fSmrg util_format_short_name(info->dst.resource->format)); 1077ec681f3Smrg return; 10801e04c3fSmrg } 10901e04c3fSmrg 11001e04c3fSmrg v3d_blitter_save(v3d); 11101e04c3fSmrg util_blitter_blit(v3d->blitter, info); 11201e04c3fSmrg 11301e04c3fSmrg pipe_resource_reference(&tiled, NULL); 1147ec681f3Smrg info->mask = 0; 11501e04c3fSmrg} 11601e04c3fSmrg 11701e04c3fSmrg/* Implement stencil blits by reinterpreting the stencil data as an RGBA8888 11801e04c3fSmrg * or R8 texture. 11901e04c3fSmrg */ 12001e04c3fSmrgstatic void 1217ec681f3Smrgv3d_stencil_blit(struct pipe_context *ctx, struct pipe_blit_info *info) 12201e04c3fSmrg{ 12301e04c3fSmrg struct v3d_context *v3d = v3d_context(ctx); 12401e04c3fSmrg struct v3d_resource *src = v3d_resource(info->src.resource); 12501e04c3fSmrg struct v3d_resource *dst = v3d_resource(info->dst.resource); 12601e04c3fSmrg enum pipe_format src_format, dst_format; 12701e04c3fSmrg 1287ec681f3Smrg if ((info->mask & PIPE_MASK_S) == 0) 1297ec681f3Smrg return; 1307ec681f3Smrg 13101e04c3fSmrg if (src->separate_stencil) { 13201e04c3fSmrg src = src->separate_stencil; 1337ec681f3Smrg src_format = PIPE_FORMAT_R8_UINT; 13401e04c3fSmrg } else { 1357ec681f3Smrg src_format = PIPE_FORMAT_RGBA8888_UINT; 13601e04c3fSmrg } 13701e04c3fSmrg 13801e04c3fSmrg if (dst->separate_stencil) { 13901e04c3fSmrg dst = dst->separate_stencil; 1407ec681f3Smrg dst_format = PIPE_FORMAT_R8_UINT; 14101e04c3fSmrg } else { 1427ec681f3Smrg dst_format = PIPE_FORMAT_RGBA8888_UINT; 14301e04c3fSmrg } 14401e04c3fSmrg 14501e04c3fSmrg /* Initialize the surface. 
*/ 14601e04c3fSmrg struct pipe_surface dst_tmpl = { 14701e04c3fSmrg .u.tex = { 14801e04c3fSmrg .level = info->dst.level, 14901e04c3fSmrg .first_layer = info->dst.box.z, 15001e04c3fSmrg .last_layer = info->dst.box.z, 15101e04c3fSmrg }, 15201e04c3fSmrg .format = dst_format, 15301e04c3fSmrg }; 15401e04c3fSmrg struct pipe_surface *dst_surf = 15501e04c3fSmrg ctx->create_surface(ctx, &dst->base, &dst_tmpl); 15601e04c3fSmrg 15701e04c3fSmrg /* Initialize the sampler view. */ 15801e04c3fSmrg struct pipe_sampler_view src_tmpl = { 15901e04c3fSmrg .target = src->base.target, 16001e04c3fSmrg .format = src_format, 16101e04c3fSmrg .u.tex = { 16201e04c3fSmrg .first_level = info->src.level, 16301e04c3fSmrg .last_level = info->src.level, 16401e04c3fSmrg .first_layer = 0, 16501e04c3fSmrg .last_layer = (PIPE_TEXTURE_3D ? 16601e04c3fSmrg u_minify(src->base.depth0, 16701e04c3fSmrg info->src.level) - 1 : 16801e04c3fSmrg src->base.array_size - 1), 16901e04c3fSmrg }, 17001e04c3fSmrg .swizzle_r = PIPE_SWIZZLE_X, 17101e04c3fSmrg .swizzle_g = PIPE_SWIZZLE_Y, 17201e04c3fSmrg .swizzle_b = PIPE_SWIZZLE_Z, 17301e04c3fSmrg .swizzle_a = PIPE_SWIZZLE_W, 17401e04c3fSmrg }; 17501e04c3fSmrg struct pipe_sampler_view *src_view = 17601e04c3fSmrg ctx->create_sampler_view(ctx, &src->base, &src_tmpl); 17701e04c3fSmrg 17801e04c3fSmrg v3d_blitter_save(v3d); 17901e04c3fSmrg util_blitter_blit_generic(v3d->blitter, dst_surf, &info->dst.box, 18001e04c3fSmrg src_view, &info->src.box, 18101e04c3fSmrg src->base.width0, src->base.height0, 18201e04c3fSmrg PIPE_MASK_R, 18301e04c3fSmrg PIPE_TEX_FILTER_NEAREST, 18401e04c3fSmrg info->scissor_enable ? 
&info->scissor : NULL, 1857ec681f3Smrg info->alpha_blend, false); 18601e04c3fSmrg 18701e04c3fSmrg pipe_surface_reference(&dst_surf, NULL); 18801e04c3fSmrg pipe_sampler_view_reference(&src_view, NULL); 1897ec681f3Smrg 1907ec681f3Smrg info->mask &= ~PIPE_MASK_S; 19101e04c3fSmrg} 19201e04c3fSmrg 1939f464c52Smaya/* Disable level 0 write, just write following mipmaps */ 1949f464c52Smaya#define V3D_TFU_IOA_DIMTW (1 << 0) 1959f464c52Smaya#define V3D_TFU_IOA_FORMAT_SHIFT 3 1969f464c52Smaya#define V3D_TFU_IOA_FORMAT_LINEARTILE 3 1979f464c52Smaya#define V3D_TFU_IOA_FORMAT_UBLINEAR_1_COLUMN 4 1989f464c52Smaya#define V3D_TFU_IOA_FORMAT_UBLINEAR_2_COLUMN 5 1999f464c52Smaya#define V3D_TFU_IOA_FORMAT_UIF_NO_XOR 6 2009f464c52Smaya#define V3D_TFU_IOA_FORMAT_UIF_XOR 7 2019f464c52Smaya 2029f464c52Smaya#define V3D_TFU_ICFG_NUMMM_SHIFT 5 2039f464c52Smaya#define V3D_TFU_ICFG_TTYPE_SHIFT 9 2049f464c52Smaya 2059f464c52Smaya#define V3D_TFU_ICFG_OPAD_SHIFT 22 2069f464c52Smaya 2079f464c52Smaya#define V3D_TFU_ICFG_FORMAT_SHIFT 18 2089f464c52Smaya#define V3D_TFU_ICFG_FORMAT_RASTER 0 2099f464c52Smaya#define V3D_TFU_ICFG_FORMAT_SAND_128 1 2109f464c52Smaya#define V3D_TFU_ICFG_FORMAT_SAND_256 2 2119f464c52Smaya#define V3D_TFU_ICFG_FORMAT_LINEARTILE 11 2129f464c52Smaya#define V3D_TFU_ICFG_FORMAT_UBLINEAR_1_COLUMN 12 2139f464c52Smaya#define V3D_TFU_ICFG_FORMAT_UBLINEAR_2_COLUMN 13 2149f464c52Smaya#define V3D_TFU_ICFG_FORMAT_UIF_NO_XOR 14 2159f464c52Smaya#define V3D_TFU_ICFG_FORMAT_UIF_XOR 15 2169f464c52Smaya 2179f464c52Smayastatic bool 2189f464c52Smayav3d_tfu(struct pipe_context *pctx, 2199f464c52Smaya struct pipe_resource *pdst, 2209f464c52Smaya struct pipe_resource *psrc, 2219f464c52Smaya unsigned int src_level, 2229f464c52Smaya unsigned int base_level, 2239f464c52Smaya unsigned int last_level, 2249f464c52Smaya unsigned int src_layer, 2257ec681f3Smrg unsigned int dst_layer, 2267ec681f3Smrg bool for_mipmap) 2279f464c52Smaya{ 2289f464c52Smaya struct v3d_context *v3d = v3d_context(pctx); 
2299f464c52Smaya struct v3d_screen *screen = v3d->screen; 2309f464c52Smaya struct v3d_resource *src = v3d_resource(psrc); 2319f464c52Smaya struct v3d_resource *dst = v3d_resource(pdst); 2329f464c52Smaya struct v3d_resource_slice *src_base_slice = &src->slices[src_level]; 2339f464c52Smaya struct v3d_resource_slice *dst_base_slice = &dst->slices[base_level]; 2349f464c52Smaya int msaa_scale = pdst->nr_samples > 1 ? 2 : 1; 2359f464c52Smaya int width = u_minify(pdst->width0, base_level) * msaa_scale; 2369f464c52Smaya int height = u_minify(pdst->height0, base_level) * msaa_scale; 2377ec681f3Smrg enum pipe_format pformat; 2389f464c52Smaya 2399f464c52Smaya if (psrc->format != pdst->format) 2409f464c52Smaya return false; 2419f464c52Smaya if (psrc->nr_samples != pdst->nr_samples) 2429f464c52Smaya return false; 2439f464c52Smaya 2449f464c52Smaya if (pdst->target != PIPE_TEXTURE_2D || psrc->target != PIPE_TEXTURE_2D) 2459f464c52Smaya return false; 2469f464c52Smaya 2479f464c52Smaya /* Can't write to raster. */ 2487ec681f3Smrg if (dst_base_slice->tiling == V3D_TILING_RASTER) 2499f464c52Smaya return false; 2509f464c52Smaya 2517ec681f3Smrg /* When using TFU for blit, we are doing exact copies (both input and 2527ec681f3Smrg * output format must be the same, no scaling, etc), so there is no 2537ec681f3Smrg * pixel format conversions. Thus we can rewrite the format to use one 2547ec681f3Smrg * that is TFU compatible based on its texel size. 
2557ec681f3Smrg */ 2567ec681f3Smrg if (for_mipmap) { 2577ec681f3Smrg pformat = pdst->format; 2587ec681f3Smrg } else { 2597ec681f3Smrg switch (dst->cpp) { 2607ec681f3Smrg case 16: pformat = PIPE_FORMAT_R32G32B32A32_FLOAT; break; 2617ec681f3Smrg case 8: pformat = PIPE_FORMAT_R16G16B16A16_FLOAT; break; 2627ec681f3Smrg case 4: pformat = PIPE_FORMAT_R32_FLOAT; break; 2637ec681f3Smrg case 2: pformat = PIPE_FORMAT_R16_FLOAT; break; 2647ec681f3Smrg case 1: pformat = PIPE_FORMAT_R8_UNORM; break; 2657ec681f3Smrg default: unreachable("unsupported format bit-size"); break; 2667ec681f3Smrg }; 2677ec681f3Smrg } 2687ec681f3Smrg 2697ec681f3Smrg uint32_t tex_format = v3d_get_tex_format(&screen->devinfo, pformat); 2707ec681f3Smrg 2717ec681f3Smrg if (!v3d_tfu_supports_tex_format(&screen->devinfo, tex_format, for_mipmap)) { 2727ec681f3Smrg assert(for_mipmap); 2737ec681f3Smrg return false; 2747ec681f3Smrg } 2757ec681f3Smrg 2767ec681f3Smrg v3d_flush_jobs_writing_resource(v3d, psrc, V3D_FLUSH_DEFAULT, false); 2777ec681f3Smrg v3d_flush_jobs_reading_resource(v3d, pdst, V3D_FLUSH_DEFAULT, false); 2789f464c52Smaya 2799f464c52Smaya struct drm_v3d_submit_tfu tfu = { 2809f464c52Smaya .ios = (height << 16) | width, 2819f464c52Smaya .bo_handles = { 2829f464c52Smaya dst->bo->handle, 2839f464c52Smaya src != dst ? 
src->bo->handle : 0 2849f464c52Smaya }, 2859f464c52Smaya .in_sync = v3d->out_sync, 2869f464c52Smaya .out_sync = v3d->out_sync, 2879f464c52Smaya }; 2889f464c52Smaya uint32_t src_offset = (src->bo->offset + 2899f464c52Smaya v3d_layer_offset(psrc, src_level, src_layer)); 2909f464c52Smaya tfu.iia |= src_offset; 2917ec681f3Smrg if (src_base_slice->tiling == V3D_TILING_RASTER) { 2929f464c52Smaya tfu.icfg |= (V3D_TFU_ICFG_FORMAT_RASTER << 2939f464c52Smaya V3D_TFU_ICFG_FORMAT_SHIFT); 2949f464c52Smaya } else { 2959f464c52Smaya tfu.icfg |= ((V3D_TFU_ICFG_FORMAT_LINEARTILE + 2967ec681f3Smrg (src_base_slice->tiling - V3D_TILING_LINEARTILE)) << 2979f464c52Smaya V3D_TFU_ICFG_FORMAT_SHIFT); 2989f464c52Smaya } 2999f464c52Smaya 3009f464c52Smaya uint32_t dst_offset = (dst->bo->offset + 3017ec681f3Smrg v3d_layer_offset(pdst, base_level, dst_layer)); 3029f464c52Smaya tfu.ioa |= dst_offset; 3039f464c52Smaya if (last_level != base_level) 3049f464c52Smaya tfu.ioa |= V3D_TFU_IOA_DIMTW; 3059f464c52Smaya tfu.ioa |= ((V3D_TFU_IOA_FORMAT_LINEARTILE + 3067ec681f3Smrg (dst_base_slice->tiling - V3D_TILING_LINEARTILE)) << 3079f464c52Smaya V3D_TFU_IOA_FORMAT_SHIFT); 3089f464c52Smaya 3099f464c52Smaya tfu.icfg |= tex_format << V3D_TFU_ICFG_TTYPE_SHIFT; 3109f464c52Smaya tfu.icfg |= (last_level - base_level) << V3D_TFU_ICFG_NUMMM_SHIFT; 3119f464c52Smaya 3129f464c52Smaya switch (src_base_slice->tiling) { 3137ec681f3Smrg case V3D_TILING_UIF_NO_XOR: 3147ec681f3Smrg case V3D_TILING_UIF_XOR: 3159f464c52Smaya tfu.iis |= (src_base_slice->padded_height / 3169f464c52Smaya (2 * v3d_utile_height(src->cpp))); 3179f464c52Smaya break; 3187ec681f3Smrg case V3D_TILING_RASTER: 3199f464c52Smaya tfu.iis |= src_base_slice->stride / src->cpp; 3209f464c52Smaya break; 3217ec681f3Smrg case V3D_TILING_LINEARTILE: 3227ec681f3Smrg case V3D_TILING_UBLINEAR_1_COLUMN: 3237ec681f3Smrg case V3D_TILING_UBLINEAR_2_COLUMN: 3249f464c52Smaya break; 3259f464c52Smaya } 3269f464c52Smaya 3279f464c52Smaya /* If we're writing level 0 
(!IOA_DIMTW), then we need to supply the 3289f464c52Smaya * OPAD field for the destination (how many extra UIF blocks beyond 3299f464c52Smaya * those necessary to cover the height). When filling mipmaps, the 3309f464c52Smaya * miplevel 1+ tiling state is inferred. 3319f464c52Smaya */ 3327ec681f3Smrg if (dst_base_slice->tiling == V3D_TILING_UIF_NO_XOR || 3337ec681f3Smrg dst_base_slice->tiling == V3D_TILING_UIF_XOR) { 3349f464c52Smaya int uif_block_h = 2 * v3d_utile_height(dst->cpp); 3359f464c52Smaya int implicit_padded_height = align(height, uif_block_h); 3369f464c52Smaya 3379f464c52Smaya tfu.icfg |= (((dst_base_slice->padded_height - 3389f464c52Smaya implicit_padded_height) / uif_block_h) << 3399f464c52Smaya V3D_TFU_ICFG_OPAD_SHIFT); 3409f464c52Smaya } 3419f464c52Smaya 3429f464c52Smaya int ret = v3d_ioctl(screen->fd, DRM_IOCTL_V3D_SUBMIT_TFU, &tfu); 3439f464c52Smaya if (ret != 0) { 3449f464c52Smaya fprintf(stderr, "Failed to submit TFU job: %d\n", ret); 3459f464c52Smaya return false; 3469f464c52Smaya } 3479f464c52Smaya 3489f464c52Smaya dst->writes++; 3499f464c52Smaya 3509f464c52Smaya return true; 3519f464c52Smaya} 3529f464c52Smaya 3537ec681f3Smrgbool 3549f464c52Smayav3d_generate_mipmap(struct pipe_context *pctx, 3559f464c52Smaya struct pipe_resource *prsc, 3569f464c52Smaya enum pipe_format format, 3579f464c52Smaya unsigned int base_level, 3589f464c52Smaya unsigned int last_level, 3599f464c52Smaya unsigned int first_layer, 3609f464c52Smaya unsigned int last_layer) 3619f464c52Smaya{ 3629f464c52Smaya if (format != prsc->format) 3639f464c52Smaya return false; 3649f464c52Smaya 3659f464c52Smaya /* We could maybe support looping over layers for array textures, but 3669f464c52Smaya * we definitely don't support 3D. 
3679f464c52Smaya */ 3689f464c52Smaya if (first_layer != last_layer) 3699f464c52Smaya return false; 3709f464c52Smaya 3719f464c52Smaya return v3d_tfu(pctx, 3729f464c52Smaya prsc, prsc, 3739f464c52Smaya base_level, 3749f464c52Smaya base_level, last_level, 3757ec681f3Smrg first_layer, first_layer, 3767ec681f3Smrg true); 3779f464c52Smaya} 3789f464c52Smaya 3797ec681f3Smrgstatic void 3807ec681f3Smrgv3d_tfu_blit(struct pipe_context *pctx, struct pipe_blit_info *info) 3819f464c52Smaya{ 3829f464c52Smaya int dst_width = u_minify(info->dst.resource->width0, info->dst.level); 3839f464c52Smaya int dst_height = u_minify(info->dst.resource->height0, info->dst.level); 3849f464c52Smaya 3859f464c52Smaya if ((info->mask & PIPE_MASK_RGBA) == 0) 3867ec681f3Smrg return; 3879f464c52Smaya 3889f464c52Smaya if (info->scissor_enable || 3899f464c52Smaya info->dst.box.x != 0 || 3909f464c52Smaya info->dst.box.y != 0 || 3919f464c52Smaya info->dst.box.width != dst_width || 3929f464c52Smaya info->dst.box.height != dst_height || 3939f464c52Smaya info->src.box.x != 0 || 3949f464c52Smaya info->src.box.y != 0 || 3959f464c52Smaya info->src.box.width != info->dst.box.width || 3969f464c52Smaya info->src.box.height != info->dst.box.height) { 3977ec681f3Smrg return; 3989f464c52Smaya } 3999f464c52Smaya 4009f464c52Smaya if (info->dst.format != info->src.format) 4017ec681f3Smrg return; 4027ec681f3Smrg 4037ec681f3Smrg if (v3d_tfu(pctx, info->dst.resource, info->src.resource, 4047ec681f3Smrg info->src.level, 4057ec681f3Smrg info->dst.level, info->dst.level, 4067ec681f3Smrg info->src.box.z, info->dst.box.z, 4077ec681f3Smrg false)) { 4087ec681f3Smrg info->mask &= ~PIPE_MASK_RGBA; 4097ec681f3Smrg } 4107ec681f3Smrg} 4117ec681f3Smrg 4127ec681f3Smrgstatic struct pipe_surface * 4137ec681f3Smrgv3d_get_blit_surface(struct pipe_context *pctx, 4147ec681f3Smrg struct pipe_resource *prsc, 4157ec681f3Smrg unsigned level, 4167ec681f3Smrg int16_t layer) 4177ec681f3Smrg{ 4187ec681f3Smrg struct pipe_surface tmpl; 4197ec681f3Smrg 
4207ec681f3Smrg tmpl.format = prsc->format; 4217ec681f3Smrg tmpl.u.tex.level = level; 4227ec681f3Smrg tmpl.u.tex.first_layer = layer; 4237ec681f3Smrg tmpl.u.tex.last_layer = layer; 4247ec681f3Smrg 4257ec681f3Smrg return pctx->create_surface(pctx, prsc, &tmpl); 4267ec681f3Smrg} 4279f464c52Smaya 4287ec681f3Smrgstatic bool 4297ec681f3Smrgis_tile_unaligned(unsigned size, unsigned tile_size) 4307ec681f3Smrg{ 4317ec681f3Smrg return size & (tile_size - 1); 4329f464c52Smaya} 4339f464c52Smaya 4347ec681f3Smrgstatic void 4357ec681f3Smrgv3d_tlb_blit(struct pipe_context *pctx, struct pipe_blit_info *info) 4367ec681f3Smrg{ 4377ec681f3Smrg struct v3d_context *v3d = v3d_context(pctx); 4387ec681f3Smrg struct v3d_screen *screen = v3d->screen; 4397ec681f3Smrg 4407ec681f3Smrg if (screen->devinfo.ver < 40 || !info->mask) 4417ec681f3Smrg return; 4427ec681f3Smrg 4437ec681f3Smrg bool is_color_blit = info->mask & PIPE_MASK_RGBA; 4447ec681f3Smrg bool is_depth_blit = info->mask & PIPE_MASK_Z; 4457ec681f3Smrg bool is_stencil_blit = info->mask & PIPE_MASK_S; 4467ec681f3Smrg 4477ec681f3Smrg /* We should receive either a depth/stencil blit, or color blit, but 4487ec681f3Smrg * not both. 
4497ec681f3Smrg */ 4507ec681f3Smrg assert ((is_color_blit && !is_depth_blit && !is_stencil_blit) || 4517ec681f3Smrg (!is_color_blit && (is_depth_blit || is_stencil_blit))); 4527ec681f3Smrg 4537ec681f3Smrg if (info->scissor_enable) 4547ec681f3Smrg return; 4557ec681f3Smrg 4567ec681f3Smrg if (info->src.box.x != info->dst.box.x || 4577ec681f3Smrg info->src.box.y != info->dst.box.y || 4587ec681f3Smrg info->src.box.width != info->dst.box.width || 4597ec681f3Smrg info->src.box.height != info->dst.box.height) 4607ec681f3Smrg return; 4617ec681f3Smrg 4627ec681f3Smrg if (is_color_blit && 4637ec681f3Smrg util_format_is_depth_or_stencil(info->dst.resource->format)) 4647ec681f3Smrg return; 4657ec681f3Smrg 4667ec681f3Smrg if (!v3d_rt_format_supported(&screen->devinfo, info->src.resource->format)) 4677ec681f3Smrg return; 4687ec681f3Smrg 4697ec681f3Smrg if (v3d_get_rt_format(&screen->devinfo, info->src.resource->format) != 4707ec681f3Smrg v3d_get_rt_format(&screen->devinfo, info->dst.resource->format)) 4717ec681f3Smrg return; 4727ec681f3Smrg 4737ec681f3Smrg bool msaa = (info->src.resource->nr_samples > 1 || 4747ec681f3Smrg info->dst.resource->nr_samples > 1); 4757ec681f3Smrg bool is_msaa_resolve = (info->src.resource->nr_samples > 1 && 4767ec681f3Smrg info->dst.resource->nr_samples < 2); 4777ec681f3Smrg 4787ec681f3Smrg if (is_msaa_resolve && 4797ec681f3Smrg !v3d_format_supports_tlb_msaa_resolve(&screen->devinfo, info->src.resource->format)) 4807ec681f3Smrg return; 4817ec681f3Smrg 4827ec681f3Smrg v3d_flush_jobs_writing_resource(v3d, info->src.resource, V3D_FLUSH_DEFAULT, false); 4837ec681f3Smrg 4847ec681f3Smrg struct pipe_surface *dst_surf = 4857ec681f3Smrg v3d_get_blit_surface(pctx, info->dst.resource, info->dst.level, info->dst.box.z); 4867ec681f3Smrg struct pipe_surface *src_surf = 4877ec681f3Smrg v3d_get_blit_surface(pctx, info->src.resource, info->src.level, info->src.box.z); 4887ec681f3Smrg 4897ec681f3Smrg struct pipe_surface *surfaces[V3D_MAX_DRAW_BUFFERS] = { 0 }; 
4907ec681f3Smrg if (is_color_blit) 4917ec681f3Smrg surfaces[0] = dst_surf; 4927ec681f3Smrg 4937ec681f3Smrg uint32_t tile_width, tile_height, max_bpp; 4947ec681f3Smrg v3d_get_tile_buffer_size(msaa, is_color_blit ? 1 : 0, surfaces, src_surf, &tile_width, &tile_height, &max_bpp); 4957ec681f3Smrg 4967ec681f3Smrg int dst_surface_width = u_minify(info->dst.resource->width0, 4977ec681f3Smrg info->dst.level); 4987ec681f3Smrg int dst_surface_height = u_minify(info->dst.resource->height0, 4997ec681f3Smrg info->dst.level); 5007ec681f3Smrg if (is_tile_unaligned(info->dst.box.x, tile_width) || 5017ec681f3Smrg is_tile_unaligned(info->dst.box.y, tile_height) || 5027ec681f3Smrg (is_tile_unaligned(info->dst.box.width, tile_width) && 5037ec681f3Smrg info->dst.box.x + info->dst.box.width != dst_surface_width) || 5047ec681f3Smrg (is_tile_unaligned(info->dst.box.height, tile_height) && 5057ec681f3Smrg info->dst.box.y + info->dst.box.height != dst_surface_height)) { 5067ec681f3Smrg pipe_surface_reference(&dst_surf, NULL); 5077ec681f3Smrg pipe_surface_reference(&src_surf, NULL); 5087ec681f3Smrg return; 5097ec681f3Smrg } 5107ec681f3Smrg 5117ec681f3Smrg struct v3d_job *job = v3d_get_job(v3d, 5127ec681f3Smrg is_color_blit ? 1u : 0u, 5137ec681f3Smrg surfaces, 5147ec681f3Smrg is_color_blit ? 
NULL : dst_surf, 5157ec681f3Smrg src_surf); 5167ec681f3Smrg job->msaa = msaa; 5177ec681f3Smrg job->tile_width = tile_width; 5187ec681f3Smrg job->tile_height = tile_height; 5197ec681f3Smrg job->internal_bpp = max_bpp; 5207ec681f3Smrg job->draw_min_x = info->dst.box.x; 5217ec681f3Smrg job->draw_min_y = info->dst.box.y; 5227ec681f3Smrg job->draw_max_x = info->dst.box.x + info->dst.box.width; 5237ec681f3Smrg job->draw_max_y = info->dst.box.y + info->dst.box.height; 5247ec681f3Smrg job->scissor.disabled = false; 5257ec681f3Smrg 5267ec681f3Smrg /* The simulator complains if we do a TLB load from a source with a 5277ec681f3Smrg * stride that is smaller than the destination's, so we program the 5287ec681f3Smrg * 'frame region' to match the smallest dimensions of the two surfaces. 5297ec681f3Smrg * This should be fine because we only get here if the src and dst boxes 5307ec681f3Smrg * match, so we know the blit involves the same tiles on both surfaces. 5317ec681f3Smrg */ 5327ec681f3Smrg job->draw_width = MIN2(dst_surf->width, src_surf->width); 5337ec681f3Smrg job->draw_height = MIN2(dst_surf->height, src_surf->height); 5347ec681f3Smrg job->draw_tiles_x = DIV_ROUND_UP(job->draw_width, 5357ec681f3Smrg job->tile_width); 5367ec681f3Smrg job->draw_tiles_y = DIV_ROUND_UP(job->draw_height, 5377ec681f3Smrg job->tile_height); 5387ec681f3Smrg 5397ec681f3Smrg job->needs_flush = true; 5407ec681f3Smrg job->num_layers = info->dst.box.depth; 5417ec681f3Smrg 5427ec681f3Smrg job->store = 0; 5437ec681f3Smrg if (is_color_blit) { 5447ec681f3Smrg job->store |= PIPE_CLEAR_COLOR0; 5457ec681f3Smrg info->mask &= ~PIPE_MASK_RGBA; 5467ec681f3Smrg } 5477ec681f3Smrg if (is_depth_blit) { 5487ec681f3Smrg job->store |= PIPE_CLEAR_DEPTH; 5497ec681f3Smrg info->mask &= ~PIPE_MASK_Z; 5507ec681f3Smrg } 5517ec681f3Smrg if (is_stencil_blit){ 5527ec681f3Smrg job->store |= PIPE_CLEAR_STENCIL; 5537ec681f3Smrg info->mask &= ~PIPE_MASK_S; 5547ec681f3Smrg } 5557ec681f3Smrg 5567ec681f3Smrg v3d41_start_binning(v3d, 
job); 5577ec681f3Smrg 5587ec681f3Smrg v3d_job_submit(v3d, job); 5597ec681f3Smrg 5607ec681f3Smrg pipe_surface_reference(&dst_surf, NULL); 5617ec681f3Smrg pipe_surface_reference(&src_surf, NULL); 5627ec681f3Smrg} 5637ec681f3Smrg 5647ec681f3Smrg/** 5657ec681f3Smrg * Creates the VS of the custom blit shader to convert YUV plane from 5667ec681f3Smrg * the NV12 format with BROADCOM_SAND_COL128 modifier to UIF tiled format. 5677ec681f3Smrg * This vertex shader is mostly a pass-through VS. 5687ec681f3Smrg */ 5697ec681f3Smrgstatic void * 5707ec681f3Smrgv3d_get_sand8_vs(struct pipe_context *pctx) 5717ec681f3Smrg{ 5727ec681f3Smrg struct v3d_context *v3d = v3d_context(pctx); 5737ec681f3Smrg struct pipe_screen *pscreen = pctx->screen; 5747ec681f3Smrg 5757ec681f3Smrg if (v3d->sand8_blit_vs) 5767ec681f3Smrg return v3d->sand8_blit_vs; 5777ec681f3Smrg 5787ec681f3Smrg const struct nir_shader_compiler_options *options = 5797ec681f3Smrg pscreen->get_compiler_options(pscreen, 5807ec681f3Smrg PIPE_SHADER_IR_NIR, 5817ec681f3Smrg PIPE_SHADER_VERTEX); 5827ec681f3Smrg 5837ec681f3Smrg nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, 5847ec681f3Smrg options, 5857ec681f3Smrg "sand8_blit_vs"); 5867ec681f3Smrg 5877ec681f3Smrg const struct glsl_type *vec4 = glsl_vec4_type(); 5887ec681f3Smrg nir_variable *pos_in = nir_variable_create(b.shader, 5897ec681f3Smrg nir_var_shader_in, 5907ec681f3Smrg vec4, "pos"); 5917ec681f3Smrg 5927ec681f3Smrg nir_variable *pos_out = nir_variable_create(b.shader, 5937ec681f3Smrg nir_var_shader_out, 5947ec681f3Smrg vec4, "gl_Position"); 5957ec681f3Smrg pos_out->data.location = VARYING_SLOT_POS; 5967ec681f3Smrg nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf); 5977ec681f3Smrg 5987ec681f3Smrg struct pipe_shader_state shader_tmpl = { 5997ec681f3Smrg .type = PIPE_SHADER_IR_NIR, 6007ec681f3Smrg .ir.nir = b.shader, 6017ec681f3Smrg }; 6027ec681f3Smrg 6037ec681f3Smrg v3d->sand8_blit_vs = pctx->create_vs_state(pctx, &shader_tmpl); 6047ec681f3Smrg 
6057ec681f3Smrg return v3d->sand8_blit_vs; 6067ec681f3Smrg} 6077ec681f3Smrg/** 6087ec681f3Smrg * Creates the FS of the custom blit shader to convert YUV plane from 6097ec681f3Smrg * the NV12 format with BROADCOM_SAND_COL128 modifier to UIF tiled format. 6107ec681f3Smrg * The result texture is equivalent to a chroma (cpp=2) or luma (cpp=1) 6117ec681f3Smrg * plane for a NV12 format without the SAND modifier. 6127ec681f3Smrg */ 6137ec681f3Smrgstatic void * 6147ec681f3Smrgv3d_get_sand8_fs(struct pipe_context *pctx, int cpp) 6157ec681f3Smrg{ 6167ec681f3Smrg struct v3d_context *v3d = v3d_context(pctx); 6177ec681f3Smrg struct pipe_screen *pscreen = pctx->screen; 6187ec681f3Smrg struct pipe_shader_state **cached_shader; 6197ec681f3Smrg const char *name; 6207ec681f3Smrg 6217ec681f3Smrg if (cpp == 1) { 6227ec681f3Smrg cached_shader = &v3d->sand8_blit_fs_luma; 6237ec681f3Smrg name = "sand8_blit_fs_luma"; 6247ec681f3Smrg } else { 6257ec681f3Smrg cached_shader = &v3d->sand8_blit_fs_chroma; 6267ec681f3Smrg name = "sand8_blit_fs_chroma"; 6277ec681f3Smrg } 6287ec681f3Smrg 6297ec681f3Smrg if (*cached_shader) 6307ec681f3Smrg return *cached_shader; 6317ec681f3Smrg 6327ec681f3Smrg const struct nir_shader_compiler_options *options = 6337ec681f3Smrg pscreen->get_compiler_options(pscreen, 6347ec681f3Smrg PIPE_SHADER_IR_NIR, 6357ec681f3Smrg PIPE_SHADER_FRAGMENT); 6367ec681f3Smrg 6377ec681f3Smrg nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, 6387ec681f3Smrg options, "%s", name); 6397ec681f3Smrg const struct glsl_type *vec4 = glsl_vec4_type(); 6407ec681f3Smrg 6417ec681f3Smrg const struct glsl_type *glsl_int = glsl_int_type(); 6427ec681f3Smrg 6437ec681f3Smrg nir_variable *color_out = 6447ec681f3Smrg nir_variable_create(b.shader, nir_var_shader_out, 6457ec681f3Smrg vec4, "f_color"); 6467ec681f3Smrg color_out->data.location = FRAG_RESULT_COLOR; 6477ec681f3Smrg 6487ec681f3Smrg nir_variable *pos_in = 6497ec681f3Smrg nir_variable_create(b.shader, nir_var_shader_in, vec4, 
"pos"); 6507ec681f3Smrg pos_in->data.location = VARYING_SLOT_POS; 6517ec681f3Smrg nir_ssa_def *pos = nir_load_var(&b, pos_in); 6527ec681f3Smrg 6537ec681f3Smrg nir_ssa_def *zero = nir_imm_int(&b, 0); 6547ec681f3Smrg nir_ssa_def *one = nir_imm_int(&b, 1); 6557ec681f3Smrg nir_ssa_def *two = nir_imm_int(&b, 2); 6567ec681f3Smrg nir_ssa_def *six = nir_imm_int(&b, 6); 6577ec681f3Smrg nir_ssa_def *seven = nir_imm_int(&b, 7); 6587ec681f3Smrg nir_ssa_def *eight = nir_imm_int(&b, 8); 6597ec681f3Smrg 6607ec681f3Smrg nir_ssa_def *x = nir_f2i32(&b, nir_channel(&b, pos, 0)); 6617ec681f3Smrg nir_ssa_def *y = nir_f2i32(&b, nir_channel(&b, pos, 1)); 6627ec681f3Smrg 6637ec681f3Smrg nir_variable *stride_in = 6647ec681f3Smrg nir_variable_create(b.shader, nir_var_uniform, glsl_int, 6657ec681f3Smrg "sand8_stride"); 6667ec681f3Smrg nir_ssa_def *stride = 6677ec681f3Smrg nir_load_uniform(&b, 1, 32, zero, 6687ec681f3Smrg .base = stride_in->data.driver_location, 6697ec681f3Smrg .range = 1, 6707ec681f3Smrg .dest_type = nir_type_int32); 6717ec681f3Smrg 6727ec681f3Smrg nir_ssa_def *x_offset; 6737ec681f3Smrg nir_ssa_def *y_offset; 6747ec681f3Smrg 6757ec681f3Smrg /* UIF tiled format is composed by UIF blocks, Each block has 6767ec681f3Smrg * four 64 byte microtiles. Inside each microtile pixels are stored 6777ec681f3Smrg * in raster format. But microtiles have different dimensions 6787ec681f3Smrg * based in the bits per pixel of the image. 6797ec681f3Smrg * 6807ec681f3Smrg * 8bpp microtile dimensions are 8x8 6817ec681f3Smrg * 16bpp microtile dimensions are 8x4 6827ec681f3Smrg * 32bpp microtile dimensions are 4x4 6837ec681f3Smrg * 6847ec681f3Smrg * As we are reading and writing with 32bpp to optimize 6857ec681f3Smrg * the number of texture operations during the blit. We need 6867ec681f3Smrg * to adjust the offsets were we read and write as data will 6877ec681f3Smrg * be later read using 8bpp (luma) and 16bpp (chroma). 
6887ec681f3Smrg * 6897ec681f3Smrg * For chroma 8x4 16bpp raster order is compatible with 4x4 6907ec681f3Smrg * 32bpp. In both layouts each line has 8*2 == 4*4 == 16 bytes. 6917ec681f3Smrg * But luma 8x8 8bpp raster order is not compatible 6927ec681f3Smrg * with 4x4 32bpp. 8bpp has 8 bytes per line, and 32bpp has 6937ec681f3Smrg * 16 bytes per line. So if we read a 8bpp texture that was 6947ec681f3Smrg * written as 32bpp texture. Bytes would be misplaced. 6957ec681f3Smrg * 6967ec681f3Smrg * inter/intra_utile_x_offests takes care of mapping the offsets 6977ec681f3Smrg * between microtiles to deal with this issue for luma planes. 6987ec681f3Smrg */ 6997ec681f3Smrg if (cpp == 1) { 7007ec681f3Smrg nir_ssa_def *intra_utile_x_offset = 7017ec681f3Smrg nir_ishl(&b, nir_iand_imm(&b, x, 1), two); 7027ec681f3Smrg nir_ssa_def *inter_utile_x_offset = 7037ec681f3Smrg nir_ishl(&b, nir_iand_imm(&b, x, 60), one); 7047ec681f3Smrg nir_ssa_def *stripe_offset= 7057ec681f3Smrg nir_ishl(&b,nir_imul(&b,nir_ishr_imm(&b, x, 6), 7067ec681f3Smrg stride), 7077ec681f3Smrg seven); 7087ec681f3Smrg 7097ec681f3Smrg x_offset = nir_iadd(&b, stripe_offset, 7107ec681f3Smrg nir_iadd(&b, intra_utile_x_offset, 7117ec681f3Smrg inter_utile_x_offset)); 7127ec681f3Smrg y_offset = nir_iadd(&b, 7137ec681f3Smrg nir_ishl(&b, nir_iand_imm(&b, x, 2), six), 7147ec681f3Smrg nir_ishl(&b, y, eight)); 7157ec681f3Smrg } else { 7167ec681f3Smrg nir_ssa_def *stripe_offset= 7177ec681f3Smrg nir_ishl(&b,nir_imul(&b,nir_ishr_imm(&b, x, 5), 7187ec681f3Smrg stride), 7197ec681f3Smrg seven); 7207ec681f3Smrg x_offset = nir_iadd(&b, stripe_offset, 7217ec681f3Smrg nir_ishl(&b, nir_iand_imm(&b, x, 31), two)); 7227ec681f3Smrg y_offset = nir_ishl(&b, y, seven); 7237ec681f3Smrg } 7247ec681f3Smrg nir_ssa_def *ubo_offset = nir_iadd(&b, x_offset, y_offset); 7257ec681f3Smrg nir_ssa_def *load = 7267ec681f3Smrg nir_load_ubo(&b, 1, 32, one, ubo_offset, 7277ec681f3Smrg .align_mul = 4, 7287ec681f3Smrg .align_offset = 0, 7297ec681f3Smrg .range_base 
= 0, 7307ec681f3Smrg .range = ~0); 7317ec681f3Smrg 7327ec681f3Smrg nir_ssa_def *output = nir_unpack_unorm_4x8(&b, load); 7337ec681f3Smrg 7347ec681f3Smrg nir_store_var(&b, color_out, 7357ec681f3Smrg output, 7367ec681f3Smrg 0xF); 7377ec681f3Smrg 7387ec681f3Smrg struct pipe_shader_state shader_tmpl = { 7397ec681f3Smrg .type = PIPE_SHADER_IR_NIR, 7407ec681f3Smrg .ir.nir = b.shader, 7417ec681f3Smrg }; 7427ec681f3Smrg 7437ec681f3Smrg *cached_shader = pctx->create_fs_state(pctx, &shader_tmpl); 7447ec681f3Smrg 7457ec681f3Smrg return *cached_shader; 7467ec681f3Smrg} 7477ec681f3Smrg 7487ec681f3Smrg/** 7497ec681f3Smrg * Turns NV12 with SAND8 format modifier from raster-order with interleaved 7507ec681f3Smrg * luma and chroma 128-byte-wide-columns to tiled format for luma and chroma. 7517ec681f3Smrg * 7527ec681f3Smrg * This implementation is based on vc4_yuv_blit. 7537ec681f3Smrg */ 7547ec681f3Smrgstatic void 7557ec681f3Smrgv3d_sand8_blit(struct pipe_context *pctx, struct pipe_blit_info *info) 7567ec681f3Smrg{ 7577ec681f3Smrg struct v3d_context *v3d = v3d_context(pctx); 7587ec681f3Smrg struct v3d_resource *src = v3d_resource(info->src.resource); 7597ec681f3Smrg ASSERTED struct v3d_resource *dst = v3d_resource(info->dst.resource); 7607ec681f3Smrg 7617ec681f3Smrg if (!src->sand_col128_stride) 7627ec681f3Smrg return; 7637ec681f3Smrg if (src->tiled) 7647ec681f3Smrg return; 7657ec681f3Smrg if (src->base.format != PIPE_FORMAT_R8_UNORM && 7667ec681f3Smrg src->base.format != PIPE_FORMAT_R8G8_UNORM) 7677ec681f3Smrg return; 7687ec681f3Smrg if (!(info->mask & PIPE_MASK_RGBA)) 7697ec681f3Smrg return; 7707ec681f3Smrg 7717ec681f3Smrg assert(dst->base.format == src->base.format); 7727ec681f3Smrg assert(dst->tiled); 7737ec681f3Smrg 7747ec681f3Smrg assert(info->src.box.x == 0 && info->dst.box.x == 0); 7757ec681f3Smrg assert(info->src.box.y == 0 && info->dst.box.y == 0); 7767ec681f3Smrg assert(info->src.box.width == info->dst.box.width); 7777ec681f3Smrg assert(info->src.box.height == 
info->dst.box.height); 7787ec681f3Smrg 7797ec681f3Smrg v3d_blitter_save(v3d); 7807ec681f3Smrg 7817ec681f3Smrg struct pipe_surface dst_tmpl; 7827ec681f3Smrg util_blitter_default_dst_texture(&dst_tmpl, info->dst.resource, 7837ec681f3Smrg info->dst.level, info->dst.box.z); 7847ec681f3Smrg /* Although the src textures are cpp=1 or cpp=2, the dst texture 7857ec681f3Smrg * uses a cpp=4 dst texture. So, all read/write texture ops will 7867ec681f3Smrg * be done using 32-bit read and writes. 7877ec681f3Smrg */ 7887ec681f3Smrg dst_tmpl.format = PIPE_FORMAT_R8G8B8A8_UNORM; 7897ec681f3Smrg struct pipe_surface *dst_surf = 7907ec681f3Smrg pctx->create_surface(pctx, info->dst.resource, &dst_tmpl); 7917ec681f3Smrg if (!dst_surf) { 7927ec681f3Smrg fprintf(stderr, "Failed to create YUV dst surface\n"); 7937ec681f3Smrg util_blitter_unset_running_flag(v3d->blitter); 7947ec681f3Smrg return; 7957ec681f3Smrg } 7967ec681f3Smrg 7977ec681f3Smrg uint32_t sand8_stride = src->sand_col128_stride; 7987ec681f3Smrg 7997ec681f3Smrg /* Adjust the dimensions of dst luma/chroma to match src 8007ec681f3Smrg * size now we are using a cpp=4 format. Next dimension take into 8017ec681f3Smrg * account the UIF microtile layouts. 8027ec681f3Smrg */ 8037ec681f3Smrg dst_surf->width = align(dst_surf->width, 8) / 2; 8047ec681f3Smrg if (src->cpp == 1) 8057ec681f3Smrg dst_surf->height /= 2; 8067ec681f3Smrg 8077ec681f3Smrg /* Set the constant buffer. 
*/ 8087ec681f3Smrg struct pipe_constant_buffer cb_uniforms = { 8097ec681f3Smrg .user_buffer = &sand8_stride, 8107ec681f3Smrg .buffer_size = sizeof(sand8_stride), 8117ec681f3Smrg }; 8127ec681f3Smrg 8137ec681f3Smrg pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, false, 8147ec681f3Smrg &cb_uniforms); 8157ec681f3Smrg struct pipe_constant_buffer cb_src = { 8167ec681f3Smrg .buffer = info->src.resource, 8177ec681f3Smrg .buffer_offset = src->slices[info->src.level].offset, 8187ec681f3Smrg .buffer_size = (src->bo->size - 8197ec681f3Smrg src->slices[info->src.level].offset), 8207ec681f3Smrg }; 8217ec681f3Smrg pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 2, false, 8227ec681f3Smrg &cb_src); 8237ec681f3Smrg /* Unbind the textures, to make sure we don't try to recurse into the 8247ec681f3Smrg * shadow blit. 8257ec681f3Smrg */ 8267ec681f3Smrg pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 0, 0, false, NULL); 8277ec681f3Smrg pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL); 8287ec681f3Smrg 8297ec681f3Smrg util_blitter_custom_shader(v3d->blitter, dst_surf, 8307ec681f3Smrg v3d_get_sand8_vs(pctx), 8317ec681f3Smrg v3d_get_sand8_fs(pctx, src->cpp)); 8327ec681f3Smrg 8337ec681f3Smrg util_blitter_restore_textures(v3d->blitter); 8347ec681f3Smrg util_blitter_restore_constant_buffer_state(v3d->blitter); 8357ec681f3Smrg 8367ec681f3Smrg /* Restore cb1 (util_blitter doesn't handle this one). */ 8377ec681f3Smrg struct pipe_constant_buffer cb_disabled = { 0 }; 8387ec681f3Smrg pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, false, 8397ec681f3Smrg &cb_disabled); 8407ec681f3Smrg 8417ec681f3Smrg pipe_surface_reference(&dst_surf, NULL); 8427ec681f3Smrg 8437ec681f3Smrg info->mask &= ~PIPE_MASK_RGBA; 8447ec681f3Smrg return; 8457ec681f3Smrg} 8467ec681f3Smrg 8477ec681f3Smrg 84801e04c3fSmrg/* Optimal hardware path for blitting pixels. 84901e04c3fSmrg * Scaling, format conversion, up- and downsampling (resolve) are allowed. 
85001e04c3fSmrg */ 85101e04c3fSmrgvoid 85201e04c3fSmrgv3d_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) 85301e04c3fSmrg{ 8549f464c52Smaya struct v3d_context *v3d = v3d_context(pctx); 85501e04c3fSmrg struct pipe_blit_info info = *blit_info; 85601e04c3fSmrg 8577ec681f3Smrg v3d_sand8_blit(pctx, &info); 8587ec681f3Smrg 8597ec681f3Smrg v3d_tfu_blit(pctx, &info); 8607ec681f3Smrg 8617ec681f3Smrg v3d_tlb_blit(pctx, &info); 86201e04c3fSmrg 8637ec681f3Smrg v3d_stencil_blit(pctx, &info); 8649f464c52Smaya 8657ec681f3Smrg v3d_render_blit(pctx, &info); 86601e04c3fSmrg 8679f464c52Smaya /* Flush our blit jobs immediately. They're unlikely to get reused by 8689f464c52Smaya * normal drawing or other blits, and without flushing we can easily 8699f464c52Smaya * run into unexpected OOMs when blits are used for a large series of 8709f464c52Smaya * texture uploads before using the textures. 8719f464c52Smaya */ 8727ec681f3Smrg v3d_flush_jobs_writing_resource(v3d, info.dst.resource, 8737ec681f3Smrg V3D_FLUSH_DEFAULT, false); 87401e04c3fSmrg} 875