101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2015-2017 Broadcom
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2101e04c3fSmrg * IN THE SOFTWARE.
2201e04c3fSmrg */
2301e04c3fSmrg
247ec681f3Smrg#include "util/format/u_format.h"
2501e04c3fSmrg#include "util/u_surface.h"
2601e04c3fSmrg#include "util/u_blitter.h"
277ec681f3Smrg#include "compiler/nir/nir_builder.h"
2801e04c3fSmrg#include "v3d_context.h"
297ec681f3Smrg#include "broadcom/common/v3d_tiling.h"
3001e04c3fSmrg
3101e04c3fSmrgvoid
3201e04c3fSmrgv3d_blitter_save(struct v3d_context *v3d)
3301e04c3fSmrg{
3401e04c3fSmrg        util_blitter_save_fragment_constant_buffer_slot(v3d->blitter,
3501e04c3fSmrg                                                        v3d->constbuf[PIPE_SHADER_FRAGMENT].cb);
3601e04c3fSmrg        util_blitter_save_vertex_buffer_slot(v3d->blitter, v3d->vertexbuf.vb);
3701e04c3fSmrg        util_blitter_save_vertex_elements(v3d->blitter, v3d->vtx);
3801e04c3fSmrg        util_blitter_save_vertex_shader(v3d->blitter, v3d->prog.bind_vs);
397ec681f3Smrg        util_blitter_save_geometry_shader(v3d->blitter, v3d->prog.bind_gs);
4001e04c3fSmrg        util_blitter_save_so_targets(v3d->blitter, v3d->streamout.num_targets,
4101e04c3fSmrg                                     v3d->streamout.targets);
4201e04c3fSmrg        util_blitter_save_rasterizer(v3d->blitter, v3d->rasterizer);
4301e04c3fSmrg        util_blitter_save_viewport(v3d->blitter, &v3d->viewport);
4401e04c3fSmrg        util_blitter_save_scissor(v3d->blitter, &v3d->scissor);
4501e04c3fSmrg        util_blitter_save_fragment_shader(v3d->blitter, v3d->prog.bind_fs);
4601e04c3fSmrg        util_blitter_save_blend(v3d->blitter, v3d->blend);
4701e04c3fSmrg        util_blitter_save_depth_stencil_alpha(v3d->blitter, v3d->zsa);
4801e04c3fSmrg        util_blitter_save_stencil_ref(v3d->blitter, &v3d->stencil_ref);
4901e04c3fSmrg        util_blitter_save_sample_mask(v3d->blitter, v3d->sample_mask);
5001e04c3fSmrg        util_blitter_save_framebuffer(v3d->blitter, &v3d->framebuffer);
5101e04c3fSmrg        util_blitter_save_fragment_sampler_states(v3d->blitter,
529f464c52Smaya                        v3d->tex[PIPE_SHADER_FRAGMENT].num_samplers,
539f464c52Smaya                        (void **)v3d->tex[PIPE_SHADER_FRAGMENT].samplers);
5401e04c3fSmrg        util_blitter_save_fragment_sampler_views(v3d->blitter,
559f464c52Smaya                        v3d->tex[PIPE_SHADER_FRAGMENT].num_textures,
569f464c52Smaya                        v3d->tex[PIPE_SHADER_FRAGMENT].textures);
5701e04c3fSmrg        util_blitter_save_so_targets(v3d->blitter, v3d->streamout.num_targets,
5801e04c3fSmrg                                     v3d->streamout.targets);
5901e04c3fSmrg}
6001e04c3fSmrg
617ec681f3Smrgstatic void
6201e04c3fSmrgv3d_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
6301e04c3fSmrg{
6401e04c3fSmrg        struct v3d_context *v3d = v3d_context(ctx);
6501e04c3fSmrg        struct v3d_resource *src = v3d_resource(info->src.resource);
6601e04c3fSmrg        struct pipe_resource *tiled = NULL;
6701e04c3fSmrg
687ec681f3Smrg        if (!info->mask)
697ec681f3Smrg                return;
707ec681f3Smrg
7101e04c3fSmrg        if (!src->tiled) {
7201e04c3fSmrg                struct pipe_box box = {
7301e04c3fSmrg                        .x = 0,
7401e04c3fSmrg                        .y = 0,
7501e04c3fSmrg                        .width = u_minify(info->src.resource->width0,
7601e04c3fSmrg                                           info->src.level),
7701e04c3fSmrg                        .height = u_minify(info->src.resource->height0,
7801e04c3fSmrg                                           info->src.level),
7901e04c3fSmrg                        .depth = 1,
8001e04c3fSmrg                };
8101e04c3fSmrg                struct pipe_resource tmpl = {
8201e04c3fSmrg                        .target = info->src.resource->target,
8301e04c3fSmrg                        .format = info->src.resource->format,
8401e04c3fSmrg                        .width0 = box.width,
8501e04c3fSmrg                        .height0 = box.height,
8601e04c3fSmrg                        .depth0 = 1,
8701e04c3fSmrg                        .array_size = 1,
8801e04c3fSmrg                };
8901e04c3fSmrg                tiled = ctx->screen->resource_create(ctx->screen, &tmpl);
9001e04c3fSmrg                if (!tiled) {
9101e04c3fSmrg                        fprintf(stderr, "Failed to create tiled blit temp\n");
927ec681f3Smrg                        return;
9301e04c3fSmrg                }
9401e04c3fSmrg                ctx->resource_copy_region(ctx,
9501e04c3fSmrg                                          tiled, 0,
9601e04c3fSmrg                                          0, 0, 0,
9701e04c3fSmrg                                          info->src.resource, info->src.level,
9801e04c3fSmrg                                          &box);
9901e04c3fSmrg                info->src.level = 0;
10001e04c3fSmrg                info->src.resource = tiled;
10101e04c3fSmrg        }
10201e04c3fSmrg
10301e04c3fSmrg        if (!util_blitter_is_blit_supported(v3d->blitter, info)) {
10401e04c3fSmrg                fprintf(stderr, "blit unsupported %s -> %s\n",
10501e04c3fSmrg                    util_format_short_name(info->src.resource->format),
10601e04c3fSmrg                    util_format_short_name(info->dst.resource->format));
1077ec681f3Smrg                return;
10801e04c3fSmrg        }
10901e04c3fSmrg
11001e04c3fSmrg        v3d_blitter_save(v3d);
11101e04c3fSmrg        util_blitter_blit(v3d->blitter, info);
11201e04c3fSmrg
11301e04c3fSmrg        pipe_resource_reference(&tiled, NULL);
1147ec681f3Smrg        info->mask = 0;
11501e04c3fSmrg}
11601e04c3fSmrg
11701e04c3fSmrg/* Implement stencil blits by reinterpreting the stencil data as an RGBA8888
11801e04c3fSmrg * or R8 texture.
11901e04c3fSmrg */
12001e04c3fSmrgstatic void
1217ec681f3Smrgv3d_stencil_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
12201e04c3fSmrg{
12301e04c3fSmrg        struct v3d_context *v3d = v3d_context(ctx);
12401e04c3fSmrg        struct v3d_resource *src = v3d_resource(info->src.resource);
12501e04c3fSmrg        struct v3d_resource *dst = v3d_resource(info->dst.resource);
12601e04c3fSmrg        enum pipe_format src_format, dst_format;
12701e04c3fSmrg
1287ec681f3Smrg        if ((info->mask & PIPE_MASK_S) == 0)
1297ec681f3Smrg                return;
1307ec681f3Smrg
13101e04c3fSmrg        if (src->separate_stencil) {
13201e04c3fSmrg                src = src->separate_stencil;
1337ec681f3Smrg                src_format = PIPE_FORMAT_R8_UINT;
13401e04c3fSmrg        } else {
1357ec681f3Smrg                src_format = PIPE_FORMAT_RGBA8888_UINT;
13601e04c3fSmrg        }
13701e04c3fSmrg
13801e04c3fSmrg        if (dst->separate_stencil) {
13901e04c3fSmrg                dst = dst->separate_stencil;
1407ec681f3Smrg                dst_format = PIPE_FORMAT_R8_UINT;
14101e04c3fSmrg        } else {
1427ec681f3Smrg                dst_format = PIPE_FORMAT_RGBA8888_UINT;
14301e04c3fSmrg        }
14401e04c3fSmrg
14501e04c3fSmrg        /* Initialize the surface. */
14601e04c3fSmrg        struct pipe_surface dst_tmpl = {
14701e04c3fSmrg                .u.tex = {
14801e04c3fSmrg                        .level = info->dst.level,
14901e04c3fSmrg                        .first_layer = info->dst.box.z,
15001e04c3fSmrg                        .last_layer = info->dst.box.z,
15101e04c3fSmrg                },
15201e04c3fSmrg                .format = dst_format,
15301e04c3fSmrg        };
15401e04c3fSmrg        struct pipe_surface *dst_surf =
15501e04c3fSmrg                ctx->create_surface(ctx, &dst->base, &dst_tmpl);
15601e04c3fSmrg
15701e04c3fSmrg        /* Initialize the sampler view. */
15801e04c3fSmrg        struct pipe_sampler_view src_tmpl = {
15901e04c3fSmrg                .target = src->base.target,
16001e04c3fSmrg                .format = src_format,
16101e04c3fSmrg                .u.tex = {
16201e04c3fSmrg                        .first_level = info->src.level,
16301e04c3fSmrg                        .last_level = info->src.level,
16401e04c3fSmrg                        .first_layer = 0,
16501e04c3fSmrg                        .last_layer = (PIPE_TEXTURE_3D ?
16601e04c3fSmrg                                       u_minify(src->base.depth0,
16701e04c3fSmrg                                                info->src.level) - 1 :
16801e04c3fSmrg                                       src->base.array_size - 1),
16901e04c3fSmrg                },
17001e04c3fSmrg                .swizzle_r = PIPE_SWIZZLE_X,
17101e04c3fSmrg                .swizzle_g = PIPE_SWIZZLE_Y,
17201e04c3fSmrg                .swizzle_b = PIPE_SWIZZLE_Z,
17301e04c3fSmrg                .swizzle_a = PIPE_SWIZZLE_W,
17401e04c3fSmrg        };
17501e04c3fSmrg        struct pipe_sampler_view *src_view =
17601e04c3fSmrg                ctx->create_sampler_view(ctx, &src->base, &src_tmpl);
17701e04c3fSmrg
17801e04c3fSmrg        v3d_blitter_save(v3d);
17901e04c3fSmrg        util_blitter_blit_generic(v3d->blitter, dst_surf, &info->dst.box,
18001e04c3fSmrg                                  src_view, &info->src.box,
18101e04c3fSmrg                                  src->base.width0, src->base.height0,
18201e04c3fSmrg                                  PIPE_MASK_R,
18301e04c3fSmrg                                  PIPE_TEX_FILTER_NEAREST,
18401e04c3fSmrg                                  info->scissor_enable ? &info->scissor : NULL,
1857ec681f3Smrg                                  info->alpha_blend, false);
18601e04c3fSmrg
18701e04c3fSmrg        pipe_surface_reference(&dst_surf, NULL);
18801e04c3fSmrg        pipe_sampler_view_reference(&src_view, NULL);
1897ec681f3Smrg
1907ec681f3Smrg        info->mask &= ~PIPE_MASK_S;
19101e04c3fSmrg}
19201e04c3fSmrg
/*
 * Bit-field layout used when filling in struct drm_v3d_submit_tfu.
 */

/* --- tfu.ioa (output address word) --- */

/* Disable level 0 write, just write following mipmaps */
#define V3D_TFU_IOA_DIMTW                       (1 << 0)

/* Output tiling format field and its values. */
#define V3D_TFU_IOA_FORMAT_SHIFT                3
#define V3D_TFU_IOA_FORMAT_LINEARTILE           3
#define V3D_TFU_IOA_FORMAT_UBLINEAR_1_COLUMN    4
#define V3D_TFU_IOA_FORMAT_UBLINEAR_2_COLUMN    5
#define V3D_TFU_IOA_FORMAT_UIF_NO_XOR           6
#define V3D_TFU_IOA_FORMAT_UIF_XOR              7

/* --- tfu.icfg (configuration word) --- */

/* Number of mip levels to generate beyond the base level. */
#define V3D_TFU_ICFG_NUMMM_SHIFT                5
/* Texture type (hardware texture format). */
#define V3D_TFU_ICFG_TTYPE_SHIFT                9

/* Extra output padding for UIF destinations, in UIF blocks. */
#define V3D_TFU_ICFG_OPAD_SHIFT                 22

/* Input tiling format field and its values. */
#define V3D_TFU_ICFG_FORMAT_SHIFT               18
#define V3D_TFU_ICFG_FORMAT_RASTER              0
#define V3D_TFU_ICFG_FORMAT_SAND_128            1
#define V3D_TFU_ICFG_FORMAT_SAND_256            2
#define V3D_TFU_ICFG_FORMAT_LINEARTILE          11
#define V3D_TFU_ICFG_FORMAT_UBLINEAR_1_COLUMN   12
#define V3D_TFU_ICFG_FORMAT_UBLINEAR_2_COLUMN   13
#define V3D_TFU_ICFG_FORMAT_UIF_NO_XOR          14
#define V3D_TFU_ICFG_FORMAT_UIF_XOR             15
2169f464c52Smaya
2179f464c52Smayastatic bool
2189f464c52Smayav3d_tfu(struct pipe_context *pctx,
2199f464c52Smaya        struct pipe_resource *pdst,
2209f464c52Smaya        struct pipe_resource *psrc,
2219f464c52Smaya        unsigned int src_level,
2229f464c52Smaya        unsigned int base_level,
2239f464c52Smaya        unsigned int last_level,
2249f464c52Smaya        unsigned int src_layer,
2257ec681f3Smrg        unsigned int dst_layer,
2267ec681f3Smrg        bool for_mipmap)
2279f464c52Smaya{
2289f464c52Smaya        struct v3d_context *v3d = v3d_context(pctx);
2299f464c52Smaya        struct v3d_screen *screen = v3d->screen;
2309f464c52Smaya        struct v3d_resource *src = v3d_resource(psrc);
2319f464c52Smaya        struct v3d_resource *dst = v3d_resource(pdst);
2329f464c52Smaya        struct v3d_resource_slice *src_base_slice = &src->slices[src_level];
2339f464c52Smaya        struct v3d_resource_slice *dst_base_slice = &dst->slices[base_level];
2349f464c52Smaya        int msaa_scale = pdst->nr_samples > 1 ? 2 : 1;
2359f464c52Smaya        int width = u_minify(pdst->width0, base_level) * msaa_scale;
2369f464c52Smaya        int height = u_minify(pdst->height0, base_level) * msaa_scale;
2377ec681f3Smrg        enum pipe_format pformat;
2389f464c52Smaya
2399f464c52Smaya        if (psrc->format != pdst->format)
2409f464c52Smaya                return false;
2419f464c52Smaya        if (psrc->nr_samples != pdst->nr_samples)
2429f464c52Smaya                return false;
2439f464c52Smaya
2449f464c52Smaya        if (pdst->target != PIPE_TEXTURE_2D || psrc->target != PIPE_TEXTURE_2D)
2459f464c52Smaya                return false;
2469f464c52Smaya
2479f464c52Smaya        /* Can't write to raster. */
2487ec681f3Smrg        if (dst_base_slice->tiling == V3D_TILING_RASTER)
2499f464c52Smaya                return false;
2509f464c52Smaya
2517ec681f3Smrg        /* When using TFU for blit, we are doing exact copies (both input and
2527ec681f3Smrg         * output format must be the same, no scaling, etc), so there is no
2537ec681f3Smrg         * pixel format conversions. Thus we can rewrite the format to use one
2547ec681f3Smrg         * that is TFU compatible based on its texel size.
2557ec681f3Smrg         */
2567ec681f3Smrg        if (for_mipmap) {
2577ec681f3Smrg                pformat = pdst->format;
2587ec681f3Smrg        } else {
2597ec681f3Smrg                switch (dst->cpp) {
2607ec681f3Smrg                case 16: pformat = PIPE_FORMAT_R32G32B32A32_FLOAT;   break;
2617ec681f3Smrg                case 8:  pformat = PIPE_FORMAT_R16G16B16A16_FLOAT;   break;
2627ec681f3Smrg                case 4:  pformat = PIPE_FORMAT_R32_FLOAT;            break;
2637ec681f3Smrg                case 2:  pformat = PIPE_FORMAT_R16_FLOAT;            break;
2647ec681f3Smrg                case 1:  pformat = PIPE_FORMAT_R8_UNORM;             break;
2657ec681f3Smrg                default: unreachable("unsupported format bit-size"); break;
2667ec681f3Smrg                };
2677ec681f3Smrg        }
2687ec681f3Smrg
2697ec681f3Smrg        uint32_t tex_format = v3d_get_tex_format(&screen->devinfo, pformat);
2707ec681f3Smrg
2717ec681f3Smrg        if (!v3d_tfu_supports_tex_format(&screen->devinfo, tex_format, for_mipmap)) {
2727ec681f3Smrg                assert(for_mipmap);
2737ec681f3Smrg                return false;
2747ec681f3Smrg        }
2757ec681f3Smrg
2767ec681f3Smrg        v3d_flush_jobs_writing_resource(v3d, psrc, V3D_FLUSH_DEFAULT, false);
2777ec681f3Smrg        v3d_flush_jobs_reading_resource(v3d, pdst, V3D_FLUSH_DEFAULT, false);
2789f464c52Smaya
2799f464c52Smaya        struct drm_v3d_submit_tfu tfu = {
2809f464c52Smaya                .ios = (height << 16) | width,
2819f464c52Smaya                .bo_handles = {
2829f464c52Smaya                        dst->bo->handle,
2839f464c52Smaya                        src != dst ? src->bo->handle : 0
2849f464c52Smaya                },
2859f464c52Smaya                .in_sync = v3d->out_sync,
2869f464c52Smaya                .out_sync = v3d->out_sync,
2879f464c52Smaya        };
2889f464c52Smaya        uint32_t src_offset = (src->bo->offset +
2899f464c52Smaya                               v3d_layer_offset(psrc, src_level, src_layer));
2909f464c52Smaya        tfu.iia |= src_offset;
2917ec681f3Smrg        if (src_base_slice->tiling == V3D_TILING_RASTER) {
2929f464c52Smaya                tfu.icfg |= (V3D_TFU_ICFG_FORMAT_RASTER <<
2939f464c52Smaya                             V3D_TFU_ICFG_FORMAT_SHIFT);
2949f464c52Smaya        } else {
2959f464c52Smaya                tfu.icfg |= ((V3D_TFU_ICFG_FORMAT_LINEARTILE +
2967ec681f3Smrg                              (src_base_slice->tiling - V3D_TILING_LINEARTILE)) <<
2979f464c52Smaya                             V3D_TFU_ICFG_FORMAT_SHIFT);
2989f464c52Smaya        }
2999f464c52Smaya
3009f464c52Smaya        uint32_t dst_offset = (dst->bo->offset +
3017ec681f3Smrg                               v3d_layer_offset(pdst, base_level, dst_layer));
3029f464c52Smaya        tfu.ioa |= dst_offset;
3039f464c52Smaya        if (last_level != base_level)
3049f464c52Smaya                tfu.ioa |= V3D_TFU_IOA_DIMTW;
3059f464c52Smaya        tfu.ioa |= ((V3D_TFU_IOA_FORMAT_LINEARTILE +
3067ec681f3Smrg                     (dst_base_slice->tiling - V3D_TILING_LINEARTILE)) <<
3079f464c52Smaya                    V3D_TFU_IOA_FORMAT_SHIFT);
3089f464c52Smaya
3099f464c52Smaya        tfu.icfg |= tex_format << V3D_TFU_ICFG_TTYPE_SHIFT;
3109f464c52Smaya        tfu.icfg |= (last_level - base_level) << V3D_TFU_ICFG_NUMMM_SHIFT;
3119f464c52Smaya
3129f464c52Smaya        switch (src_base_slice->tiling) {
3137ec681f3Smrg        case V3D_TILING_UIF_NO_XOR:
3147ec681f3Smrg        case V3D_TILING_UIF_XOR:
3159f464c52Smaya                tfu.iis |= (src_base_slice->padded_height /
3169f464c52Smaya                            (2 * v3d_utile_height(src->cpp)));
3179f464c52Smaya                break;
3187ec681f3Smrg        case V3D_TILING_RASTER:
3199f464c52Smaya                tfu.iis |= src_base_slice->stride / src->cpp;
3209f464c52Smaya                break;
3217ec681f3Smrg        case V3D_TILING_LINEARTILE:
3227ec681f3Smrg        case V3D_TILING_UBLINEAR_1_COLUMN:
3237ec681f3Smrg        case V3D_TILING_UBLINEAR_2_COLUMN:
3249f464c52Smaya                break;
3259f464c52Smaya       }
3269f464c52Smaya
3279f464c52Smaya        /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the
3289f464c52Smaya         * OPAD field for the destination (how many extra UIF blocks beyond
3299f464c52Smaya         * those necessary to cover the height).  When filling mipmaps, the
3309f464c52Smaya         * miplevel 1+ tiling state is inferred.
3319f464c52Smaya         */
3327ec681f3Smrg        if (dst_base_slice->tiling == V3D_TILING_UIF_NO_XOR ||
3337ec681f3Smrg            dst_base_slice->tiling == V3D_TILING_UIF_XOR) {
3349f464c52Smaya                int uif_block_h = 2 * v3d_utile_height(dst->cpp);
3359f464c52Smaya                int implicit_padded_height = align(height, uif_block_h);
3369f464c52Smaya
3379f464c52Smaya                tfu.icfg |= (((dst_base_slice->padded_height -
3389f464c52Smaya                               implicit_padded_height) / uif_block_h) <<
3399f464c52Smaya                             V3D_TFU_ICFG_OPAD_SHIFT);
3409f464c52Smaya        }
3419f464c52Smaya
3429f464c52Smaya        int ret = v3d_ioctl(screen->fd, DRM_IOCTL_V3D_SUBMIT_TFU, &tfu);
3439f464c52Smaya        if (ret != 0) {
3449f464c52Smaya                fprintf(stderr, "Failed to submit TFU job: %d\n", ret);
3459f464c52Smaya                return false;
3469f464c52Smaya        }
3479f464c52Smaya
3489f464c52Smaya        dst->writes++;
3499f464c52Smaya
3509f464c52Smaya        return true;
3519f464c52Smaya}
3529f464c52Smaya
3537ec681f3Smrgbool
3549f464c52Smayav3d_generate_mipmap(struct pipe_context *pctx,
3559f464c52Smaya                    struct pipe_resource *prsc,
3569f464c52Smaya                    enum pipe_format format,
3579f464c52Smaya                    unsigned int base_level,
3589f464c52Smaya                    unsigned int last_level,
3599f464c52Smaya                    unsigned int first_layer,
3609f464c52Smaya                    unsigned int last_layer)
3619f464c52Smaya{
3629f464c52Smaya        if (format != prsc->format)
3639f464c52Smaya                return false;
3649f464c52Smaya
3659f464c52Smaya        /* We could maybe support looping over layers for array textures, but
3669f464c52Smaya         * we definitely don't support 3D.
3679f464c52Smaya         */
3689f464c52Smaya        if (first_layer != last_layer)
3699f464c52Smaya                return false;
3709f464c52Smaya
3719f464c52Smaya        return v3d_tfu(pctx,
3729f464c52Smaya                       prsc, prsc,
3739f464c52Smaya                       base_level,
3749f464c52Smaya                       base_level, last_level,
3757ec681f3Smrg                       first_layer, first_layer,
3767ec681f3Smrg                       true);
3779f464c52Smaya}
3789f464c52Smaya
3797ec681f3Smrgstatic void
3807ec681f3Smrgv3d_tfu_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
3819f464c52Smaya{
3829f464c52Smaya        int dst_width = u_minify(info->dst.resource->width0, info->dst.level);
3839f464c52Smaya        int dst_height = u_minify(info->dst.resource->height0, info->dst.level);
3849f464c52Smaya
3859f464c52Smaya        if ((info->mask & PIPE_MASK_RGBA) == 0)
3867ec681f3Smrg                return;
3879f464c52Smaya
3889f464c52Smaya        if (info->scissor_enable ||
3899f464c52Smaya            info->dst.box.x != 0 ||
3909f464c52Smaya            info->dst.box.y != 0 ||
3919f464c52Smaya            info->dst.box.width != dst_width ||
3929f464c52Smaya            info->dst.box.height != dst_height ||
3939f464c52Smaya            info->src.box.x != 0 ||
3949f464c52Smaya            info->src.box.y != 0 ||
3959f464c52Smaya            info->src.box.width != info->dst.box.width ||
3969f464c52Smaya            info->src.box.height != info->dst.box.height) {
3977ec681f3Smrg                return;
3989f464c52Smaya        }
3999f464c52Smaya
4009f464c52Smaya        if (info->dst.format != info->src.format)
4017ec681f3Smrg                return;
4027ec681f3Smrg
4037ec681f3Smrg        if (v3d_tfu(pctx, info->dst.resource, info->src.resource,
4047ec681f3Smrg                    info->src.level,
4057ec681f3Smrg                    info->dst.level, info->dst.level,
4067ec681f3Smrg                    info->src.box.z, info->dst.box.z,
4077ec681f3Smrg                    false)) {
4087ec681f3Smrg                info->mask &= ~PIPE_MASK_RGBA;
4097ec681f3Smrg        }
4107ec681f3Smrg}
4117ec681f3Smrg
4127ec681f3Smrgstatic struct pipe_surface *
4137ec681f3Smrgv3d_get_blit_surface(struct pipe_context *pctx,
4147ec681f3Smrg                     struct pipe_resource *prsc,
4157ec681f3Smrg                     unsigned level,
4167ec681f3Smrg                     int16_t layer)
4177ec681f3Smrg{
4187ec681f3Smrg        struct pipe_surface tmpl;
4197ec681f3Smrg
4207ec681f3Smrg        tmpl.format = prsc->format;
4217ec681f3Smrg        tmpl.u.tex.level = level;
4227ec681f3Smrg        tmpl.u.tex.first_layer = layer;
4237ec681f3Smrg        tmpl.u.tex.last_layer = layer;
4247ec681f3Smrg
4257ec681f3Smrg        return pctx->create_surface(pctx, prsc, &tmpl);
4267ec681f3Smrg}
4279f464c52Smaya
/**
 * Returns true when any of the bits of size selected by (tile_size - 1) are
 * set, i.e. size is not a multiple of tile_size for a power-of-two
 * tile_size.
 */
static bool
is_tile_unaligned(unsigned size, unsigned tile_size)
{
        const unsigned low_bits_mask = tile_size - 1;

        return (size & low_bits_mask) != 0;
}
4339f464c52Smaya
4347ec681f3Smrgstatic void
4357ec681f3Smrgv3d_tlb_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
4367ec681f3Smrg{
4377ec681f3Smrg        struct v3d_context *v3d = v3d_context(pctx);
4387ec681f3Smrg        struct v3d_screen *screen = v3d->screen;
4397ec681f3Smrg
4407ec681f3Smrg        if (screen->devinfo.ver < 40 || !info->mask)
4417ec681f3Smrg                return;
4427ec681f3Smrg
4437ec681f3Smrg        bool is_color_blit = info->mask & PIPE_MASK_RGBA;
4447ec681f3Smrg        bool is_depth_blit = info->mask & PIPE_MASK_Z;
4457ec681f3Smrg        bool is_stencil_blit = info->mask & PIPE_MASK_S;
4467ec681f3Smrg
4477ec681f3Smrg        /* We should receive either a depth/stencil blit, or color blit, but
4487ec681f3Smrg         * not both.
4497ec681f3Smrg         */
4507ec681f3Smrg        assert ((is_color_blit && !is_depth_blit && !is_stencil_blit) ||
4517ec681f3Smrg                (!is_color_blit && (is_depth_blit || is_stencil_blit)));
4527ec681f3Smrg
4537ec681f3Smrg        if (info->scissor_enable)
4547ec681f3Smrg                return;
4557ec681f3Smrg
4567ec681f3Smrg        if (info->src.box.x != info->dst.box.x ||
4577ec681f3Smrg            info->src.box.y != info->dst.box.y ||
4587ec681f3Smrg            info->src.box.width != info->dst.box.width ||
4597ec681f3Smrg            info->src.box.height != info->dst.box.height)
4607ec681f3Smrg                return;
4617ec681f3Smrg
4627ec681f3Smrg        if (is_color_blit &&
4637ec681f3Smrg            util_format_is_depth_or_stencil(info->dst.resource->format))
4647ec681f3Smrg                return;
4657ec681f3Smrg
4667ec681f3Smrg        if (!v3d_rt_format_supported(&screen->devinfo, info->src.resource->format))
4677ec681f3Smrg                return;
4687ec681f3Smrg
4697ec681f3Smrg        if (v3d_get_rt_format(&screen->devinfo, info->src.resource->format) !=
4707ec681f3Smrg            v3d_get_rt_format(&screen->devinfo, info->dst.resource->format))
4717ec681f3Smrg                return;
4727ec681f3Smrg
4737ec681f3Smrg        bool msaa = (info->src.resource->nr_samples > 1 ||
4747ec681f3Smrg                     info->dst.resource->nr_samples > 1);
4757ec681f3Smrg        bool is_msaa_resolve = (info->src.resource->nr_samples > 1 &&
4767ec681f3Smrg                                info->dst.resource->nr_samples < 2);
4777ec681f3Smrg
4787ec681f3Smrg        if (is_msaa_resolve &&
4797ec681f3Smrg            !v3d_format_supports_tlb_msaa_resolve(&screen->devinfo, info->src.resource->format))
4807ec681f3Smrg                return;
4817ec681f3Smrg
4827ec681f3Smrg        v3d_flush_jobs_writing_resource(v3d, info->src.resource, V3D_FLUSH_DEFAULT, false);
4837ec681f3Smrg
4847ec681f3Smrg        struct pipe_surface *dst_surf =
4857ec681f3Smrg           v3d_get_blit_surface(pctx, info->dst.resource, info->dst.level, info->dst.box.z);
4867ec681f3Smrg        struct pipe_surface *src_surf =
4877ec681f3Smrg           v3d_get_blit_surface(pctx, info->src.resource, info->src.level, info->src.box.z);
4887ec681f3Smrg
4897ec681f3Smrg        struct pipe_surface *surfaces[V3D_MAX_DRAW_BUFFERS] = { 0 };
4907ec681f3Smrg        if (is_color_blit)
4917ec681f3Smrg                surfaces[0] = dst_surf;
4927ec681f3Smrg
4937ec681f3Smrg        uint32_t tile_width, tile_height, max_bpp;
4947ec681f3Smrg        v3d_get_tile_buffer_size(msaa, is_color_blit ? 1 : 0, surfaces, src_surf, &tile_width, &tile_height, &max_bpp);
4957ec681f3Smrg
4967ec681f3Smrg        int dst_surface_width = u_minify(info->dst.resource->width0,
4977ec681f3Smrg                                         info->dst.level);
4987ec681f3Smrg        int dst_surface_height = u_minify(info->dst.resource->height0,
4997ec681f3Smrg                                         info->dst.level);
5007ec681f3Smrg        if (is_tile_unaligned(info->dst.box.x, tile_width) ||
5017ec681f3Smrg            is_tile_unaligned(info->dst.box.y, tile_height) ||
5027ec681f3Smrg            (is_tile_unaligned(info->dst.box.width, tile_width) &&
5037ec681f3Smrg             info->dst.box.x + info->dst.box.width != dst_surface_width) ||
5047ec681f3Smrg            (is_tile_unaligned(info->dst.box.height, tile_height) &&
5057ec681f3Smrg             info->dst.box.y + info->dst.box.height != dst_surface_height)) {
5067ec681f3Smrg                pipe_surface_reference(&dst_surf, NULL);
5077ec681f3Smrg                pipe_surface_reference(&src_surf, NULL);
5087ec681f3Smrg                return;
5097ec681f3Smrg        }
5107ec681f3Smrg
5117ec681f3Smrg        struct v3d_job *job = v3d_get_job(v3d,
5127ec681f3Smrg                                          is_color_blit ? 1u : 0u,
5137ec681f3Smrg                                          surfaces,
5147ec681f3Smrg                                          is_color_blit ? NULL : dst_surf,
5157ec681f3Smrg                                          src_surf);
5167ec681f3Smrg        job->msaa = msaa;
5177ec681f3Smrg        job->tile_width = tile_width;
5187ec681f3Smrg        job->tile_height = tile_height;
5197ec681f3Smrg        job->internal_bpp = max_bpp;
5207ec681f3Smrg        job->draw_min_x = info->dst.box.x;
5217ec681f3Smrg        job->draw_min_y = info->dst.box.y;
5227ec681f3Smrg        job->draw_max_x = info->dst.box.x + info->dst.box.width;
5237ec681f3Smrg        job->draw_max_y = info->dst.box.y + info->dst.box.height;
5247ec681f3Smrg        job->scissor.disabled = false;
5257ec681f3Smrg
5267ec681f3Smrg        /* The simulator complains if we do a TLB load from a source with a
5277ec681f3Smrg         * stride that is smaller than the destination's, so we program the
5287ec681f3Smrg         * 'frame region' to match the smallest dimensions of the two surfaces.
5297ec681f3Smrg         * This should be fine because we only get here if the src and dst boxes
5307ec681f3Smrg         * match, so we know the blit involves the same tiles on both surfaces.
5317ec681f3Smrg         */
5327ec681f3Smrg        job->draw_width = MIN2(dst_surf->width, src_surf->width);
5337ec681f3Smrg        job->draw_height = MIN2(dst_surf->height, src_surf->height);
5347ec681f3Smrg        job->draw_tiles_x = DIV_ROUND_UP(job->draw_width,
5357ec681f3Smrg                                         job->tile_width);
5367ec681f3Smrg        job->draw_tiles_y = DIV_ROUND_UP(job->draw_height,
5377ec681f3Smrg                                         job->tile_height);
5387ec681f3Smrg
5397ec681f3Smrg        job->needs_flush = true;
5407ec681f3Smrg        job->num_layers = info->dst.box.depth;
5417ec681f3Smrg
5427ec681f3Smrg        job->store = 0;
5437ec681f3Smrg        if (is_color_blit) {
5447ec681f3Smrg                job->store |= PIPE_CLEAR_COLOR0;
5457ec681f3Smrg                info->mask &= ~PIPE_MASK_RGBA;
5467ec681f3Smrg        }
5477ec681f3Smrg        if (is_depth_blit) {
5487ec681f3Smrg                job->store |= PIPE_CLEAR_DEPTH;
5497ec681f3Smrg                info->mask &= ~PIPE_MASK_Z;
5507ec681f3Smrg        }
5517ec681f3Smrg        if (is_stencil_blit){
5527ec681f3Smrg                job->store |= PIPE_CLEAR_STENCIL;
5537ec681f3Smrg                info->mask &= ~PIPE_MASK_S;
5547ec681f3Smrg        }
5557ec681f3Smrg
5567ec681f3Smrg        v3d41_start_binning(v3d, job);
5577ec681f3Smrg
5587ec681f3Smrg        v3d_job_submit(v3d, job);
5597ec681f3Smrg
5607ec681f3Smrg        pipe_surface_reference(&dst_surf, NULL);
5617ec681f3Smrg        pipe_surface_reference(&src_surf, NULL);
5627ec681f3Smrg}
5637ec681f3Smrg
5647ec681f3Smrg/**
5657ec681f3Smrg * Creates the VS of the custom blit shader to convert YUV plane from
5667ec681f3Smrg * the NV12 format with BROADCOM_SAND_COL128 modifier to UIF tiled format.
5677ec681f3Smrg * This vertex shader is mostly a pass-through VS.
5687ec681f3Smrg */
5697ec681f3Smrgstatic void *
5707ec681f3Smrgv3d_get_sand8_vs(struct pipe_context *pctx)
5717ec681f3Smrg{
5727ec681f3Smrg        struct v3d_context *v3d = v3d_context(pctx);
5737ec681f3Smrg        struct pipe_screen *pscreen = pctx->screen;
5747ec681f3Smrg
5757ec681f3Smrg        if (v3d->sand8_blit_vs)
5767ec681f3Smrg                return v3d->sand8_blit_vs;
5777ec681f3Smrg
5787ec681f3Smrg        const struct nir_shader_compiler_options *options =
5797ec681f3Smrg                pscreen->get_compiler_options(pscreen,
5807ec681f3Smrg                                              PIPE_SHADER_IR_NIR,
5817ec681f3Smrg                                              PIPE_SHADER_VERTEX);
5827ec681f3Smrg
5837ec681f3Smrg        nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX,
5847ec681f3Smrg                                                       options,
5857ec681f3Smrg                                                       "sand8_blit_vs");
5867ec681f3Smrg
5877ec681f3Smrg        const struct glsl_type *vec4 = glsl_vec4_type();
5887ec681f3Smrg        nir_variable *pos_in = nir_variable_create(b.shader,
5897ec681f3Smrg                                                   nir_var_shader_in,
5907ec681f3Smrg                                                   vec4, "pos");
5917ec681f3Smrg
5927ec681f3Smrg        nir_variable *pos_out = nir_variable_create(b.shader,
5937ec681f3Smrg                                                    nir_var_shader_out,
5947ec681f3Smrg                                                    vec4, "gl_Position");
5957ec681f3Smrg        pos_out->data.location = VARYING_SLOT_POS;
5967ec681f3Smrg        nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf);
5977ec681f3Smrg
5987ec681f3Smrg        struct pipe_shader_state shader_tmpl = {
5997ec681f3Smrg                .type = PIPE_SHADER_IR_NIR,
6007ec681f3Smrg                .ir.nir = b.shader,
6017ec681f3Smrg        };
6027ec681f3Smrg
6037ec681f3Smrg        v3d->sand8_blit_vs = pctx->create_vs_state(pctx, &shader_tmpl);
6047ec681f3Smrg
6057ec681f3Smrg        return v3d->sand8_blit_vs;
6067ec681f3Smrg}
6077ec681f3Smrg/**
6087ec681f3Smrg * Creates the FS of the custom blit shader to convert YUV plane from
6097ec681f3Smrg * the NV12 format with BROADCOM_SAND_COL128 modifier to UIF tiled format.
6107ec681f3Smrg * The result texture is equivalent to a chroma (cpp=2) or luma (cpp=1)
6117ec681f3Smrg * plane for a NV12 format without the SAND modifier.
6127ec681f3Smrg */
6137ec681f3Smrgstatic void *
6147ec681f3Smrgv3d_get_sand8_fs(struct pipe_context *pctx, int cpp)
6157ec681f3Smrg{
6167ec681f3Smrg        struct v3d_context *v3d = v3d_context(pctx);
6177ec681f3Smrg        struct pipe_screen *pscreen = pctx->screen;
6187ec681f3Smrg        struct pipe_shader_state **cached_shader;
6197ec681f3Smrg        const char *name;
6207ec681f3Smrg
6217ec681f3Smrg        if (cpp == 1) {
6227ec681f3Smrg                cached_shader = &v3d->sand8_blit_fs_luma;
6237ec681f3Smrg                name = "sand8_blit_fs_luma";
6247ec681f3Smrg        } else {
6257ec681f3Smrg                cached_shader = &v3d->sand8_blit_fs_chroma;
6267ec681f3Smrg                name = "sand8_blit_fs_chroma";
6277ec681f3Smrg        }
6287ec681f3Smrg
6297ec681f3Smrg        if (*cached_shader)
6307ec681f3Smrg                return *cached_shader;
6317ec681f3Smrg
6327ec681f3Smrg        const struct nir_shader_compiler_options *options =
6337ec681f3Smrg                pscreen->get_compiler_options(pscreen,
6347ec681f3Smrg                                              PIPE_SHADER_IR_NIR,
6357ec681f3Smrg                                              PIPE_SHADER_FRAGMENT);
6367ec681f3Smrg
6377ec681f3Smrg        nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
6387ec681f3Smrg                                                       options, "%s", name);
6397ec681f3Smrg        const struct glsl_type *vec4 = glsl_vec4_type();
6407ec681f3Smrg
6417ec681f3Smrg        const struct glsl_type *glsl_int = glsl_int_type();
6427ec681f3Smrg
6437ec681f3Smrg        nir_variable *color_out =
6447ec681f3Smrg                nir_variable_create(b.shader, nir_var_shader_out,
6457ec681f3Smrg                                    vec4, "f_color");
6467ec681f3Smrg        color_out->data.location = FRAG_RESULT_COLOR;
6477ec681f3Smrg
6487ec681f3Smrg        nir_variable *pos_in =
6497ec681f3Smrg                nir_variable_create(b.shader, nir_var_shader_in, vec4, "pos");
6507ec681f3Smrg        pos_in->data.location = VARYING_SLOT_POS;
6517ec681f3Smrg        nir_ssa_def *pos = nir_load_var(&b, pos_in);
6527ec681f3Smrg
6537ec681f3Smrg        nir_ssa_def *zero = nir_imm_int(&b, 0);
6547ec681f3Smrg        nir_ssa_def *one = nir_imm_int(&b, 1);
6557ec681f3Smrg        nir_ssa_def *two = nir_imm_int(&b, 2);
6567ec681f3Smrg        nir_ssa_def *six = nir_imm_int(&b, 6);
6577ec681f3Smrg        nir_ssa_def *seven = nir_imm_int(&b, 7);
6587ec681f3Smrg        nir_ssa_def *eight = nir_imm_int(&b, 8);
6597ec681f3Smrg
6607ec681f3Smrg        nir_ssa_def *x = nir_f2i32(&b, nir_channel(&b, pos, 0));
6617ec681f3Smrg        nir_ssa_def *y = nir_f2i32(&b, nir_channel(&b, pos, 1));
6627ec681f3Smrg
6637ec681f3Smrg        nir_variable *stride_in =
6647ec681f3Smrg                nir_variable_create(b.shader, nir_var_uniform, glsl_int,
6657ec681f3Smrg                                    "sand8_stride");
6667ec681f3Smrg        nir_ssa_def *stride =
6677ec681f3Smrg                nir_load_uniform(&b, 1, 32, zero,
6687ec681f3Smrg                                 .base = stride_in->data.driver_location,
6697ec681f3Smrg                                 .range = 1,
6707ec681f3Smrg                                 .dest_type = nir_type_int32);
6717ec681f3Smrg
6727ec681f3Smrg        nir_ssa_def *x_offset;
6737ec681f3Smrg        nir_ssa_def *y_offset;
6747ec681f3Smrg
6757ec681f3Smrg        /* UIF tiled format is composed by UIF blocks, Each block has
6767ec681f3Smrg         * four 64 byte microtiles. Inside each microtile pixels are stored
6777ec681f3Smrg         * in raster format. But microtiles have different dimensions
6787ec681f3Smrg         * based in the bits per pixel of the image.
6797ec681f3Smrg         *
6807ec681f3Smrg         *   8bpp microtile dimensions are 8x8
6817ec681f3Smrg         *  16bpp microtile dimensions are 8x4
6827ec681f3Smrg         *  32bpp microtile dimensions are 4x4
6837ec681f3Smrg         *
6847ec681f3Smrg         * As we are reading and writing with 32bpp to optimize
6857ec681f3Smrg         * the number of texture operations during the blit. We need
6867ec681f3Smrg         * to adjust the offsets were we read and write as data will
6877ec681f3Smrg         * be later read using 8bpp (luma) and 16bpp (chroma).
6887ec681f3Smrg         *
6897ec681f3Smrg         * For chroma 8x4 16bpp raster order is compatible with 4x4
6907ec681f3Smrg         * 32bpp. In both layouts each line has 8*2 == 4*4 == 16 bytes.
6917ec681f3Smrg         * But luma 8x8 8bpp raster order is not compatible
6927ec681f3Smrg         * with 4x4 32bpp. 8bpp has 8 bytes per line, and 32bpp has
6937ec681f3Smrg         * 16 bytes per line. So if we read a 8bpp texture that was
6947ec681f3Smrg         * written as 32bpp texture. Bytes would be misplaced.
6957ec681f3Smrg         *
6967ec681f3Smrg         * inter/intra_utile_x_offests takes care of mapping the offsets
6977ec681f3Smrg         * between microtiles to deal with this issue for luma planes.
6987ec681f3Smrg         */
6997ec681f3Smrg        if (cpp == 1) {
7007ec681f3Smrg                nir_ssa_def *intra_utile_x_offset =
7017ec681f3Smrg                        nir_ishl(&b, nir_iand_imm(&b, x, 1), two);
7027ec681f3Smrg                nir_ssa_def *inter_utile_x_offset =
7037ec681f3Smrg                        nir_ishl(&b, nir_iand_imm(&b, x, 60), one);
7047ec681f3Smrg                nir_ssa_def *stripe_offset=
7057ec681f3Smrg                        nir_ishl(&b,nir_imul(&b,nir_ishr_imm(&b, x, 6),
7067ec681f3Smrg                                             stride),
7077ec681f3Smrg                                 seven);
7087ec681f3Smrg
7097ec681f3Smrg                x_offset = nir_iadd(&b, stripe_offset,
7107ec681f3Smrg                                        nir_iadd(&b, intra_utile_x_offset,
7117ec681f3Smrg                                                     inter_utile_x_offset));
7127ec681f3Smrg                y_offset = nir_iadd(&b,
7137ec681f3Smrg                                    nir_ishl(&b, nir_iand_imm(&b, x, 2), six),
7147ec681f3Smrg                                    nir_ishl(&b, y, eight));
7157ec681f3Smrg        } else  {
7167ec681f3Smrg                nir_ssa_def *stripe_offset=
7177ec681f3Smrg                        nir_ishl(&b,nir_imul(&b,nir_ishr_imm(&b, x, 5),
7187ec681f3Smrg                                                stride),
7197ec681f3Smrg                                 seven);
7207ec681f3Smrg                x_offset = nir_iadd(&b, stripe_offset,
7217ec681f3Smrg                               nir_ishl(&b, nir_iand_imm(&b, x, 31), two));
7227ec681f3Smrg                y_offset = nir_ishl(&b, y, seven);
7237ec681f3Smrg        }
7247ec681f3Smrg        nir_ssa_def *ubo_offset = nir_iadd(&b, x_offset, y_offset);
7257ec681f3Smrg        nir_ssa_def *load =
7267ec681f3Smrg        nir_load_ubo(&b, 1, 32, one, ubo_offset,
7277ec681f3Smrg                    .align_mul = 4,
7287ec681f3Smrg                    .align_offset = 0,
7297ec681f3Smrg                    .range_base = 0,
7307ec681f3Smrg                    .range = ~0);
7317ec681f3Smrg
7327ec681f3Smrg        nir_ssa_def *output = nir_unpack_unorm_4x8(&b, load);
7337ec681f3Smrg
7347ec681f3Smrg        nir_store_var(&b, color_out,
7357ec681f3Smrg                      output,
7367ec681f3Smrg                      0xF);
7377ec681f3Smrg
7387ec681f3Smrg        struct pipe_shader_state shader_tmpl = {
7397ec681f3Smrg                .type = PIPE_SHADER_IR_NIR,
7407ec681f3Smrg                .ir.nir = b.shader,
7417ec681f3Smrg        };
7427ec681f3Smrg
7437ec681f3Smrg        *cached_shader = pctx->create_fs_state(pctx, &shader_tmpl);
7447ec681f3Smrg
7457ec681f3Smrg        return *cached_shader;
7467ec681f3Smrg}
7477ec681f3Smrg
7487ec681f3Smrg/**
7497ec681f3Smrg * Turns NV12 with SAND8 format modifier from raster-order with interleaved
7507ec681f3Smrg * luma and chroma 128-byte-wide-columns to tiled format for luma and chroma.
7517ec681f3Smrg *
7527ec681f3Smrg * This implementation is based on vc4_yuv_blit.
7537ec681f3Smrg */
7547ec681f3Smrgstatic void
7557ec681f3Smrgv3d_sand8_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
7567ec681f3Smrg{
7577ec681f3Smrg        struct v3d_context *v3d = v3d_context(pctx);
7587ec681f3Smrg        struct v3d_resource *src = v3d_resource(info->src.resource);
7597ec681f3Smrg        ASSERTED struct v3d_resource *dst = v3d_resource(info->dst.resource);
7607ec681f3Smrg
7617ec681f3Smrg        if (!src->sand_col128_stride)
7627ec681f3Smrg                return;
7637ec681f3Smrg        if (src->tiled)
7647ec681f3Smrg                return;
7657ec681f3Smrg        if (src->base.format != PIPE_FORMAT_R8_UNORM &&
7667ec681f3Smrg            src->base.format != PIPE_FORMAT_R8G8_UNORM)
7677ec681f3Smrg                return;
7687ec681f3Smrg        if (!(info->mask & PIPE_MASK_RGBA))
7697ec681f3Smrg                return;
7707ec681f3Smrg
7717ec681f3Smrg        assert(dst->base.format == src->base.format);
7727ec681f3Smrg        assert(dst->tiled);
7737ec681f3Smrg
7747ec681f3Smrg        assert(info->src.box.x == 0 && info->dst.box.x == 0);
7757ec681f3Smrg        assert(info->src.box.y == 0 && info->dst.box.y == 0);
7767ec681f3Smrg        assert(info->src.box.width == info->dst.box.width);
7777ec681f3Smrg        assert(info->src.box.height == info->dst.box.height);
7787ec681f3Smrg
7797ec681f3Smrg        v3d_blitter_save(v3d);
7807ec681f3Smrg
7817ec681f3Smrg        struct pipe_surface dst_tmpl;
7827ec681f3Smrg        util_blitter_default_dst_texture(&dst_tmpl, info->dst.resource,
7837ec681f3Smrg                                         info->dst.level, info->dst.box.z);
7847ec681f3Smrg        /* Although the src textures are cpp=1 or cpp=2, the dst texture
7857ec681f3Smrg         * uses a cpp=4 dst texture. So, all read/write texture ops will
7867ec681f3Smrg         * be done using 32-bit read and writes.
7877ec681f3Smrg         */
7887ec681f3Smrg        dst_tmpl.format = PIPE_FORMAT_R8G8B8A8_UNORM;
7897ec681f3Smrg        struct pipe_surface *dst_surf =
7907ec681f3Smrg                pctx->create_surface(pctx, info->dst.resource, &dst_tmpl);
7917ec681f3Smrg        if (!dst_surf) {
7927ec681f3Smrg                fprintf(stderr, "Failed to create YUV dst surface\n");
7937ec681f3Smrg                util_blitter_unset_running_flag(v3d->blitter);
7947ec681f3Smrg                return;
7957ec681f3Smrg        }
7967ec681f3Smrg
7977ec681f3Smrg        uint32_t sand8_stride = src->sand_col128_stride;
7987ec681f3Smrg
7997ec681f3Smrg        /* Adjust the dimensions of dst luma/chroma to match src
8007ec681f3Smrg         * size now we are using a cpp=4 format. Next dimension take into
8017ec681f3Smrg         * account the UIF microtile layouts.
8027ec681f3Smrg         */
8037ec681f3Smrg        dst_surf->width = align(dst_surf->width, 8) / 2;
8047ec681f3Smrg        if (src->cpp == 1)
8057ec681f3Smrg                dst_surf->height /= 2;
8067ec681f3Smrg
8077ec681f3Smrg        /* Set the constant buffer. */
8087ec681f3Smrg        struct pipe_constant_buffer cb_uniforms = {
8097ec681f3Smrg                .user_buffer = &sand8_stride,
8107ec681f3Smrg                .buffer_size = sizeof(sand8_stride),
8117ec681f3Smrg        };
8127ec681f3Smrg
8137ec681f3Smrg        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, false,
8147ec681f3Smrg                                  &cb_uniforms);
8157ec681f3Smrg        struct pipe_constant_buffer cb_src = {
8167ec681f3Smrg                .buffer = info->src.resource,
8177ec681f3Smrg                .buffer_offset = src->slices[info->src.level].offset,
8187ec681f3Smrg                .buffer_size = (src->bo->size -
8197ec681f3Smrg                                src->slices[info->src.level].offset),
8207ec681f3Smrg        };
8217ec681f3Smrg        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 2, false,
8227ec681f3Smrg                                  &cb_src);
8237ec681f3Smrg        /* Unbind the textures, to make sure we don't try to recurse into the
8247ec681f3Smrg         * shadow blit.
8257ec681f3Smrg         */
8267ec681f3Smrg        pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 0, 0, false, NULL);
8277ec681f3Smrg        pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL);
8287ec681f3Smrg
8297ec681f3Smrg        util_blitter_custom_shader(v3d->blitter, dst_surf,
8307ec681f3Smrg                                   v3d_get_sand8_vs(pctx),
8317ec681f3Smrg                                   v3d_get_sand8_fs(pctx, src->cpp));
8327ec681f3Smrg
8337ec681f3Smrg        util_blitter_restore_textures(v3d->blitter);
8347ec681f3Smrg        util_blitter_restore_constant_buffer_state(v3d->blitter);
8357ec681f3Smrg
8367ec681f3Smrg        /* Restore cb1 (util_blitter doesn't handle this one). */
8377ec681f3Smrg        struct pipe_constant_buffer cb_disabled = { 0 };
8387ec681f3Smrg        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, false,
8397ec681f3Smrg                                  &cb_disabled);
8407ec681f3Smrg
8417ec681f3Smrg        pipe_surface_reference(&dst_surf, NULL);
8427ec681f3Smrg
8437ec681f3Smrg        info->mask &= ~PIPE_MASK_RGBA;
8447ec681f3Smrg        return;
8457ec681f3Smrg}
8467ec681f3Smrg
8477ec681f3Smrg
84801e04c3fSmrg/* Optimal hardware path for blitting pixels.
84901e04c3fSmrg * Scaling, format conversion, up- and downsampling (resolve) are allowed.
85001e04c3fSmrg */
85101e04c3fSmrgvoid
85201e04c3fSmrgv3d_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
85301e04c3fSmrg{
8549f464c52Smaya        struct v3d_context *v3d = v3d_context(pctx);
85501e04c3fSmrg        struct pipe_blit_info info = *blit_info;
85601e04c3fSmrg
8577ec681f3Smrg        v3d_sand8_blit(pctx, &info);
8587ec681f3Smrg
8597ec681f3Smrg        v3d_tfu_blit(pctx, &info);
8607ec681f3Smrg
8617ec681f3Smrg        v3d_tlb_blit(pctx, &info);
86201e04c3fSmrg
8637ec681f3Smrg        v3d_stencil_blit(pctx, &info);
8649f464c52Smaya
8657ec681f3Smrg        v3d_render_blit(pctx, &info);
86601e04c3fSmrg
8679f464c52Smaya        /* Flush our blit jobs immediately.  They're unlikely to get reused by
8689f464c52Smaya         * normal drawing or other blits, and without flushing we can easily
8699f464c52Smaya         * run into unexpected OOMs when blits are used for a large series of
8709f464c52Smaya         * texture uploads before using the textures.
8719f464c52Smaya         */
8727ec681f3Smrg        v3d_flush_jobs_writing_resource(v3d, info.dst.resource,
8737ec681f3Smrg                                        V3D_FLUSH_DEFAULT, false);
87401e04c3fSmrg}
875