101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2017 Broadcom
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2101e04c3fSmrg * IN THE SOFTWARE.
2201e04c3fSmrg */
2301e04c3fSmrg
247ec681f3Smrg#include "util/format/u_format.h"
2501e04c3fSmrg#include "v3d_context.h"
267ec681f3Smrg#include "broadcom/common/v3d_tiling.h"
2701e04c3fSmrg#include "broadcom/common/v3d_macros.h"
2801e04c3fSmrg#include "broadcom/cle/v3dx_pack.h"
2901e04c3fSmrg
/* Mask of the PIPE_CLEAR_COLORn bits for the first four color buffers. */
#define PIPE_CLEAR_COLOR_BUFFERS (PIPE_CLEAR_COLOR0 |                   \
                                  PIPE_CLEAR_COLOR1 |                   \
                                  PIPE_CLEAR_COLOR2 |                   \
                                  PIPE_CLEAR_COLOR3)

/* Zero-based bit index of PIPE_CLEAR_COLOR0 (ffs() is one-based). */
#define PIPE_FIRST_COLOR_BUFFER_BIT (ffs(PIPE_CLEAR_COLOR0) - 1)
3601e04c3fSmrg
3701e04c3fSmrg/* The HW queues up the load until the tile coordinates show up, but can only
3801e04c3fSmrg * track one at a time.  If we need to do more than one load, then we need to
3901e04c3fSmrg * flush out the previous load by emitting the tile coordinates and doing a
4001e04c3fSmrg * dummy store.
4101e04c3fSmrg */
4201e04c3fSmrgstatic void
4301e04c3fSmrgflush_last_load(struct v3d_cl *cl)
4401e04c3fSmrg{
4501e04c3fSmrg        if (V3D_VERSION >= 40)
4601e04c3fSmrg                return;
4701e04c3fSmrg
4801e04c3fSmrg        cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
4901e04c3fSmrg        cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
5001e04c3fSmrg                store.buffer_to_store = NONE;
5101e04c3fSmrg        }
5201e04c3fSmrg}
5301e04c3fSmrg
5401e04c3fSmrgstatic void
5501e04c3fSmrgload_general(struct v3d_cl *cl, struct pipe_surface *psurf, int buffer,
567ec681f3Smrg             int layer, uint32_t pipe_bit, uint32_t *loads_pending)
5701e04c3fSmrg{
5801e04c3fSmrg        struct v3d_surface *surf = v3d_surface(psurf);
5901e04c3fSmrg        bool separate_stencil = surf->separate_stencil && buffer == STENCIL;
6001e04c3fSmrg        if (separate_stencil) {
6101e04c3fSmrg                psurf = surf->separate_stencil;
6201e04c3fSmrg                surf = v3d_surface(psurf);
6301e04c3fSmrg        }
6401e04c3fSmrg
6501e04c3fSmrg        struct v3d_resource *rsc = v3d_resource(psurf->texture);
6601e04c3fSmrg
677ec681f3Smrg        uint32_t layer_offset =
687ec681f3Smrg                v3d_layer_offset(&rsc->base, psurf->u.tex.level,
697ec681f3Smrg                                 psurf->u.tex.first_layer + layer);
7001e04c3fSmrg        cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
7101e04c3fSmrg                load.buffer_to_load = buffer;
727ec681f3Smrg                load.address = cl_address(rsc->bo, layer_offset);
7301e04c3fSmrg
7401e04c3fSmrg#if V3D_VERSION >= 40
7501e04c3fSmrg                load.memory_format = surf->tiling;
7601e04c3fSmrg                if (separate_stencil)
7701e04c3fSmrg                        load.input_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8;
7801e04c3fSmrg                else
7901e04c3fSmrg                        load.input_image_format = surf->format;
809f464c52Smaya                load.r_b_swap = surf->swap_rb;
817ec681f3Smrg                load.force_alpha_1 = util_format_has_alpha1(psurf->format);
827ec681f3Smrg                if (surf->tiling == V3D_TILING_UIF_NO_XOR ||
837ec681f3Smrg                    surf->tiling == V3D_TILING_UIF_XOR) {
8401e04c3fSmrg                        load.height_in_ub_or_stride =
8501e04c3fSmrg                                surf->padded_height_of_output_image_in_uif_blocks;
867ec681f3Smrg                } else if (surf->tiling == V3D_TILING_RASTER) {
8701e04c3fSmrg                        struct v3d_resource_slice *slice =
8801e04c3fSmrg                                &rsc->slices[psurf->u.tex.level];
8901e04c3fSmrg                        load.height_in_ub_or_stride = slice->stride;
9001e04c3fSmrg                }
9101e04c3fSmrg
9201e04c3fSmrg                if (psurf->texture->nr_samples > 1)
9301e04c3fSmrg                        load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
9401e04c3fSmrg                else
9501e04c3fSmrg                        load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
9601e04c3fSmrg
9701e04c3fSmrg#else /* V3D_VERSION < 40 */
9801e04c3fSmrg                /* Can't do raw ZSTENCIL loads -- need to load/store them to
9901e04c3fSmrg                 * separate buffers for Z and stencil.
10001e04c3fSmrg                 */
10101e04c3fSmrg                assert(buffer != ZSTENCIL);
10201e04c3fSmrg                load.raw_mode = true;
10301e04c3fSmrg                load.padded_height_of_output_image_in_uif_blocks =
10401e04c3fSmrg                        surf->padded_height_of_output_image_in_uif_blocks;
10501e04c3fSmrg#endif /* V3D_VERSION < 40 */
10601e04c3fSmrg        }
10701e04c3fSmrg
10801e04c3fSmrg        *loads_pending &= ~pipe_bit;
10901e04c3fSmrg        if (*loads_pending)
11001e04c3fSmrg                flush_last_load(cl);
11101e04c3fSmrg}
11201e04c3fSmrg
11301e04c3fSmrgstatic void
11401e04c3fSmrgstore_general(struct v3d_job *job,
1157ec681f3Smrg              struct v3d_cl *cl, struct pipe_surface *psurf,
1167ec681f3Smrg              int layer, int buffer, int pipe_bit,
1177ec681f3Smrg              uint32_t *stores_pending, bool general_color_clear,
1187ec681f3Smrg              bool resolve_4x)
11901e04c3fSmrg{
12001e04c3fSmrg        struct v3d_surface *surf = v3d_surface(psurf);
12101e04c3fSmrg        bool separate_stencil = surf->separate_stencil && buffer == STENCIL;
12201e04c3fSmrg        if (separate_stencil) {
12301e04c3fSmrg                psurf = surf->separate_stencil;
12401e04c3fSmrg                surf = v3d_surface(psurf);
12501e04c3fSmrg        }
12601e04c3fSmrg
12701e04c3fSmrg        *stores_pending &= ~pipe_bit;
12801e04c3fSmrg        bool last_store = !(*stores_pending);
12901e04c3fSmrg
13001e04c3fSmrg        struct v3d_resource *rsc = v3d_resource(psurf->texture);
13101e04c3fSmrg
13201e04c3fSmrg        rsc->writes++;
13301e04c3fSmrg
1347ec681f3Smrg        uint32_t layer_offset =
1357ec681f3Smrg                v3d_layer_offset(&rsc->base, psurf->u.tex.level,
1367ec681f3Smrg                                 psurf->u.tex.first_layer + layer);
13701e04c3fSmrg        cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
13801e04c3fSmrg                store.buffer_to_store = buffer;
1397ec681f3Smrg                store.address = cl_address(rsc->bo, layer_offset);
14001e04c3fSmrg
14101e04c3fSmrg#if V3D_VERSION >= 40
14201e04c3fSmrg                store.clear_buffer_being_stored = false;
14301e04c3fSmrg
14401e04c3fSmrg                if (separate_stencil)
14501e04c3fSmrg                        store.output_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8;
14601e04c3fSmrg                else
14701e04c3fSmrg                        store.output_image_format = surf->format;
14801e04c3fSmrg
1499f464c52Smaya                store.r_b_swap = surf->swap_rb;
15001e04c3fSmrg                store.memory_format = surf->tiling;
15101e04c3fSmrg
1527ec681f3Smrg                if (surf->tiling == V3D_TILING_UIF_NO_XOR ||
1537ec681f3Smrg                    surf->tiling == V3D_TILING_UIF_XOR) {
15401e04c3fSmrg                        store.height_in_ub_or_stride =
15501e04c3fSmrg                                surf->padded_height_of_output_image_in_uif_blocks;
1567ec681f3Smrg                } else if (surf->tiling == V3D_TILING_RASTER) {
15701e04c3fSmrg                        struct v3d_resource_slice *slice =
15801e04c3fSmrg                                &rsc->slices[psurf->u.tex.level];
15901e04c3fSmrg                        store.height_in_ub_or_stride = slice->stride;
16001e04c3fSmrg                }
16101e04c3fSmrg
1627ec681f3Smrg                assert(!resolve_4x || job->bbuf);
16301e04c3fSmrg                if (psurf->texture->nr_samples > 1)
16401e04c3fSmrg                        store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
1657ec681f3Smrg                else if (resolve_4x && job->bbuf->texture->nr_samples > 1)
1667ec681f3Smrg                        store.decimate_mode = V3D_DECIMATE_MODE_4X;
16701e04c3fSmrg                else
16801e04c3fSmrg                        store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
16901e04c3fSmrg
17001e04c3fSmrg#else /* V3D_VERSION < 40 */
17101e04c3fSmrg                /* Can't do raw ZSTENCIL stores -- need to load/store them to
17201e04c3fSmrg                 * separate buffers for Z and stencil.
17301e04c3fSmrg                 */
17401e04c3fSmrg                assert(buffer != ZSTENCIL);
17501e04c3fSmrg                store.raw_mode = true;
17601e04c3fSmrg                if (!last_store) {
17701e04c3fSmrg                        store.disable_color_buffers_clear_on_write = true;
17801e04c3fSmrg                        store.disable_z_buffer_clear_on_write = true;
17901e04c3fSmrg                        store.disable_stencil_buffer_clear_on_write = true;
18001e04c3fSmrg                } else {
18101e04c3fSmrg                        store.disable_color_buffers_clear_on_write =
18201e04c3fSmrg                                !(((pipe_bit & PIPE_CLEAR_COLOR_BUFFERS) &&
18301e04c3fSmrg                                   general_color_clear &&
18401e04c3fSmrg                                   (job->clear & pipe_bit)));
18501e04c3fSmrg                        store.disable_z_buffer_clear_on_write =
18601e04c3fSmrg                                !(job->clear & PIPE_CLEAR_DEPTH);
18701e04c3fSmrg                        store.disable_stencil_buffer_clear_on_write =
18801e04c3fSmrg                                !(job->clear & PIPE_CLEAR_STENCIL);
18901e04c3fSmrg                }
19001e04c3fSmrg                store.padded_height_of_output_image_in_uif_blocks =
19101e04c3fSmrg                        surf->padded_height_of_output_image_in_uif_blocks;
19201e04c3fSmrg#endif /* V3D_VERSION < 40 */
19301e04c3fSmrg        }
19401e04c3fSmrg
19501e04c3fSmrg        /* There must be a TILE_COORDINATES_IMPLICIT between each store. */
19601e04c3fSmrg        if (V3D_VERSION < 40 && !last_store) {
19701e04c3fSmrg                cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
19801e04c3fSmrg        }
19901e04c3fSmrg}
20001e04c3fSmrg
20101e04c3fSmrgstatic int
20201e04c3fSmrgzs_buffer_from_pipe_bits(int pipe_clear_bits)
20301e04c3fSmrg{
20401e04c3fSmrg        switch (pipe_clear_bits & PIPE_CLEAR_DEPTHSTENCIL) {
20501e04c3fSmrg        case PIPE_CLEAR_DEPTHSTENCIL:
20601e04c3fSmrg                return ZSTENCIL;
20701e04c3fSmrg        case PIPE_CLEAR_DEPTH:
20801e04c3fSmrg                return Z;
20901e04c3fSmrg        case PIPE_CLEAR_STENCIL:
21001e04c3fSmrg                return STENCIL;
21101e04c3fSmrg        default:
21201e04c3fSmrg                return NONE;
21301e04c3fSmrg        }
21401e04c3fSmrg}
21501e04c3fSmrg
21601e04c3fSmrgstatic void
2177ec681f3Smrgv3d_rcl_emit_loads(struct v3d_job *job, struct v3d_cl *cl, int layer)
21801e04c3fSmrg{
2197ec681f3Smrg        /* When blitting, no color or zs buffer is loaded; instead the blit
2207ec681f3Smrg         * source buffer is loaded for the aspects that we are going to blit.
2217ec681f3Smrg         */
2227ec681f3Smrg        assert(!job->bbuf || job->load == 0);
2237ec681f3Smrg        assert(!job->bbuf || job->nr_cbufs <= 1);
2247ec681f3Smrg        assert(!job->bbuf || V3D_VERSION >= 40);
22501e04c3fSmrg
2267ec681f3Smrg        uint32_t loads_pending = job->bbuf ? job->store : job->load;
2277ec681f3Smrg
2287ec681f3Smrg        for (int i = 0; i < job->nr_cbufs; i++) {
22901e04c3fSmrg                uint32_t bit = PIPE_CLEAR_COLOR0 << i;
23001e04c3fSmrg                if (!(loads_pending & bit))
23101e04c3fSmrg                        continue;
23201e04c3fSmrg
2337ec681f3Smrg                struct pipe_surface *psurf = job->bbuf ? job->bbuf : job->cbufs[i];
2347ec681f3Smrg                assert(!job->bbuf || i == 0);
2357ec681f3Smrg
23601e04c3fSmrg                if (!psurf || (V3D_VERSION < 40 &&
23701e04c3fSmrg                               psurf->texture->nr_samples <= 1)) {
23801e04c3fSmrg                        continue;
23901e04c3fSmrg                }
24001e04c3fSmrg
2417ec681f3Smrg                load_general(cl, psurf, RENDER_TARGET_0 + i, layer,
24201e04c3fSmrg                             bit, &loads_pending);
24301e04c3fSmrg        }
24401e04c3fSmrg
24501e04c3fSmrg        if ((loads_pending & PIPE_CLEAR_DEPTHSTENCIL) &&
24601e04c3fSmrg            (V3D_VERSION >= 40 ||
24701e04c3fSmrg             (job->zsbuf && job->zsbuf->texture->nr_samples > 1))) {
2487ec681f3Smrg                struct pipe_surface *src = job->bbuf ? job->bbuf : job->zsbuf;
2497ec681f3Smrg                struct v3d_resource *rsc = v3d_resource(src->texture);
25001e04c3fSmrg
25101e04c3fSmrg                if (rsc->separate_stencil &&
25201e04c3fSmrg                    (loads_pending & PIPE_CLEAR_STENCIL)) {
2537ec681f3Smrg                        load_general(cl, src,
2547ec681f3Smrg                                     STENCIL, layer,
25501e04c3fSmrg                                     PIPE_CLEAR_STENCIL,
25601e04c3fSmrg                                     &loads_pending);
25701e04c3fSmrg                }
25801e04c3fSmrg
25901e04c3fSmrg                if (loads_pending & PIPE_CLEAR_DEPTHSTENCIL) {
2607ec681f3Smrg                        load_general(cl, src,
26101e04c3fSmrg                                     zs_buffer_from_pipe_bits(loads_pending),
2627ec681f3Smrg                                     layer,
26301e04c3fSmrg                                     loads_pending & PIPE_CLEAR_DEPTHSTENCIL,
26401e04c3fSmrg                                     &loads_pending);
26501e04c3fSmrg                }
26601e04c3fSmrg        }
26701e04c3fSmrg
26801e04c3fSmrg#if V3D_VERSION < 40
26901e04c3fSmrg        /* The initial reload will be queued until we get the
27001e04c3fSmrg         * tile coordinates.
27101e04c3fSmrg         */
27201e04c3fSmrg        if (loads_pending) {
27301e04c3fSmrg                cl_emit(cl, RELOAD_TILE_COLOR_BUFFER, load) {
27401e04c3fSmrg                        load.disable_color_buffer_load =
27501e04c3fSmrg                                (~loads_pending &
27601e04c3fSmrg                                 PIPE_CLEAR_COLOR_BUFFERS) >>
27701e04c3fSmrg                                PIPE_FIRST_COLOR_BUFFER_BIT;
27801e04c3fSmrg                        load.enable_z_load =
27901e04c3fSmrg                                loads_pending & PIPE_CLEAR_DEPTH;
28001e04c3fSmrg                        load.enable_stencil_load =
28101e04c3fSmrg                                loads_pending & PIPE_CLEAR_STENCIL;
28201e04c3fSmrg                }
28301e04c3fSmrg        }
28401e04c3fSmrg#else /* V3D_VERSION >= 40 */
28501e04c3fSmrg        assert(!loads_pending);
28601e04c3fSmrg        cl_emit(cl, END_OF_LOADS, end);
28701e04c3fSmrg#endif
28801e04c3fSmrg}
28901e04c3fSmrg
29001e04c3fSmrgstatic void
2917ec681f3Smrgv3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer)
29201e04c3fSmrg{
29301e04c3fSmrg#if V3D_VERSION < 40
2947ec681f3Smrg        UNUSED bool needs_color_clear = job->clear & PIPE_CLEAR_COLOR_BUFFERS;
2957ec681f3Smrg        UNUSED bool needs_z_clear = job->clear & PIPE_CLEAR_DEPTH;
2967ec681f3Smrg        UNUSED bool needs_s_clear = job->clear & PIPE_CLEAR_STENCIL;
29701e04c3fSmrg
29801e04c3fSmrg        /* For clearing color in a TLB general on V3D 3.3:
29901e04c3fSmrg         *
30001e04c3fSmrg         * - NONE buffer store clears all TLB color buffers.
30101e04c3fSmrg         * - color buffer store clears just the TLB color buffer being stored.
30201e04c3fSmrg         * - Z/S buffers store may not clear the TLB color buffer.
30301e04c3fSmrg         *
30401e04c3fSmrg         * And on V3D 4.1, we only have one flag for "clear the buffer being
30501e04c3fSmrg         * stored" in the general packet, and a separate packet to clear all
30601e04c3fSmrg         * color TLB buffers.
30701e04c3fSmrg         *
30801e04c3fSmrg         * As a result, we only bother flagging TLB color clears in a general
30901e04c3fSmrg         * packet when we don't have to emit a separate packet to clear all
31001e04c3fSmrg         * TLB color buffers.
31101e04c3fSmrg         */
31201e04c3fSmrg        bool general_color_clear = (needs_color_clear &&
31301e04c3fSmrg                                    (job->clear & PIPE_CLEAR_COLOR_BUFFERS) ==
31401e04c3fSmrg                                    (job->store & PIPE_CLEAR_COLOR_BUFFERS));
31501e04c3fSmrg#else
31601e04c3fSmrg        bool general_color_clear = false;
31701e04c3fSmrg#endif
31801e04c3fSmrg
31901e04c3fSmrg        uint32_t stores_pending = job->store;
32001e04c3fSmrg
32101e04c3fSmrg        /* For V3D 4.1, use general stores for all TLB stores.
32201e04c3fSmrg         *
32301e04c3fSmrg         * For V3D 3.3, we only use general stores to do raw stores for any
32401e04c3fSmrg         * MSAA surfaces.  These output UIF tiled images where each 4x MSAA
32501e04c3fSmrg         * pixel is a 2x2 quad, and the format will be that of the
32601e04c3fSmrg         * internal_type/internal_bpp, rather than the format from GL's
32701e04c3fSmrg         * perspective.  Non-MSAA surfaces will use
32801e04c3fSmrg         * STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED.
32901e04c3fSmrg         */
3307ec681f3Smrg        assert(!job->bbuf || job->nr_cbufs <= 1);
3317ec681f3Smrg        for (int i = 0; i < job->nr_cbufs; i++) {
33201e04c3fSmrg                uint32_t bit = PIPE_CLEAR_COLOR0 << i;
33301e04c3fSmrg                if (!(job->store & bit))
33401e04c3fSmrg                        continue;
33501e04c3fSmrg
33601e04c3fSmrg                struct pipe_surface *psurf = job->cbufs[i];
33701e04c3fSmrg                if (!psurf ||
33801e04c3fSmrg                    (V3D_VERSION < 40 && psurf->texture->nr_samples <= 1)) {
33901e04c3fSmrg                        continue;
34001e04c3fSmrg                }
34101e04c3fSmrg
3427ec681f3Smrg                store_general(job, cl, psurf, layer, RENDER_TARGET_0 + i, bit,
3437ec681f3Smrg                              &stores_pending, general_color_clear, job->bbuf);
34401e04c3fSmrg        }
34501e04c3fSmrg
34601e04c3fSmrg        if (job->store & PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf &&
34701e04c3fSmrg            !(V3D_VERSION < 40 && job->zsbuf->texture->nr_samples <= 1)) {
34801e04c3fSmrg                struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture);
34901e04c3fSmrg                if (rsc->separate_stencil) {
35001e04c3fSmrg                        if (job->store & PIPE_CLEAR_DEPTH) {
3517ec681f3Smrg                                store_general(job, cl, job->zsbuf, layer,
3527ec681f3Smrg                                              Z, PIPE_CLEAR_DEPTH,
35301e04c3fSmrg                                              &stores_pending,
3547ec681f3Smrg                                              general_color_clear,
3557ec681f3Smrg                                              false);
35601e04c3fSmrg                        }
35701e04c3fSmrg
35801e04c3fSmrg                        if (job->store & PIPE_CLEAR_STENCIL) {
3597ec681f3Smrg                                store_general(job, cl, job->zsbuf, layer,
3607ec681f3Smrg                                              STENCIL, PIPE_CLEAR_STENCIL,
36101e04c3fSmrg                                              &stores_pending,
3627ec681f3Smrg                                              general_color_clear,
3637ec681f3Smrg                                              false);
36401e04c3fSmrg                        }
36501e04c3fSmrg                } else {
3667ec681f3Smrg                        store_general(job, cl, job->zsbuf, layer,
36701e04c3fSmrg                                      zs_buffer_from_pipe_bits(job->store),
36801e04c3fSmrg                                      job->store & PIPE_CLEAR_DEPTHSTENCIL,
3697ec681f3Smrg                                      &stores_pending, general_color_clear,
3707ec681f3Smrg                                      false);
37101e04c3fSmrg                }
37201e04c3fSmrg        }
37301e04c3fSmrg
37401e04c3fSmrg#if V3D_VERSION < 40
37501e04c3fSmrg        if (stores_pending) {
37601e04c3fSmrg                cl_emit(cl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) {
37701e04c3fSmrg
37801e04c3fSmrg                        store.disable_color_buffer_write =
37901e04c3fSmrg                                (~stores_pending >>
38001e04c3fSmrg                                 PIPE_FIRST_COLOR_BUFFER_BIT) & 0xf;
38101e04c3fSmrg                        store.enable_z_write = stores_pending & PIPE_CLEAR_DEPTH;
38201e04c3fSmrg                        store.enable_stencil_write = stores_pending & PIPE_CLEAR_STENCIL;
38301e04c3fSmrg
38401e04c3fSmrg                        /* Note that when set this will clear all of the color
38501e04c3fSmrg                         * buffers.
38601e04c3fSmrg                         */
38701e04c3fSmrg                        store.disable_color_buffers_clear_on_write =
38801e04c3fSmrg                                !needs_color_clear;
38901e04c3fSmrg                        store.disable_z_buffer_clear_on_write =
39001e04c3fSmrg                                !needs_z_clear;
39101e04c3fSmrg                        store.disable_stencil_buffer_clear_on_write =
39201e04c3fSmrg                                !needs_s_clear;
39301e04c3fSmrg                };
39401e04c3fSmrg        } else if (needs_color_clear && !general_color_clear) {
39501e04c3fSmrg                /* If we didn't do our color clears in the general packet,
39601e04c3fSmrg                 * then emit a packet to clear all the TLB color buffers now.
39701e04c3fSmrg                 */
39801e04c3fSmrg                cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
39901e04c3fSmrg                        store.buffer_to_store = NONE;
40001e04c3fSmrg                }
40101e04c3fSmrg        }
40201e04c3fSmrg#else /* V3D_VERSION >= 40 */
4039f464c52Smaya        /* If we're emitting an RCL with GL_ARB_framebuffer_no_attachments,
4049f464c52Smaya         * we still need to emit some sort of store.
4059f464c52Smaya         */
4069f464c52Smaya        if (!job->store) {
4079f464c52Smaya                cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
4089f464c52Smaya                        store.buffer_to_store = NONE;
4099f464c52Smaya                }
4109f464c52Smaya        }
4119f464c52Smaya
41201e04c3fSmrg        assert(!stores_pending);
41301e04c3fSmrg
41401e04c3fSmrg        /* GFXH-1461/GFXH-1689: The per-buffer store command's clear
41501e04c3fSmrg         * buffer bit is broken for depth/stencil.  In addition, the
41601e04c3fSmrg         * clear packet's Z/S bit is broken, but the RTs bit ends up
41701e04c3fSmrg         * clearing Z/S.
41801e04c3fSmrg         */
41901e04c3fSmrg        if (job->clear) {
42001e04c3fSmrg                cl_emit(cl, CLEAR_TILE_BUFFERS, clear) {
42101e04c3fSmrg                        clear.clear_z_stencil_buffer = true;
42201e04c3fSmrg                        clear.clear_all_render_targets = true;
42301e04c3fSmrg                }
42401e04c3fSmrg        }
42501e04c3fSmrg#endif /* V3D_VERSION >= 40 */
42601e04c3fSmrg}
42701e04c3fSmrg
42801e04c3fSmrgstatic void
4297ec681f3Smrgv3d_rcl_emit_generic_per_tile_list(struct v3d_job *job, int layer)
43001e04c3fSmrg{
43101e04c3fSmrg        /* Emit the generic list in our indirect state -- the rcl will just
43201e04c3fSmrg         * have pointers into it.
43301e04c3fSmrg         */
43401e04c3fSmrg        struct v3d_cl *cl = &job->indirect;
43501e04c3fSmrg        v3d_cl_ensure_space(cl, 200, 1);
43601e04c3fSmrg        struct v3d_cl_reloc tile_list_start = cl_get_address(cl);
43701e04c3fSmrg
43801e04c3fSmrg        if (V3D_VERSION >= 40) {
43901e04c3fSmrg                /* V3D 4.x only requires a single tile coordinates, and
44001e04c3fSmrg                 * END_OF_LOADS switches us between loading and rendering.
44101e04c3fSmrg                 */
44201e04c3fSmrg                cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
44301e04c3fSmrg        }
44401e04c3fSmrg
4457ec681f3Smrg        v3d_rcl_emit_loads(job, cl, layer);
44601e04c3fSmrg
44701e04c3fSmrg        if (V3D_VERSION < 40) {
44801e04c3fSmrg                /* Tile Coordinates triggers the last reload and sets where
44901e04c3fSmrg                 * the stores go. There must be one per store packet.
45001e04c3fSmrg                 */
45101e04c3fSmrg                cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
45201e04c3fSmrg        }
45301e04c3fSmrg
45401e04c3fSmrg        /* The binner starts out writing tiles assuming that the initial mode
45501e04c3fSmrg         * is triangles, so make sure that's the case.
45601e04c3fSmrg         */
45701e04c3fSmrg        cl_emit(cl, PRIM_LIST_FORMAT, fmt) {
45801e04c3fSmrg                fmt.primitive_type = LIST_TRIANGLES;
45901e04c3fSmrg        }
46001e04c3fSmrg
4617ec681f3Smrg#if V3D_VERSION >= 41
4627ec681f3Smrg        /* PTB assumes that value to be 0, but hw will not set it. */
4637ec681f3Smrg        cl_emit(cl, SET_INSTANCEID, set) {
4647ec681f3Smrg           set.instance_id = 0;
4657ec681f3Smrg        }
4667ec681f3Smrg#endif
4677ec681f3Smrg
46801e04c3fSmrg        cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
46901e04c3fSmrg
4707ec681f3Smrg        v3d_rcl_emit_stores(job, cl, layer);
47101e04c3fSmrg
47201e04c3fSmrg#if V3D_VERSION >= 40
47301e04c3fSmrg        cl_emit(cl, END_OF_TILE_MARKER, end);
47401e04c3fSmrg#endif
47501e04c3fSmrg
47601e04c3fSmrg        cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
47701e04c3fSmrg
47801e04c3fSmrg        cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
47901e04c3fSmrg                branch.start = tile_list_start;
48001e04c3fSmrg                branch.end = cl_get_address(cl);
48101e04c3fSmrg        }
48201e04c3fSmrg}
48301e04c3fSmrg
48401e04c3fSmrg#if V3D_VERSION >= 40
48501e04c3fSmrgstatic void
48601e04c3fSmrgv3d_setup_render_target(struct v3d_job *job, int cbuf,
48701e04c3fSmrg                        uint32_t *rt_bpp, uint32_t *rt_type, uint32_t *rt_clamp)
48801e04c3fSmrg{
48901e04c3fSmrg        if (!job->cbufs[cbuf])
49001e04c3fSmrg                return;
49101e04c3fSmrg
49201e04c3fSmrg        struct v3d_surface *surf = v3d_surface(job->cbufs[cbuf]);
49301e04c3fSmrg        *rt_bpp = surf->internal_bpp;
4947ec681f3Smrg        if (job->bbuf) {
4957ec681f3Smrg           struct v3d_surface *bsurf = v3d_surface(job->bbuf);
4967ec681f3Smrg           *rt_bpp = MAX2(*rt_bpp, bsurf->internal_bpp);
4977ec681f3Smrg        }
49801e04c3fSmrg        *rt_type = surf->internal_type;
49901e04c3fSmrg        *rt_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
50001e04c3fSmrg}
50101e04c3fSmrg
50201e04c3fSmrg#else /* V3D_VERSION < 40 */
50301e04c3fSmrg
50401e04c3fSmrgstatic void
50501e04c3fSmrgv3d_emit_z_stencil_config(struct v3d_job *job, struct v3d_surface *surf,
50601e04c3fSmrg                          struct v3d_resource *rsc, bool is_separate_stencil)
50701e04c3fSmrg{
50801e04c3fSmrg        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_Z_STENCIL, zs) {
50901e04c3fSmrg                zs.address = cl_address(rsc->bo, surf->offset);
51001e04c3fSmrg
51101e04c3fSmrg                if (!is_separate_stencil) {
51201e04c3fSmrg                        zs.internal_type = surf->internal_type;
51301e04c3fSmrg                        zs.output_image_format = surf->format;
51401e04c3fSmrg                } else {
51501e04c3fSmrg                        zs.z_stencil_id = 1; /* Separate stencil */
51601e04c3fSmrg                }
51701e04c3fSmrg
51801e04c3fSmrg                zs.padded_height_of_output_image_in_uif_blocks =
51901e04c3fSmrg                        surf->padded_height_of_output_image_in_uif_blocks;
52001e04c3fSmrg
5217ec681f3Smrg                assert(surf->tiling != V3D_TILING_RASTER);
52201e04c3fSmrg                zs.memory_format = surf->tiling;
52301e04c3fSmrg        }
52401e04c3fSmrg
52501e04c3fSmrg        if (job->store & (is_separate_stencil ?
52601e04c3fSmrg                          PIPE_CLEAR_STENCIL :
52701e04c3fSmrg                          PIPE_CLEAR_DEPTHSTENCIL)) {
52801e04c3fSmrg                rsc->writes++;
52901e04c3fSmrg        }
53001e04c3fSmrg}
53101e04c3fSmrg#endif /* V3D_VERSION < 40 */
53201e04c3fSmrg
/* Integer division of a by b, rounded up.  Both arguments are fully
 * parenthesized so that expression arguments (e.g. `n + 1`) divide
 * correctly.  Note that b is evaluated twice.
 */
#define div_round_up(a, b) (((a) + (b) - 1) / (b))
53401e04c3fSmrg
5357ec681f3Smrgstatic bool
5367ec681f3Smrgsupertile_in_job_scissors(struct v3d_job *job,
5377ec681f3Smrg                          uint32_t x, uint32_t y, uint32_t w, uint32_t h)
5387ec681f3Smrg{
5397ec681f3Smrg   if (job->scissor.disabled || job->scissor.count == 0)
5407ec681f3Smrg      return true;
5417ec681f3Smrg
5427ec681f3Smrg   const uint32_t min_x = x * w;
5437ec681f3Smrg   const uint32_t min_y = y * h;
5447ec681f3Smrg   const uint32_t max_x = min_x + w - 1;
5457ec681f3Smrg   const uint32_t max_y = min_y + h - 1;
5467ec681f3Smrg
5477ec681f3Smrg   for (uint32_t i = 0; i < job->scissor.count; i++) {
5487ec681f3Smrg           const uint32_t min_s_x = job->scissor.rects[i].min_x;
5497ec681f3Smrg           const uint32_t min_s_y = job->scissor.rects[i].min_y;
5507ec681f3Smrg           const uint32_t max_s_x = job->scissor.rects[i].max_x;
5517ec681f3Smrg           const uint32_t max_s_y = job->scissor.rects[i].max_y;
5527ec681f3Smrg
5537ec681f3Smrg           if (max_x < min_s_x || min_x > max_s_x ||
5547ec681f3Smrg               max_y < min_s_y || min_y > max_s_y) {
5557ec681f3Smrg                   continue;
5567ec681f3Smrg           }
5577ec681f3Smrg
5587ec681f3Smrg           return true;
5597ec681f3Smrg   }
5607ec681f3Smrg
5617ec681f3Smrg   return false;
5627ec681f3Smrg}
5637ec681f3Smrg
5647ec681f3Smrgstatic void
5657ec681f3Smrgemit_render_layer(struct v3d_job *job, uint32_t layer)
5667ec681f3Smrg{
5677ec681f3Smrg        uint32_t supertile_w = 1, supertile_h = 1;
5687ec681f3Smrg
5697ec681f3Smrg        /* If doing multicore binning, we would need to initialize each
5707ec681f3Smrg         * core's tile list here.
5717ec681f3Smrg         */
5727ec681f3Smrg        uint32_t tile_alloc_offset =
5737ec681f3Smrg                layer * job->draw_tiles_x * job->draw_tiles_y * 64;
5747ec681f3Smrg        cl_emit(&job->rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
5757ec681f3Smrg                list.address = cl_address(job->tile_alloc, tile_alloc_offset);
5767ec681f3Smrg        }
5777ec681f3Smrg
5787ec681f3Smrg        cl_emit(&job->rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
5797ec681f3Smrg                uint32_t frame_w_in_supertiles, frame_h_in_supertiles;
5807ec681f3Smrg                const uint32_t max_supertiles = 256;
5817ec681f3Smrg
5827ec681f3Smrg                /* Size up our supertiles until we get under the limit. */
5837ec681f3Smrg                for (;;) {
5847ec681f3Smrg                        frame_w_in_supertiles = div_round_up(job->draw_tiles_x,
5857ec681f3Smrg                                                             supertile_w);
5867ec681f3Smrg                        frame_h_in_supertiles = div_round_up(job->draw_tiles_y,
5877ec681f3Smrg                                                             supertile_h);
5887ec681f3Smrg                        if (frame_w_in_supertiles *
5897ec681f3Smrg                                frame_h_in_supertiles < max_supertiles) {
5907ec681f3Smrg                                break;
5917ec681f3Smrg                        }
5927ec681f3Smrg
5937ec681f3Smrg                        if (supertile_w < supertile_h)
5947ec681f3Smrg                                supertile_w++;
5957ec681f3Smrg                        else
5967ec681f3Smrg                                supertile_h++;
5977ec681f3Smrg                }
5987ec681f3Smrg
5997ec681f3Smrg                config.number_of_bin_tile_lists = 1;
6007ec681f3Smrg                config.total_frame_width_in_tiles = job->draw_tiles_x;
6017ec681f3Smrg                config.total_frame_height_in_tiles = job->draw_tiles_y;
6027ec681f3Smrg
6037ec681f3Smrg                config.supertile_width_in_tiles = supertile_w;
6047ec681f3Smrg                config.supertile_height_in_tiles = supertile_h;
6057ec681f3Smrg
6067ec681f3Smrg                config.total_frame_width_in_supertiles = frame_w_in_supertiles;
6077ec681f3Smrg                config.total_frame_height_in_supertiles = frame_h_in_supertiles;
6087ec681f3Smrg        }
6097ec681f3Smrg
6107ec681f3Smrg        /* Start by clearing the tile buffer. */
6117ec681f3Smrg        cl_emit(&job->rcl, TILE_COORDINATES, coords) {
6127ec681f3Smrg                coords.tile_column_number = 0;
6137ec681f3Smrg                coords.tile_row_number = 0;
6147ec681f3Smrg        }
6157ec681f3Smrg
6167ec681f3Smrg        /* Emit an initial clear of the tile buffers.  This is necessary
6177ec681f3Smrg         * for any buffers that should be cleared (since clearing
6187ec681f3Smrg         * normally happens at the *end* of the generic tile list), but
6197ec681f3Smrg         * it's also nice to clear everything so the first tile doesn't
6207ec681f3Smrg         * inherit any contents from some previous frame.
6217ec681f3Smrg         *
6227ec681f3Smrg         * Also, implement the GFXH-1742 workaround.  There's a race in
6237ec681f3Smrg         * the HW between the RCL updating the TLB's internal type/size
6247ec681f3Smrg         * and thespawning of the QPU instances using the TLB's current
6257ec681f3Smrg         * internal type/size.  To make sure the QPUs get the right
6267ec681f3Smrg         * state, we need 1 dummy store in between internal type/size
6277ec681f3Smrg         * changes on V3D 3.x, and 2 dummy stores on 4.x.
6287ec681f3Smrg         */
6297ec681f3Smrg#if V3D_VERSION < 40
6307ec681f3Smrg        cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) {
6317ec681f3Smrg                store.buffer_to_store = NONE;
6327ec681f3Smrg        }
6337ec681f3Smrg#else
6347ec681f3Smrg        for (int i = 0; i < 2; i++) {
6357ec681f3Smrg                if (i > 0)
6367ec681f3Smrg                        cl_emit(&job->rcl, TILE_COORDINATES, coords);
6377ec681f3Smrg                cl_emit(&job->rcl, END_OF_LOADS, end);
6387ec681f3Smrg                cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) {
6397ec681f3Smrg                        store.buffer_to_store = NONE;
6407ec681f3Smrg                }
6417ec681f3Smrg                if (i == 0) {
6427ec681f3Smrg                        cl_emit(&job->rcl, CLEAR_TILE_BUFFERS, clear) {
6437ec681f3Smrg                                clear.clear_z_stencil_buffer = true;
6447ec681f3Smrg                                clear.clear_all_render_targets = true;
6457ec681f3Smrg                        }
6467ec681f3Smrg                }
6477ec681f3Smrg                cl_emit(&job->rcl, END_OF_TILE_MARKER, end);
6487ec681f3Smrg        }
6497ec681f3Smrg#endif
6507ec681f3Smrg
6517ec681f3Smrg        cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush);
6527ec681f3Smrg
6537ec681f3Smrg        v3d_rcl_emit_generic_per_tile_list(job, layer);
6547ec681f3Smrg
6557ec681f3Smrg        /* XXX perf: We should expose GL_MESA_tile_raster_order to
6567ec681f3Smrg         * improve X11 performance, but we should use Morton order
6577ec681f3Smrg         * otherwise to improve cache locality.
6587ec681f3Smrg         */
6597ec681f3Smrg        uint32_t supertile_w_in_pixels = job->tile_width * supertile_w;
6607ec681f3Smrg        uint32_t supertile_h_in_pixels = job->tile_height * supertile_h;
6617ec681f3Smrg        uint32_t min_x_supertile = job->draw_min_x / supertile_w_in_pixels;
6627ec681f3Smrg        uint32_t min_y_supertile = job->draw_min_y / supertile_h_in_pixels;
6637ec681f3Smrg
6647ec681f3Smrg        uint32_t max_x_supertile = 0;
6657ec681f3Smrg        uint32_t max_y_supertile = 0;
6667ec681f3Smrg        if (job->draw_max_x != 0 && job->draw_max_y != 0) {
6677ec681f3Smrg                max_x_supertile = (job->draw_max_x - 1) / supertile_w_in_pixels;
6687ec681f3Smrg                max_y_supertile = (job->draw_max_y - 1) / supertile_h_in_pixels;
6697ec681f3Smrg        }
6707ec681f3Smrg
6717ec681f3Smrg        for (int y = min_y_supertile; y <= max_y_supertile; y++) {
6727ec681f3Smrg                for (int x = min_x_supertile; x <= max_x_supertile; x++) {
6737ec681f3Smrg                        if (supertile_in_job_scissors(job, x, y,
6747ec681f3Smrg                                                      supertile_w_in_pixels,
6757ec681f3Smrg                                                      supertile_h_in_pixels)) {
6767ec681f3Smrg                                cl_emit(&job->rcl, SUPERTILE_COORDINATES, coords) {
6777ec681f3Smrg                                      coords.column_number_in_supertiles = x;
6787ec681f3Smrg                                      coords.row_number_in_supertiles = y;
6797ec681f3Smrg                                }
6807ec681f3Smrg                        }
6817ec681f3Smrg                }
6827ec681f3Smrg        }
6837ec681f3Smrg}
6847ec681f3Smrg
68501e04c3fSmrgvoid
68601e04c3fSmrgv3dX(emit_rcl)(struct v3d_job *job)
68701e04c3fSmrg{
68801e04c3fSmrg        /* The RCL list should be empty. */
68901e04c3fSmrg        assert(!job->rcl.bo);
69001e04c3fSmrg
6917ec681f3Smrg        v3d_cl_ensure_space_with_branch(&job->rcl, 200 +
6927ec681f3Smrg                                        MAX2(job->num_layers, 1) * 256 *
69301e04c3fSmrg                                        cl_packet_length(SUPERTILE_COORDINATES));
69401e04c3fSmrg        job->submit.rcl_start = job->rcl.bo->offset;
69501e04c3fSmrg        v3d_job_add_bo(job, job->rcl.bo);
69601e04c3fSmrg
6977ec681f3Smrg        /* Common config must be the first TILE_RENDERING_MODE_CFG
69801e04c3fSmrg         * and Z_STENCIL_CLEAR_VALUES must be last.  The ones in between are
69901e04c3fSmrg         * optional updates to the previous HW state.
70001e04c3fSmrg         */
70101e04c3fSmrg        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
70201e04c3fSmrg#if V3D_VERSION < 40
70301e04c3fSmrg                config.enable_z_store = job->store & PIPE_CLEAR_DEPTH;
70401e04c3fSmrg                config.enable_stencil_store = job->store & PIPE_CLEAR_STENCIL;
70501e04c3fSmrg#else /* V3D_VERSION >= 40 */
70601e04c3fSmrg                if (job->zsbuf) {
70701e04c3fSmrg                        struct v3d_surface *surf = v3d_surface(job->zsbuf);
70801e04c3fSmrg                        config.internal_depth_type = surf->internal_type;
70901e04c3fSmrg                }
71001e04c3fSmrg#endif /* V3D_VERSION >= 40 */
71101e04c3fSmrg
71201e04c3fSmrg                /* XXX: Early D/S clear */
71301e04c3fSmrg
71401e04c3fSmrg                switch (job->first_ez_state) {
7157ec681f3Smrg                case V3D_EZ_UNDECIDED:
7167ec681f3Smrg                case V3D_EZ_LT_LE:
71701e04c3fSmrg                        config.early_z_disable = false;
71801e04c3fSmrg                        config.early_z_test_and_update_direction =
71901e04c3fSmrg                                EARLY_Z_DIRECTION_LT_LE;
72001e04c3fSmrg                        break;
7217ec681f3Smrg                case V3D_EZ_GT_GE:
72201e04c3fSmrg                        config.early_z_disable = false;
72301e04c3fSmrg                        config.early_z_test_and_update_direction =
72401e04c3fSmrg                                EARLY_Z_DIRECTION_GT_GE;
72501e04c3fSmrg                        break;
7267ec681f3Smrg                case V3D_EZ_DISABLED:
72701e04c3fSmrg                        config.early_z_disable = true;
72801e04c3fSmrg                }
72901e04c3fSmrg
73001e04c3fSmrg                config.image_width_pixels = job->draw_width;
73101e04c3fSmrg                config.image_height_pixels = job->draw_height;
73201e04c3fSmrg
7337ec681f3Smrg                config.number_of_render_targets = MAX2(job->nr_cbufs, 1);
73401e04c3fSmrg
73501e04c3fSmrg                config.multisample_mode_4x = job->msaa;
73601e04c3fSmrg
73701e04c3fSmrg                config.maximum_bpp_of_all_render_targets = job->internal_bpp;
73801e04c3fSmrg        }
73901e04c3fSmrg
7407ec681f3Smrg        for (int i = 0; i < job->nr_cbufs; i++) {
74101e04c3fSmrg                struct pipe_surface *psurf = job->cbufs[i];
74201e04c3fSmrg                if (!psurf)
74301e04c3fSmrg                        continue;
74401e04c3fSmrg                struct v3d_surface *surf = v3d_surface(psurf);
74501e04c3fSmrg                struct v3d_resource *rsc = v3d_resource(psurf->texture);
74601e04c3fSmrg
7477ec681f3Smrg                UNUSED uint32_t config_pad = 0;
74801e04c3fSmrg                uint32_t clear_pad = 0;
74901e04c3fSmrg
75001e04c3fSmrg                /* XXX: Set the pad for raster. */
7517ec681f3Smrg                if (surf->tiling == V3D_TILING_UIF_NO_XOR ||
7527ec681f3Smrg                    surf->tiling == V3D_TILING_UIF_XOR) {
75301e04c3fSmrg                        int uif_block_height = v3d_utile_height(rsc->cpp) * 2;
75401e04c3fSmrg                        uint32_t implicit_padded_height = (align(job->draw_height, uif_block_height) /
75501e04c3fSmrg                                                           uif_block_height);
75601e04c3fSmrg                        if (surf->padded_height_of_output_image_in_uif_blocks -
75701e04c3fSmrg                            implicit_padded_height < 15) {
75801e04c3fSmrg                                config_pad = (surf->padded_height_of_output_image_in_uif_blocks -
75901e04c3fSmrg                                              implicit_padded_height);
76001e04c3fSmrg                        } else {
76101e04c3fSmrg                                config_pad = 15;
76201e04c3fSmrg                                clear_pad = surf->padded_height_of_output_image_in_uif_blocks;
76301e04c3fSmrg                        }
76401e04c3fSmrg                }
76501e04c3fSmrg
76601e04c3fSmrg#if V3D_VERSION < 40
76701e04c3fSmrg                cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
76801e04c3fSmrg                        rt.address = cl_address(rsc->bo, surf->offset);
76901e04c3fSmrg                        rt.internal_type = surf->internal_type;
77001e04c3fSmrg                        rt.output_image_format = surf->format;
77101e04c3fSmrg                        rt.memory_format = surf->tiling;
77201e04c3fSmrg                        rt.internal_bpp = surf->internal_bpp;
77301e04c3fSmrg                        rt.render_target_number = i;
77401e04c3fSmrg                        rt.pad = config_pad;
77501e04c3fSmrg
77601e04c3fSmrg                        if (job->store & PIPE_CLEAR_COLOR0 << i)
77701e04c3fSmrg                                rsc->writes++;
77801e04c3fSmrg                }
77901e04c3fSmrg#endif /* V3D_VERSION < 40 */
78001e04c3fSmrg
78101e04c3fSmrg                cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1,
78201e04c3fSmrg                        clear) {
78301e04c3fSmrg                        clear.clear_color_low_32_bits = job->clear_color[i][0];
78401e04c3fSmrg                        clear.clear_color_next_24_bits = job->clear_color[i][1] & 0xffffff;
78501e04c3fSmrg                        clear.render_target_number = i;
78601e04c3fSmrg                };
78701e04c3fSmrg
78801e04c3fSmrg                if (surf->internal_bpp >= V3D_INTERNAL_BPP_64) {
78901e04c3fSmrg                        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2,
79001e04c3fSmrg                                clear) {
79101e04c3fSmrg                                clear.clear_color_mid_low_32_bits =
79201e04c3fSmrg                                        ((job->clear_color[i][1] >> 24) |
79301e04c3fSmrg                                         (job->clear_color[i][2] << 8));
79401e04c3fSmrg                                clear.clear_color_mid_high_24_bits =
79501e04c3fSmrg                                        ((job->clear_color[i][2] >> 24) |
79601e04c3fSmrg                                         ((job->clear_color[i][3] & 0xffff) << 8));
79701e04c3fSmrg                                clear.render_target_number = i;
79801e04c3fSmrg                        };
79901e04c3fSmrg                }
80001e04c3fSmrg
80101e04c3fSmrg                if (surf->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
80201e04c3fSmrg                        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3,
80301e04c3fSmrg                                clear) {
80401e04c3fSmrg                                clear.uif_padded_height_in_uif_blocks = clear_pad;
80501e04c3fSmrg                                clear.clear_color_high_16_bits = job->clear_color[i][3] >> 16;
80601e04c3fSmrg                                clear.render_target_number = i;
80701e04c3fSmrg                        };
80801e04c3fSmrg                }
80901e04c3fSmrg        }
81001e04c3fSmrg
81101e04c3fSmrg#if V3D_VERSION >= 40
81201e04c3fSmrg        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
81301e04c3fSmrg                v3d_setup_render_target(job, 0,
81401e04c3fSmrg                                        &rt.render_target_0_internal_bpp,
81501e04c3fSmrg                                        &rt.render_target_0_internal_type,
81601e04c3fSmrg                                        &rt.render_target_0_clamp);
81701e04c3fSmrg                v3d_setup_render_target(job, 1,
81801e04c3fSmrg                                        &rt.render_target_1_internal_bpp,
81901e04c3fSmrg                                        &rt.render_target_1_internal_type,
82001e04c3fSmrg                                        &rt.render_target_1_clamp);
82101e04c3fSmrg                v3d_setup_render_target(job, 2,
82201e04c3fSmrg                                        &rt.render_target_2_internal_bpp,
82301e04c3fSmrg                                        &rt.render_target_2_internal_type,
82401e04c3fSmrg                                        &rt.render_target_2_clamp);
82501e04c3fSmrg                v3d_setup_render_target(job, 3,
82601e04c3fSmrg                                        &rt.render_target_3_internal_bpp,
82701e04c3fSmrg                                        &rt.render_target_3_internal_type,
82801e04c3fSmrg                                        &rt.render_target_3_clamp);
82901e04c3fSmrg        }
83001e04c3fSmrg#endif
83101e04c3fSmrg
83201e04c3fSmrg#if V3D_VERSION < 40
83301e04c3fSmrg        /* TODO: Don't bother emitting if we don't load/clear Z/S. */
83401e04c3fSmrg        if (job->zsbuf) {
83501e04c3fSmrg                struct pipe_surface *psurf = job->zsbuf;
83601e04c3fSmrg                struct v3d_surface *surf = v3d_surface(psurf);
83701e04c3fSmrg                struct v3d_resource *rsc = v3d_resource(psurf->texture);
83801e04c3fSmrg
83901e04c3fSmrg                v3d_emit_z_stencil_config(job, surf, rsc, false);
84001e04c3fSmrg
84101e04c3fSmrg                /* Emit the separate stencil packet if we have a resource for
84201e04c3fSmrg                 * it.  The HW will only load/store this buffer if the
84301e04c3fSmrg                 * Z/Stencil config doesn't have stencil in its format.
84401e04c3fSmrg                 */
84501e04c3fSmrg                if (surf->separate_stencil) {
84601e04c3fSmrg                        v3d_emit_z_stencil_config(job,
84701e04c3fSmrg                                                  v3d_surface(surf->separate_stencil),
84801e04c3fSmrg                                                  rsc->separate_stencil, true);
84901e04c3fSmrg                }
85001e04c3fSmrg        }
85101e04c3fSmrg#endif /* V3D_VERSION < 40 */
85201e04c3fSmrg
85301e04c3fSmrg        /* Ends rendering mode config. */
85401e04c3fSmrg        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES,
85501e04c3fSmrg                clear) {
85601e04c3fSmrg                clear.z_clear_value = job->clear_z;
85701e04c3fSmrg                clear.stencil_clear_value = job->clear_s;
85801e04c3fSmrg        };
85901e04c3fSmrg
86001e04c3fSmrg        /* Always set initial block size before the first branch, which needs
86101e04c3fSmrg         * to match the value from binning mode config.
86201e04c3fSmrg         */
86301e04c3fSmrg        cl_emit(&job->rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
86401e04c3fSmrg                init.use_auto_chained_tile_lists = true;
86501e04c3fSmrg                init.size_of_first_block_in_chained_tile_lists =
86601e04c3fSmrg                        TILE_ALLOCATION_BLOCK_SIZE_64B;
86701e04c3fSmrg        }
86801e04c3fSmrg
8697ec681f3Smrg        /* ARB_framebuffer_no_attachments allows rendering to happen even when
8707ec681f3Smrg         * the framebuffer has no attachments, the idea being that fragment
8717ec681f3Smrg         * shaders can still do image load/store, ssbo, etc without having to
8727ec681f3Smrg         * write to actual attachments, so always run at least one iteration
8737ec681f3Smrg         * of the loop.
8749f464c52Smaya         */
8757ec681f3Smrg        assert(job->num_layers > 0 || (job->load == 0 && job->store == 0));
8767ec681f3Smrg        for (int layer = 0; layer < MAX2(1, job->num_layers); layer++)
8777ec681f3Smrg                emit_render_layer(job, layer);
8789f464c52Smaya
87901e04c3fSmrg        cl_emit(&job->rcl, END_OF_RENDERING, end);
88001e04c3fSmrg}
881