101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2017 Broadcom 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg 247ec681f3Smrg#include "util/format/u_format.h" 2501e04c3fSmrg#include "v3d_context.h" 267ec681f3Smrg#include "broadcom/common/v3d_tiling.h" 2701e04c3fSmrg#include "broadcom/common/v3d_macros.h" 2801e04c3fSmrg#include "broadcom/cle/v3dx_pack.h" 2901e04c3fSmrg 3001e04c3fSmrg#define PIPE_CLEAR_COLOR_BUFFERS (PIPE_CLEAR_COLOR0 | \ 3101e04c3fSmrg PIPE_CLEAR_COLOR1 | \ 3201e04c3fSmrg PIPE_CLEAR_COLOR2 | \ 3301e04c3fSmrg PIPE_CLEAR_COLOR3) \ 3401e04c3fSmrg 3501e04c3fSmrg#define PIPE_FIRST_COLOR_BUFFER_BIT (ffs(PIPE_CLEAR_COLOR0) - 1) 3601e04c3fSmrg 3701e04c3fSmrg/* The HW queues up the load until the tile coordinates show up, but can only 3801e04c3fSmrg * track one at a time. If we need to do more than one load, then we need to 3901e04c3fSmrg * flush out the previous load by emitting the tile coordinates and doing a 4001e04c3fSmrg * dummy store. 4101e04c3fSmrg */ 4201e04c3fSmrgstatic void 4301e04c3fSmrgflush_last_load(struct v3d_cl *cl) 4401e04c3fSmrg{ 4501e04c3fSmrg if (V3D_VERSION >= 40) 4601e04c3fSmrg return; 4701e04c3fSmrg 4801e04c3fSmrg cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); 4901e04c3fSmrg cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { 5001e04c3fSmrg store.buffer_to_store = NONE; 5101e04c3fSmrg } 5201e04c3fSmrg} 5301e04c3fSmrg 5401e04c3fSmrgstatic void 5501e04c3fSmrgload_general(struct v3d_cl *cl, struct pipe_surface *psurf, int buffer, 567ec681f3Smrg int layer, uint32_t pipe_bit, uint32_t *loads_pending) 5701e04c3fSmrg{ 5801e04c3fSmrg struct v3d_surface *surf = v3d_surface(psurf); 5901e04c3fSmrg bool separate_stencil = surf->separate_stencil && buffer == STENCIL; 6001e04c3fSmrg if (separate_stencil) { 6101e04c3fSmrg psurf = surf->separate_stencil; 6201e04c3fSmrg surf = v3d_surface(psurf); 6301e04c3fSmrg } 6401e04c3fSmrg 6501e04c3fSmrg struct v3d_resource *rsc = v3d_resource(psurf->texture); 6601e04c3fSmrg 677ec681f3Smrg uint32_t layer_offset = 687ec681f3Smrg v3d_layer_offset(&rsc->base, psurf->u.tex.level, 697ec681f3Smrg psurf->u.tex.first_layer + layer); 7001e04c3fSmrg cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) { 7101e04c3fSmrg load.buffer_to_load = buffer; 727ec681f3Smrg load.address = cl_address(rsc->bo, layer_offset); 7301e04c3fSmrg 7401e04c3fSmrg#if V3D_VERSION >= 40 7501e04c3fSmrg load.memory_format = surf->tiling; 7601e04c3fSmrg if (separate_stencil) 7701e04c3fSmrg load.input_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8; 7801e04c3fSmrg else 7901e04c3fSmrg load.input_image_format = surf->format; 809f464c52Smaya load.r_b_swap = surf->swap_rb; 817ec681f3Smrg load.force_alpha_1 = util_format_has_alpha1(psurf->format); 827ec681f3Smrg if (surf->tiling == V3D_TILING_UIF_NO_XOR || 837ec681f3Smrg surf->tiling == V3D_TILING_UIF_XOR) { 8401e04c3fSmrg load.height_in_ub_or_stride = 8501e04c3fSmrg surf->padded_height_of_output_image_in_uif_blocks; 867ec681f3Smrg } else if (surf->tiling == V3D_TILING_RASTER) { 8701e04c3fSmrg struct v3d_resource_slice *slice = 8801e04c3fSmrg &rsc->slices[psurf->u.tex.level]; 8901e04c3fSmrg load.height_in_ub_or_stride = slice->stride; 9001e04c3fSmrg } 9101e04c3fSmrg 9201e04c3fSmrg if (psurf->texture->nr_samples > 1) 9301e04c3fSmrg load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; 9401e04c3fSmrg else 9501e04c3fSmrg load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; 9601e04c3fSmrg 9701e04c3fSmrg#else /* V3D_VERSION < 40 */ 9801e04c3fSmrg /* Can't do raw ZSTENCIL loads -- need to load/store them to 9901e04c3fSmrg * separate buffers for Z and stencil. 10001e04c3fSmrg */ 10101e04c3fSmrg assert(buffer != ZSTENCIL); 10201e04c3fSmrg load.raw_mode = true; 10301e04c3fSmrg load.padded_height_of_output_image_in_uif_blocks = 10401e04c3fSmrg surf->padded_height_of_output_image_in_uif_blocks; 10501e04c3fSmrg#endif /* V3D_VERSION < 40 */ 10601e04c3fSmrg } 10701e04c3fSmrg 10801e04c3fSmrg *loads_pending &= ~pipe_bit; 10901e04c3fSmrg if (*loads_pending) 11001e04c3fSmrg flush_last_load(cl); 11101e04c3fSmrg} 11201e04c3fSmrg 11301e04c3fSmrgstatic void 11401e04c3fSmrgstore_general(struct v3d_job *job, 1157ec681f3Smrg struct v3d_cl *cl, struct pipe_surface *psurf, 1167ec681f3Smrg int layer, int buffer, int pipe_bit, 1177ec681f3Smrg uint32_t *stores_pending, bool general_color_clear, 1187ec681f3Smrg bool resolve_4x) 11901e04c3fSmrg{ 12001e04c3fSmrg struct v3d_surface *surf = v3d_surface(psurf); 12101e04c3fSmrg bool separate_stencil = surf->separate_stencil && buffer == STENCIL; 12201e04c3fSmrg if (separate_stencil) { 12301e04c3fSmrg psurf = surf->separate_stencil; 12401e04c3fSmrg surf = v3d_surface(psurf); 12501e04c3fSmrg } 12601e04c3fSmrg 12701e04c3fSmrg *stores_pending &= ~pipe_bit; 12801e04c3fSmrg bool last_store = !(*stores_pending); 12901e04c3fSmrg 13001e04c3fSmrg struct v3d_resource *rsc = v3d_resource(psurf->texture); 13101e04c3fSmrg 13201e04c3fSmrg rsc->writes++; 13301e04c3fSmrg 1347ec681f3Smrg uint32_t layer_offset = 1357ec681f3Smrg v3d_layer_offset(&rsc->base, psurf->u.tex.level, 1367ec681f3Smrg psurf->u.tex.first_layer + layer); 13701e04c3fSmrg cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { 13801e04c3fSmrg store.buffer_to_store = buffer; 1397ec681f3Smrg store.address = cl_address(rsc->bo, layer_offset); 14001e04c3fSmrg 14101e04c3fSmrg#if V3D_VERSION >= 40 14201e04c3fSmrg store.clear_buffer_being_stored = false; 14301e04c3fSmrg 14401e04c3fSmrg if (separate_stencil) 14501e04c3fSmrg store.output_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8; 14601e04c3fSmrg else 14701e04c3fSmrg store.output_image_format = surf->format; 14801e04c3fSmrg 1499f464c52Smaya store.r_b_swap = surf->swap_rb; 15001e04c3fSmrg store.memory_format = surf->tiling; 15101e04c3fSmrg 1527ec681f3Smrg if (surf->tiling == V3D_TILING_UIF_NO_XOR || 1537ec681f3Smrg surf->tiling == V3D_TILING_UIF_XOR) { 15401e04c3fSmrg store.height_in_ub_or_stride = 15501e04c3fSmrg surf->padded_height_of_output_image_in_uif_blocks; 1567ec681f3Smrg } else if (surf->tiling == V3D_TILING_RASTER) { 15701e04c3fSmrg struct v3d_resource_slice *slice = 15801e04c3fSmrg &rsc->slices[psurf->u.tex.level]; 15901e04c3fSmrg store.height_in_ub_or_stride = slice->stride; 16001e04c3fSmrg } 16101e04c3fSmrg 1627ec681f3Smrg assert(!resolve_4x || job->bbuf); 16301e04c3fSmrg if (psurf->texture->nr_samples > 1) 16401e04c3fSmrg store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; 1657ec681f3Smrg else if (resolve_4x && job->bbuf->texture->nr_samples > 1) 1667ec681f3Smrg store.decimate_mode = V3D_DECIMATE_MODE_4X; 16701e04c3fSmrg else 16801e04c3fSmrg store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; 16901e04c3fSmrg 17001e04c3fSmrg#else /* V3D_VERSION < 40 */ 17101e04c3fSmrg /* Can't do raw ZSTENCIL stores -- need to load/store them to 17201e04c3fSmrg * separate buffers for Z and stencil. 17301e04c3fSmrg */ 17401e04c3fSmrg assert(buffer != ZSTENCIL); 17501e04c3fSmrg store.raw_mode = true; 17601e04c3fSmrg if (!last_store) { 17701e04c3fSmrg store.disable_color_buffers_clear_on_write = true; 17801e04c3fSmrg store.disable_z_buffer_clear_on_write = true; 17901e04c3fSmrg store.disable_stencil_buffer_clear_on_write = true; 18001e04c3fSmrg } else { 18101e04c3fSmrg store.disable_color_buffers_clear_on_write = 18201e04c3fSmrg !(((pipe_bit & PIPE_CLEAR_COLOR_BUFFERS) && 18301e04c3fSmrg general_color_clear && 18401e04c3fSmrg (job->clear & pipe_bit))); 18501e04c3fSmrg store.disable_z_buffer_clear_on_write = 18601e04c3fSmrg !(job->clear & PIPE_CLEAR_DEPTH); 18701e04c3fSmrg store.disable_stencil_buffer_clear_on_write = 18801e04c3fSmrg !(job->clear & PIPE_CLEAR_STENCIL); 18901e04c3fSmrg } 19001e04c3fSmrg store.padded_height_of_output_image_in_uif_blocks = 19101e04c3fSmrg surf->padded_height_of_output_image_in_uif_blocks; 19201e04c3fSmrg#endif /* V3D_VERSION < 40 */ 19301e04c3fSmrg } 19401e04c3fSmrg 19501e04c3fSmrg /* There must be a TILE_COORDINATES_IMPLICIT between each store. */ 19601e04c3fSmrg if (V3D_VERSION < 40 && !last_store) { 19701e04c3fSmrg cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); 19801e04c3fSmrg } 19901e04c3fSmrg} 20001e04c3fSmrg 20101e04c3fSmrgstatic int 20201e04c3fSmrgzs_buffer_from_pipe_bits(int pipe_clear_bits) 20301e04c3fSmrg{ 20401e04c3fSmrg switch (pipe_clear_bits & PIPE_CLEAR_DEPTHSTENCIL) { 20501e04c3fSmrg case PIPE_CLEAR_DEPTHSTENCIL: 20601e04c3fSmrg return ZSTENCIL; 20701e04c3fSmrg case PIPE_CLEAR_DEPTH: 20801e04c3fSmrg return Z; 20901e04c3fSmrg case PIPE_CLEAR_STENCIL: 21001e04c3fSmrg return STENCIL; 21101e04c3fSmrg default: 21201e04c3fSmrg return NONE; 21301e04c3fSmrg } 21401e04c3fSmrg} 21501e04c3fSmrg 21601e04c3fSmrgstatic void 2177ec681f3Smrgv3d_rcl_emit_loads(struct v3d_job *job, struct v3d_cl *cl, int layer) 21801e04c3fSmrg{ 2197ec681f3Smrg /* When blitting, no color or zs buffer is loaded; instead the blit 2207ec681f3Smrg * source buffer is loaded for the aspects that we are going to blit. 2217ec681f3Smrg */ 2227ec681f3Smrg assert(!job->bbuf || job->load == 0); 2237ec681f3Smrg assert(!job->bbuf || job->nr_cbufs <= 1); 2247ec681f3Smrg assert(!job->bbuf || V3D_VERSION >= 40); 22501e04c3fSmrg 2267ec681f3Smrg uint32_t loads_pending = job->bbuf ? job->store : job->load; 2277ec681f3Smrg 2287ec681f3Smrg for (int i = 0; i < job->nr_cbufs; i++) { 22901e04c3fSmrg uint32_t bit = PIPE_CLEAR_COLOR0 << i; 23001e04c3fSmrg if (!(loads_pending & bit)) 23101e04c3fSmrg continue; 23201e04c3fSmrg 2337ec681f3Smrg struct pipe_surface *psurf = job->bbuf ? job->bbuf : job->cbufs[i]; 2347ec681f3Smrg assert(!job->bbuf || i == 0); 2357ec681f3Smrg 23601e04c3fSmrg if (!psurf || (V3D_VERSION < 40 && 23701e04c3fSmrg psurf->texture->nr_samples <= 1)) { 23801e04c3fSmrg continue; 23901e04c3fSmrg } 24001e04c3fSmrg 2417ec681f3Smrg load_general(cl, psurf, RENDER_TARGET_0 + i, layer, 24201e04c3fSmrg bit, &loads_pending); 24301e04c3fSmrg } 24401e04c3fSmrg 24501e04c3fSmrg if ((loads_pending & PIPE_CLEAR_DEPTHSTENCIL) && 24601e04c3fSmrg (V3D_VERSION >= 40 || 24701e04c3fSmrg (job->zsbuf && job->zsbuf->texture->nr_samples > 1))) { 2487ec681f3Smrg struct pipe_surface *src = job->bbuf ? job->bbuf : job->zsbuf; 2497ec681f3Smrg struct v3d_resource *rsc = v3d_resource(src->texture); 25001e04c3fSmrg 25101e04c3fSmrg if (rsc->separate_stencil && 25201e04c3fSmrg (loads_pending & PIPE_CLEAR_STENCIL)) { 2537ec681f3Smrg load_general(cl, src, 2547ec681f3Smrg STENCIL, layer, 25501e04c3fSmrg PIPE_CLEAR_STENCIL, 25601e04c3fSmrg &loads_pending); 25701e04c3fSmrg } 25801e04c3fSmrg 25901e04c3fSmrg if (loads_pending & PIPE_CLEAR_DEPTHSTENCIL) { 2607ec681f3Smrg load_general(cl, src, 26101e04c3fSmrg zs_buffer_from_pipe_bits(loads_pending), 2627ec681f3Smrg layer, 26301e04c3fSmrg loads_pending & PIPE_CLEAR_DEPTHSTENCIL, 26401e04c3fSmrg &loads_pending); 26501e04c3fSmrg } 26601e04c3fSmrg } 26701e04c3fSmrg 26801e04c3fSmrg#if V3D_VERSION < 40 26901e04c3fSmrg /* The initial reload will be queued until we get the 27001e04c3fSmrg * tile coordinates. 27101e04c3fSmrg */ 27201e04c3fSmrg if (loads_pending) { 27301e04c3fSmrg cl_emit(cl, RELOAD_TILE_COLOR_BUFFER, load) { 27401e04c3fSmrg load.disable_color_buffer_load = 27501e04c3fSmrg (~loads_pending & 27601e04c3fSmrg PIPE_CLEAR_COLOR_BUFFERS) >> 27701e04c3fSmrg PIPE_FIRST_COLOR_BUFFER_BIT; 27801e04c3fSmrg load.enable_z_load = 27901e04c3fSmrg loads_pending & PIPE_CLEAR_DEPTH; 28001e04c3fSmrg load.enable_stencil_load = 28101e04c3fSmrg loads_pending & PIPE_CLEAR_STENCIL; 28201e04c3fSmrg } 28301e04c3fSmrg } 28401e04c3fSmrg#else /* V3D_VERSION >= 40 */ 28501e04c3fSmrg assert(!loads_pending); 28601e04c3fSmrg cl_emit(cl, END_OF_LOADS, end); 28701e04c3fSmrg#endif 28801e04c3fSmrg} 28901e04c3fSmrg 29001e04c3fSmrgstatic void 2917ec681f3Smrgv3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer) 29201e04c3fSmrg{ 29301e04c3fSmrg#if V3D_VERSION < 40 2947ec681f3Smrg UNUSED bool needs_color_clear = job->clear & PIPE_CLEAR_COLOR_BUFFERS; 2957ec681f3Smrg UNUSED bool needs_z_clear = job->clear & PIPE_CLEAR_DEPTH; 2967ec681f3Smrg UNUSED bool needs_s_clear = job->clear & PIPE_CLEAR_STENCIL; 29701e04c3fSmrg 29801e04c3fSmrg /* For clearing color in a TLB general on V3D 3.3: 29901e04c3fSmrg * 30001e04c3fSmrg * - NONE buffer store clears all TLB color buffers. 30101e04c3fSmrg * - color buffer store clears just the TLB color buffer being stored. 30201e04c3fSmrg * - Z/S buffers store may not clear the TLB color buffer. 30301e04c3fSmrg * 30401e04c3fSmrg * And on V3D 4.1, we only have one flag for "clear the buffer being 30501e04c3fSmrg * stored" in the general packet, and a separate packet to clear all 30601e04c3fSmrg * color TLB buffers. 30701e04c3fSmrg * 30801e04c3fSmrg * As a result, we only bother flagging TLB color clears in a general 30901e04c3fSmrg * packet when we don't have to emit a separate packet to clear all 31001e04c3fSmrg * TLB color buffers. 31101e04c3fSmrg */ 31201e04c3fSmrg bool general_color_clear = (needs_color_clear && 31301e04c3fSmrg (job->clear & PIPE_CLEAR_COLOR_BUFFERS) == 31401e04c3fSmrg (job->store & PIPE_CLEAR_COLOR_BUFFERS)); 31501e04c3fSmrg#else 31601e04c3fSmrg bool general_color_clear = false; 31701e04c3fSmrg#endif 31801e04c3fSmrg 31901e04c3fSmrg uint32_t stores_pending = job->store; 32001e04c3fSmrg 32101e04c3fSmrg /* For V3D 4.1, use general stores for all TLB stores. 32201e04c3fSmrg * 32301e04c3fSmrg * For V3D 3.3, we only use general stores to do raw stores for any 32401e04c3fSmrg * MSAA surfaces. These output UIF tiled images where each 4x MSAA 32501e04c3fSmrg * pixel is a 2x2 quad, and the format will be that of the 32601e04c3fSmrg * internal_type/internal_bpp, rather than the format from GL's 32701e04c3fSmrg * perspective. Non-MSAA surfaces will use 32801e04c3fSmrg * STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED. 32901e04c3fSmrg */ 3307ec681f3Smrg assert(!job->bbuf || job->nr_cbufs <= 1); 3317ec681f3Smrg for (int i = 0; i < job->nr_cbufs; i++) { 33201e04c3fSmrg uint32_t bit = PIPE_CLEAR_COLOR0 << i; 33301e04c3fSmrg if (!(job->store & bit)) 33401e04c3fSmrg continue; 33501e04c3fSmrg 33601e04c3fSmrg struct pipe_surface *psurf = job->cbufs[i]; 33701e04c3fSmrg if (!psurf || 33801e04c3fSmrg (V3D_VERSION < 40 && psurf->texture->nr_samples <= 1)) { 33901e04c3fSmrg continue; 34001e04c3fSmrg } 34101e04c3fSmrg 3427ec681f3Smrg store_general(job, cl, psurf, layer, RENDER_TARGET_0 + i, bit, 3437ec681f3Smrg &stores_pending, general_color_clear, job->bbuf); 34401e04c3fSmrg } 34501e04c3fSmrg 34601e04c3fSmrg if (job->store & PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf && 34701e04c3fSmrg !(V3D_VERSION < 40 && job->zsbuf->texture->nr_samples <= 1)) { 34801e04c3fSmrg struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture); 34901e04c3fSmrg if (rsc->separate_stencil) { 35001e04c3fSmrg if (job->store & PIPE_CLEAR_DEPTH) { 3517ec681f3Smrg store_general(job, cl, job->zsbuf, layer, 3527ec681f3Smrg Z, PIPE_CLEAR_DEPTH, 35301e04c3fSmrg &stores_pending, 3547ec681f3Smrg general_color_clear, 3557ec681f3Smrg false); 35601e04c3fSmrg } 35701e04c3fSmrg 35801e04c3fSmrg if (job->store & PIPE_CLEAR_STENCIL) { 3597ec681f3Smrg store_general(job, cl, job->zsbuf, layer, 3607ec681f3Smrg STENCIL, PIPE_CLEAR_STENCIL, 36101e04c3fSmrg &stores_pending, 3627ec681f3Smrg general_color_clear, 3637ec681f3Smrg false); 36401e04c3fSmrg } 36501e04c3fSmrg } else { 3667ec681f3Smrg store_general(job, cl, job->zsbuf, layer, 36701e04c3fSmrg zs_buffer_from_pipe_bits(job->store), 36801e04c3fSmrg job->store & PIPE_CLEAR_DEPTHSTENCIL, 3697ec681f3Smrg &stores_pending, general_color_clear, 3707ec681f3Smrg false); 37101e04c3fSmrg } 37201e04c3fSmrg } 37301e04c3fSmrg 37401e04c3fSmrg#if V3D_VERSION < 40 37501e04c3fSmrg if (stores_pending) { 37601e04c3fSmrg cl_emit(cl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) { 37701e04c3fSmrg 37801e04c3fSmrg store.disable_color_buffer_write = 37901e04c3fSmrg (~stores_pending >> 38001e04c3fSmrg PIPE_FIRST_COLOR_BUFFER_BIT) & 0xf; 38101e04c3fSmrg store.enable_z_write = stores_pending & PIPE_CLEAR_DEPTH; 38201e04c3fSmrg store.enable_stencil_write = stores_pending & PIPE_CLEAR_STENCIL; 38301e04c3fSmrg 38401e04c3fSmrg /* Note that when set this will clear all of the color 38501e04c3fSmrg * buffers. 38601e04c3fSmrg */ 38701e04c3fSmrg store.disable_color_buffers_clear_on_write = 38801e04c3fSmrg !needs_color_clear; 38901e04c3fSmrg store.disable_z_buffer_clear_on_write = 39001e04c3fSmrg !needs_z_clear; 39101e04c3fSmrg store.disable_stencil_buffer_clear_on_write = 39201e04c3fSmrg !needs_s_clear; 39301e04c3fSmrg }; 39401e04c3fSmrg } else if (needs_color_clear && !general_color_clear) { 39501e04c3fSmrg /* If we didn't do our color clears in the general packet, 39601e04c3fSmrg * then emit a packet to clear all the TLB color buffers now. 39701e04c3fSmrg */ 39801e04c3fSmrg cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { 39901e04c3fSmrg store.buffer_to_store = NONE; 40001e04c3fSmrg } 40101e04c3fSmrg } 40201e04c3fSmrg#else /* V3D_VERSION >= 40 */ 4039f464c52Smaya /* If we're emitting an RCL with GL_ARB_framebuffer_no_attachments, 4049f464c52Smaya * we still need to emit some sort of store. 4059f464c52Smaya */ 4069f464c52Smaya if (!job->store) { 4079f464c52Smaya cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { 4089f464c52Smaya store.buffer_to_store = NONE; 4099f464c52Smaya } 4109f464c52Smaya } 4119f464c52Smaya 41201e04c3fSmrg assert(!stores_pending); 41301e04c3fSmrg 41401e04c3fSmrg /* GFXH-1461/GFXH-1689: The per-buffer store command's clear 41501e04c3fSmrg * buffer bit is broken for depth/stencil. In addition, the 41601e04c3fSmrg * clear packet's Z/S bit is broken, but the RTs bit ends up 41701e04c3fSmrg * clearing Z/S. 41801e04c3fSmrg */ 41901e04c3fSmrg if (job->clear) { 42001e04c3fSmrg cl_emit(cl, CLEAR_TILE_BUFFERS, clear) { 42101e04c3fSmrg clear.clear_z_stencil_buffer = true; 42201e04c3fSmrg clear.clear_all_render_targets = true; 42301e04c3fSmrg } 42401e04c3fSmrg } 42501e04c3fSmrg#endif /* V3D_VERSION >= 40 */ 42601e04c3fSmrg} 42701e04c3fSmrg 42801e04c3fSmrgstatic void 4297ec681f3Smrgv3d_rcl_emit_generic_per_tile_list(struct v3d_job *job, int layer) 43001e04c3fSmrg{ 43101e04c3fSmrg /* Emit the generic list in our indirect state -- the rcl will just 43201e04c3fSmrg * have pointers into it. 43301e04c3fSmrg */ 43401e04c3fSmrg struct v3d_cl *cl = &job->indirect; 43501e04c3fSmrg v3d_cl_ensure_space(cl, 200, 1); 43601e04c3fSmrg struct v3d_cl_reloc tile_list_start = cl_get_address(cl); 43701e04c3fSmrg 43801e04c3fSmrg if (V3D_VERSION >= 40) { 43901e04c3fSmrg /* V3D 4.x only requires a single tile coordinates, and 44001e04c3fSmrg * END_OF_LOADS switches us between loading and rendering. 44101e04c3fSmrg */ 44201e04c3fSmrg cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); 44301e04c3fSmrg } 44401e04c3fSmrg 4457ec681f3Smrg v3d_rcl_emit_loads(job, cl, layer); 44601e04c3fSmrg 44701e04c3fSmrg if (V3D_VERSION < 40) { 44801e04c3fSmrg /* Tile Coordinates triggers the last reload and sets where 44901e04c3fSmrg * the stores go. There must be one per store packet. 45001e04c3fSmrg */ 45101e04c3fSmrg cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); 45201e04c3fSmrg } 45301e04c3fSmrg 45401e04c3fSmrg /* The binner starts out writing tiles assuming that the initial mode 45501e04c3fSmrg * is triangles, so make sure that's the case. 45601e04c3fSmrg */ 45701e04c3fSmrg cl_emit(cl, PRIM_LIST_FORMAT, fmt) { 45801e04c3fSmrg fmt.primitive_type = LIST_TRIANGLES; 45901e04c3fSmrg } 46001e04c3fSmrg 4617ec681f3Smrg#if V3D_VERSION >= 41 4627ec681f3Smrg /* PTB assumes that value to be 0, but hw will not set it. */ 4637ec681f3Smrg cl_emit(cl, SET_INSTANCEID, set) { 4647ec681f3Smrg set.instance_id = 0; 4657ec681f3Smrg } 4667ec681f3Smrg#endif 4677ec681f3Smrg 46801e04c3fSmrg cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); 46901e04c3fSmrg 4707ec681f3Smrg v3d_rcl_emit_stores(job, cl, layer); 47101e04c3fSmrg 47201e04c3fSmrg#if V3D_VERSION >= 40 47301e04c3fSmrg cl_emit(cl, END_OF_TILE_MARKER, end); 47401e04c3fSmrg#endif 47501e04c3fSmrg 47601e04c3fSmrg cl_emit(cl, RETURN_FROM_SUB_LIST, ret); 47701e04c3fSmrg 47801e04c3fSmrg cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { 47901e04c3fSmrg branch.start = tile_list_start; 48001e04c3fSmrg branch.end = cl_get_address(cl); 48101e04c3fSmrg } 48201e04c3fSmrg} 48301e04c3fSmrg 48401e04c3fSmrg#if V3D_VERSION >= 40 48501e04c3fSmrgstatic void 48601e04c3fSmrgv3d_setup_render_target(struct v3d_job *job, int cbuf, 48701e04c3fSmrg uint32_t *rt_bpp, uint32_t *rt_type, uint32_t *rt_clamp) 48801e04c3fSmrg{ 48901e04c3fSmrg if (!job->cbufs[cbuf]) 49001e04c3fSmrg return; 49101e04c3fSmrg 49201e04c3fSmrg struct v3d_surface *surf = v3d_surface(job->cbufs[cbuf]); 49301e04c3fSmrg *rt_bpp = surf->internal_bpp; 4947ec681f3Smrg if (job->bbuf) { 4957ec681f3Smrg struct v3d_surface *bsurf = v3d_surface(job->bbuf); 4967ec681f3Smrg *rt_bpp = MAX2(*rt_bpp, bsurf->internal_bpp); 4977ec681f3Smrg } 49801e04c3fSmrg *rt_type = surf->internal_type; 49901e04c3fSmrg *rt_clamp = V3D_RENDER_TARGET_CLAMP_NONE; 50001e04c3fSmrg} 50101e04c3fSmrg 50201e04c3fSmrg#else /* V3D_VERSION < 40 */ 50301e04c3fSmrg 50401e04c3fSmrgstatic void 50501e04c3fSmrgv3d_emit_z_stencil_config(struct v3d_job *job, struct v3d_surface *surf, 50601e04c3fSmrg struct v3d_resource *rsc, bool is_separate_stencil) 50701e04c3fSmrg{ 50801e04c3fSmrg cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_Z_STENCIL, zs) { 50901e04c3fSmrg zs.address = cl_address(rsc->bo, surf->offset); 51001e04c3fSmrg 51101e04c3fSmrg if (!is_separate_stencil) { 51201e04c3fSmrg zs.internal_type = surf->internal_type; 51301e04c3fSmrg zs.output_image_format = surf->format; 51401e04c3fSmrg } else { 51501e04c3fSmrg zs.z_stencil_id = 1; /* Separate stencil */ 51601e04c3fSmrg } 51701e04c3fSmrg 51801e04c3fSmrg zs.padded_height_of_output_image_in_uif_blocks = 51901e04c3fSmrg surf->padded_height_of_output_image_in_uif_blocks; 52001e04c3fSmrg 5217ec681f3Smrg assert(surf->tiling != V3D_TILING_RASTER); 52201e04c3fSmrg zs.memory_format = surf->tiling; 52301e04c3fSmrg } 52401e04c3fSmrg 52501e04c3fSmrg if (job->store & (is_separate_stencil ? 52601e04c3fSmrg PIPE_CLEAR_STENCIL : 52701e04c3fSmrg PIPE_CLEAR_DEPTHSTENCIL)) { 52801e04c3fSmrg rsc->writes++; 52901e04c3fSmrg } 53001e04c3fSmrg} 53101e04c3fSmrg#endif /* V3D_VERSION < 40 */ 53201e04c3fSmrg 53301e04c3fSmrg#define div_round_up(a, b) (((a) + (b) - 1) / b) 53401e04c3fSmrg 5357ec681f3Smrgstatic bool 5367ec681f3Smrgsupertile_in_job_scissors(struct v3d_job *job, 5377ec681f3Smrg uint32_t x, uint32_t y, uint32_t w, uint32_t h) 5387ec681f3Smrg{ 5397ec681f3Smrg if (job->scissor.disabled || job->scissor.count == 0) 5407ec681f3Smrg return true; 5417ec681f3Smrg 5427ec681f3Smrg const uint32_t min_x = x * w; 5437ec681f3Smrg const uint32_t min_y = y * h; 5447ec681f3Smrg const uint32_t max_x = min_x + w - 1; 5457ec681f3Smrg const uint32_t max_y = min_y + h - 1; 5467ec681f3Smrg 5477ec681f3Smrg for (uint32_t i = 0; i < job->scissor.count; i++) { 5487ec681f3Smrg const uint32_t min_s_x = job->scissor.rects[i].min_x; 5497ec681f3Smrg const uint32_t min_s_y = job->scissor.rects[i].min_y; 5507ec681f3Smrg const uint32_t max_s_x = job->scissor.rects[i].max_x; 5517ec681f3Smrg const uint32_t max_s_y = job->scissor.rects[i].max_y; 5527ec681f3Smrg 5537ec681f3Smrg if (max_x < min_s_x || min_x > max_s_x || 5547ec681f3Smrg max_y < min_s_y || min_y > max_s_y) { 5557ec681f3Smrg continue; 5567ec681f3Smrg } 5577ec681f3Smrg 5587ec681f3Smrg return true; 5597ec681f3Smrg } 5607ec681f3Smrg 5617ec681f3Smrg return false; 5627ec681f3Smrg} 5637ec681f3Smrg 5647ec681f3Smrgstatic void 5657ec681f3Smrgemit_render_layer(struct v3d_job *job, uint32_t layer) 5667ec681f3Smrg{ 5677ec681f3Smrg uint32_t supertile_w = 1, supertile_h = 1; 5687ec681f3Smrg 5697ec681f3Smrg /* If doing multicore binning, we would need to initialize each 5707ec681f3Smrg * core's tile list here. 5717ec681f3Smrg */ 5727ec681f3Smrg uint32_t tile_alloc_offset = 5737ec681f3Smrg layer * job->draw_tiles_x * job->draw_tiles_y * 64; 5747ec681f3Smrg cl_emit(&job->rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) { 5757ec681f3Smrg list.address = cl_address(job->tile_alloc, tile_alloc_offset); 5767ec681f3Smrg } 5777ec681f3Smrg 5787ec681f3Smrg cl_emit(&job->rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) { 5797ec681f3Smrg uint32_t frame_w_in_supertiles, frame_h_in_supertiles; 5807ec681f3Smrg const uint32_t max_supertiles = 256; 5817ec681f3Smrg 5827ec681f3Smrg /* Size up our supertiles until we get under the limit. */ 5837ec681f3Smrg for (;;) { 5847ec681f3Smrg frame_w_in_supertiles = div_round_up(job->draw_tiles_x, 5857ec681f3Smrg supertile_w); 5867ec681f3Smrg frame_h_in_supertiles = div_round_up(job->draw_tiles_y, 5877ec681f3Smrg supertile_h); 5887ec681f3Smrg if (frame_w_in_supertiles * 5897ec681f3Smrg frame_h_in_supertiles < max_supertiles) { 5907ec681f3Smrg break; 5917ec681f3Smrg } 5927ec681f3Smrg 5937ec681f3Smrg if (supertile_w < supertile_h) 5947ec681f3Smrg supertile_w++; 5957ec681f3Smrg else 5967ec681f3Smrg supertile_h++; 5977ec681f3Smrg } 5987ec681f3Smrg 5997ec681f3Smrg config.number_of_bin_tile_lists = 1; 6007ec681f3Smrg config.total_frame_width_in_tiles = job->draw_tiles_x; 6017ec681f3Smrg config.total_frame_height_in_tiles = job->draw_tiles_y; 6027ec681f3Smrg 6037ec681f3Smrg config.supertile_width_in_tiles = supertile_w; 6047ec681f3Smrg config.supertile_height_in_tiles = supertile_h; 6057ec681f3Smrg 6067ec681f3Smrg config.total_frame_width_in_supertiles = frame_w_in_supertiles; 6077ec681f3Smrg config.total_frame_height_in_supertiles = frame_h_in_supertiles; 6087ec681f3Smrg } 6097ec681f3Smrg 6107ec681f3Smrg /* Start by clearing the tile buffer. */ 6117ec681f3Smrg cl_emit(&job->rcl, TILE_COORDINATES, coords) { 6127ec681f3Smrg coords.tile_column_number = 0; 6137ec681f3Smrg coords.tile_row_number = 0; 6147ec681f3Smrg } 6157ec681f3Smrg 6167ec681f3Smrg /* Emit an initial clear of the tile buffers. This is necessary 6177ec681f3Smrg * for any buffers that should be cleared (since clearing 6187ec681f3Smrg * normally happens at the *end* of the generic tile list), but 6197ec681f3Smrg * it's also nice to clear everything so the first tile doesn't 6207ec681f3Smrg * inherit any contents from some previous frame. 6217ec681f3Smrg * 6227ec681f3Smrg * Also, implement the GFXH-1742 workaround. There's a race in 6237ec681f3Smrg * the HW between the RCL updating the TLB's internal type/size 6247ec681f3Smrg * and thespawning of the QPU instances using the TLB's current 6257ec681f3Smrg * internal type/size. To make sure the QPUs get the right 6267ec681f3Smrg * state, we need 1 dummy store in between internal type/size 6277ec681f3Smrg * changes on V3D 3.x, and 2 dummy stores on 4.x. 6287ec681f3Smrg */ 6297ec681f3Smrg#if V3D_VERSION < 40 6307ec681f3Smrg cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) { 6317ec681f3Smrg store.buffer_to_store = NONE; 6327ec681f3Smrg } 6337ec681f3Smrg#else 6347ec681f3Smrg for (int i = 0; i < 2; i++) { 6357ec681f3Smrg if (i > 0) 6367ec681f3Smrg cl_emit(&job->rcl, TILE_COORDINATES, coords); 6377ec681f3Smrg cl_emit(&job->rcl, END_OF_LOADS, end); 6387ec681f3Smrg cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) { 6397ec681f3Smrg store.buffer_to_store = NONE; 6407ec681f3Smrg } 6417ec681f3Smrg if (i == 0) { 6427ec681f3Smrg cl_emit(&job->rcl, CLEAR_TILE_BUFFERS, clear) { 6437ec681f3Smrg clear.clear_z_stencil_buffer = true; 6447ec681f3Smrg clear.clear_all_render_targets = true; 6457ec681f3Smrg } 6467ec681f3Smrg } 6477ec681f3Smrg cl_emit(&job->rcl, END_OF_TILE_MARKER, end); 6487ec681f3Smrg } 6497ec681f3Smrg#endif 6507ec681f3Smrg 6517ec681f3Smrg cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush); 6527ec681f3Smrg 6537ec681f3Smrg v3d_rcl_emit_generic_per_tile_list(job, layer); 6547ec681f3Smrg 6557ec681f3Smrg /* XXX perf: We should expose GL_MESA_tile_raster_order to 6567ec681f3Smrg * improve X11 performance, but we should use Morton order 6577ec681f3Smrg * otherwise to improve cache locality. 6587ec681f3Smrg */ 6597ec681f3Smrg uint32_t supertile_w_in_pixels = job->tile_width * supertile_w; 6607ec681f3Smrg uint32_t supertile_h_in_pixels = job->tile_height * supertile_h; 6617ec681f3Smrg uint32_t min_x_supertile = job->draw_min_x / supertile_w_in_pixels; 6627ec681f3Smrg uint32_t min_y_supertile = job->draw_min_y / supertile_h_in_pixels; 6637ec681f3Smrg 6647ec681f3Smrg uint32_t max_x_supertile = 0; 6657ec681f3Smrg uint32_t max_y_supertile = 0; 6667ec681f3Smrg if (job->draw_max_x != 0 && job->draw_max_y != 0) { 6677ec681f3Smrg max_x_supertile = (job->draw_max_x - 1) / supertile_w_in_pixels; 6687ec681f3Smrg max_y_supertile = (job->draw_max_y - 1) / supertile_h_in_pixels; 6697ec681f3Smrg } 6707ec681f3Smrg 6717ec681f3Smrg for (int y = min_y_supertile; y <= max_y_supertile; y++) { 6727ec681f3Smrg for (int x = min_x_supertile; x <= max_x_supertile; x++) { 6737ec681f3Smrg if (supertile_in_job_scissors(job, x, y, 6747ec681f3Smrg supertile_w_in_pixels, 6757ec681f3Smrg supertile_h_in_pixels)) { 6767ec681f3Smrg cl_emit(&job->rcl, SUPERTILE_COORDINATES, coords) { 6777ec681f3Smrg coords.column_number_in_supertiles = x; 6787ec681f3Smrg coords.row_number_in_supertiles = y; 6797ec681f3Smrg } 6807ec681f3Smrg } 6817ec681f3Smrg } 6827ec681f3Smrg } 6837ec681f3Smrg} 6847ec681f3Smrg 68501e04c3fSmrgvoid 68601e04c3fSmrgv3dX(emit_rcl)(struct v3d_job *job) 68701e04c3fSmrg{ 68801e04c3fSmrg /* The RCL list should be empty. */ 68901e04c3fSmrg assert(!job->rcl.bo); 69001e04c3fSmrg 6917ec681f3Smrg v3d_cl_ensure_space_with_branch(&job->rcl, 200 + 6927ec681f3Smrg MAX2(job->num_layers, 1) * 256 * 69301e04c3fSmrg cl_packet_length(SUPERTILE_COORDINATES)); 69401e04c3fSmrg job->submit.rcl_start = job->rcl.bo->offset; 69501e04c3fSmrg v3d_job_add_bo(job, job->rcl.bo); 69601e04c3fSmrg 6977ec681f3Smrg /* Common config must be the first TILE_RENDERING_MODE_CFG 69801e04c3fSmrg * and Z_STENCIL_CLEAR_VALUES must be last. The ones in between are 69901e04c3fSmrg * optional updates to the previous HW state. 70001e04c3fSmrg */ 70101e04c3fSmrg cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COMMON, config) { 70201e04c3fSmrg#if V3D_VERSION < 40 70301e04c3fSmrg config.enable_z_store = job->store & PIPE_CLEAR_DEPTH; 70401e04c3fSmrg config.enable_stencil_store = job->store & PIPE_CLEAR_STENCIL; 70501e04c3fSmrg#else /* V3D_VERSION >= 40 */ 70601e04c3fSmrg if (job->zsbuf) { 70701e04c3fSmrg struct v3d_surface *surf = v3d_surface(job->zsbuf); 70801e04c3fSmrg config.internal_depth_type = surf->internal_type; 70901e04c3fSmrg } 71001e04c3fSmrg#endif /* V3D_VERSION >= 40 */ 71101e04c3fSmrg 71201e04c3fSmrg /* XXX: Early D/S clear */ 71301e04c3fSmrg 71401e04c3fSmrg switch (job->first_ez_state) { 7157ec681f3Smrg case V3D_EZ_UNDECIDED: 7167ec681f3Smrg case V3D_EZ_LT_LE: 71701e04c3fSmrg config.early_z_disable = false; 71801e04c3fSmrg config.early_z_test_and_update_direction = 71901e04c3fSmrg EARLY_Z_DIRECTION_LT_LE; 72001e04c3fSmrg break; 7217ec681f3Smrg case V3D_EZ_GT_GE: 72201e04c3fSmrg config.early_z_disable = false; 72301e04c3fSmrg config.early_z_test_and_update_direction = 72401e04c3fSmrg EARLY_Z_DIRECTION_GT_GE; 72501e04c3fSmrg break; 7267ec681f3Smrg case V3D_EZ_DISABLED: 72701e04c3fSmrg config.early_z_disable = true; 72801e04c3fSmrg } 72901e04c3fSmrg 73001e04c3fSmrg config.image_width_pixels = job->draw_width; 73101e04c3fSmrg config.image_height_pixels = job->draw_height; 73201e04c3fSmrg 7337ec681f3Smrg config.number_of_render_targets = MAX2(job->nr_cbufs, 1); 73401e04c3fSmrg 73501e04c3fSmrg config.multisample_mode_4x = job->msaa; 73601e04c3fSmrg 73701e04c3fSmrg config.maximum_bpp_of_all_render_targets = job->internal_bpp; 73801e04c3fSmrg } 73901e04c3fSmrg 7407ec681f3Smrg for (int i = 0; i < job->nr_cbufs; i++) { 74101e04c3fSmrg struct pipe_surface *psurf = job->cbufs[i]; 74201e04c3fSmrg if (!psurf) 74301e04c3fSmrg continue; 74401e04c3fSmrg struct v3d_surface *surf = v3d_surface(psurf); 74501e04c3fSmrg struct v3d_resource *rsc = v3d_resource(psurf->texture); 74601e04c3fSmrg 7477ec681f3Smrg UNUSED uint32_t config_pad = 0; 74801e04c3fSmrg uint32_t clear_pad = 0; 74901e04c3fSmrg 75001e04c3fSmrg /* XXX: Set the pad for raster. */ 7517ec681f3Smrg if (surf->tiling == V3D_TILING_UIF_NO_XOR || 7527ec681f3Smrg surf->tiling == V3D_TILING_UIF_XOR) { 75301e04c3fSmrg int uif_block_height = v3d_utile_height(rsc->cpp) * 2; 75401e04c3fSmrg uint32_t implicit_padded_height = (align(job->draw_height, uif_block_height) / 75501e04c3fSmrg uif_block_height); 75601e04c3fSmrg if (surf->padded_height_of_output_image_in_uif_blocks - 75701e04c3fSmrg implicit_padded_height < 15) { 75801e04c3fSmrg config_pad = (surf->padded_height_of_output_image_in_uif_blocks - 75901e04c3fSmrg implicit_padded_height); 76001e04c3fSmrg } else { 76101e04c3fSmrg config_pad = 15; 76201e04c3fSmrg clear_pad = surf->padded_height_of_output_image_in_uif_blocks; 76301e04c3fSmrg } 76401e04c3fSmrg } 76501e04c3fSmrg 76601e04c3fSmrg#if V3D_VERSION < 40 76701e04c3fSmrg cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) { 76801e04c3fSmrg rt.address = cl_address(rsc->bo, surf->offset); 76901e04c3fSmrg rt.internal_type = surf->internal_type; 77001e04c3fSmrg rt.output_image_format = surf->format; 77101e04c3fSmrg rt.memory_format = surf->tiling; 77201e04c3fSmrg rt.internal_bpp = surf->internal_bpp; 77301e04c3fSmrg rt.render_target_number = i; 77401e04c3fSmrg rt.pad = config_pad; 77501e04c3fSmrg 77601e04c3fSmrg if (job->store & PIPE_CLEAR_COLOR0 << i) 77701e04c3fSmrg rsc->writes++; 77801e04c3fSmrg } 77901e04c3fSmrg#endif /* V3D_VERSION < 40 */ 78001e04c3fSmrg 78101e04c3fSmrg cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, 78201e04c3fSmrg clear) { 78301e04c3fSmrg clear.clear_color_low_32_bits = job->clear_color[i][0]; 78401e04c3fSmrg clear.clear_color_next_24_bits = job->clear_color[i][1] & 0xffffff; 78501e04c3fSmrg clear.render_target_number = i; 78601e04c3fSmrg }; 78701e04c3fSmrg 78801e04c3fSmrg if (surf->internal_bpp >= V3D_INTERNAL_BPP_64) { 78901e04c3fSmrg cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, 79001e04c3fSmrg clear) { 79101e04c3fSmrg clear.clear_color_mid_low_32_bits = 79201e04c3fSmrg ((job->clear_color[i][1] >> 24) | 79301e04c3fSmrg (job->clear_color[i][2] << 8)); 79401e04c3fSmrg clear.clear_color_mid_high_24_bits = 79501e04c3fSmrg ((job->clear_color[i][2] >> 24) | 79601e04c3fSmrg ((job->clear_color[i][3] & 0xffff) << 8)); 79701e04c3fSmrg clear.render_target_number = i; 79801e04c3fSmrg }; 79901e04c3fSmrg } 80001e04c3fSmrg 80101e04c3fSmrg if (surf->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) { 80201e04c3fSmrg cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, 80301e04c3fSmrg clear) { 80401e04c3fSmrg clear.uif_padded_height_in_uif_blocks = clear_pad; 80501e04c3fSmrg clear.clear_color_high_16_bits = job->clear_color[i][3] >> 16; 80601e04c3fSmrg clear.render_target_number = i; 80701e04c3fSmrg }; 80801e04c3fSmrg } 80901e04c3fSmrg } 81001e04c3fSmrg 81101e04c3fSmrg#if V3D_VERSION >= 40 81201e04c3fSmrg cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) { 81301e04c3fSmrg v3d_setup_render_target(job, 0, 81401e04c3fSmrg &rt.render_target_0_internal_bpp, 81501e04c3fSmrg &rt.render_target_0_internal_type, 81601e04c3fSmrg &rt.render_target_0_clamp); 81701e04c3fSmrg v3d_setup_render_target(job, 1, 81801e04c3fSmrg &rt.render_target_1_internal_bpp, 81901e04c3fSmrg &rt.render_target_1_internal_type, 82001e04c3fSmrg &rt.render_target_1_clamp); 82101e04c3fSmrg v3d_setup_render_target(job, 2, 82201e04c3fSmrg &rt.render_target_2_internal_bpp, 82301e04c3fSmrg &rt.render_target_2_internal_type, 82401e04c3fSmrg &rt.render_target_2_clamp); 82501e04c3fSmrg v3d_setup_render_target(job, 3, 82601e04c3fSmrg &rt.render_target_3_internal_bpp, 82701e04c3fSmrg &rt.render_target_3_internal_type, 82801e04c3fSmrg &rt.render_target_3_clamp); 82901e04c3fSmrg } 83001e04c3fSmrg#endif 83101e04c3fSmrg 83201e04c3fSmrg#if V3D_VERSION < 40 83301e04c3fSmrg /* TODO: Don't bother emitting if we don't load/clear Z/S. */ 83401e04c3fSmrg if (job->zsbuf) { 83501e04c3fSmrg struct pipe_surface *psurf = job->zsbuf; 83601e04c3fSmrg struct v3d_surface *surf = v3d_surface(psurf); 83701e04c3fSmrg struct v3d_resource *rsc = v3d_resource(psurf->texture); 83801e04c3fSmrg 83901e04c3fSmrg v3d_emit_z_stencil_config(job, surf, rsc, false); 84001e04c3fSmrg 84101e04c3fSmrg /* Emit the separate stencil packet if we have a resource for 84201e04c3fSmrg * it. The HW will only load/store this buffer if the 84301e04c3fSmrg * Z/Stencil config doesn't have stencil in its format. 84401e04c3fSmrg */ 84501e04c3fSmrg if (surf->separate_stencil) { 84601e04c3fSmrg v3d_emit_z_stencil_config(job, 84701e04c3fSmrg v3d_surface(surf->separate_stencil), 84801e04c3fSmrg rsc->separate_stencil, true); 84901e04c3fSmrg } 85001e04c3fSmrg } 85101e04c3fSmrg#endif /* V3D_VERSION < 40 */ 85201e04c3fSmrg 85301e04c3fSmrg /* Ends rendering mode config. */ 85401e04c3fSmrg cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, 85501e04c3fSmrg clear) { 85601e04c3fSmrg clear.z_clear_value = job->clear_z; 85701e04c3fSmrg clear.stencil_clear_value = job->clear_s; 85801e04c3fSmrg }; 85901e04c3fSmrg 86001e04c3fSmrg /* Always set initial block size before the first branch, which needs 86101e04c3fSmrg * to match the value from binning mode config. 86201e04c3fSmrg */ 86301e04c3fSmrg cl_emit(&job->rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) { 86401e04c3fSmrg init.use_auto_chained_tile_lists = true; 86501e04c3fSmrg init.size_of_first_block_in_chained_tile_lists = 86601e04c3fSmrg TILE_ALLOCATION_BLOCK_SIZE_64B; 86701e04c3fSmrg } 86801e04c3fSmrg 8697ec681f3Smrg /* ARB_framebuffer_no_attachments allows rendering to happen even when 8707ec681f3Smrg * the framebuffer has no attachments, the idea being that fragment 8717ec681f3Smrg * shaders can still do image load/store, ssbo, etc without having to 8727ec681f3Smrg * write to actual attachments, so always run at least one iteration 8737ec681f3Smrg * of the loop. 8749f464c52Smaya */ 8757ec681f3Smrg assert(job->num_layers > 0 || (job->load == 0 && job->store == 0)); 8767ec681f3Smrg for (int layer = 0; layer < MAX2(1, job->num_layers); layer++) 8777ec681f3Smrg emit_render_layer(job, layer); 8789f464c52Smaya 87901e04c3fSmrg cl_emit(&job->rcl, END_OF_RENDERING, end); 88001e04c3fSmrg} 881