17ec681f3Smrg/* 27ec681f3Smrg * Copyright © 2021 Raspberry Pi 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 217ec681f3Smrg * IN THE SOFTWARE. 227ec681f3Smrg */ 237ec681f3Smrg 247ec681f3Smrg#include "v3dv_private.h" 257ec681f3Smrg#include "broadcom/common/v3d_macros.h" 267ec681f3Smrg#include "broadcom/cle/v3dx_pack.h" 277ec681f3Smrg#include "broadcom/compiler/v3d_compiler.h" 287ec681f3Smrg 297ec681f3Smrg#include "util/half_float.h" 307ec681f3Smrg#include "vulkan/util/vk_format.h" 317ec681f3Smrg#include "util/u_pack_color.h" 327ec681f3Smrg 337ec681f3Smrg#include "vk_format_info.h" 347ec681f3Smrg 357ec681f3Smrgvoid 367ec681f3Smrgv3dX(job_emit_binning_flush)(struct v3dv_job *job) 377ec681f3Smrg{ 387ec681f3Smrg assert(job); 397ec681f3Smrg 407ec681f3Smrg v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(FLUSH)); 417ec681f3Smrg v3dv_return_if_oom(NULL, job); 427ec681f3Smrg 437ec681f3Smrg cl_emit(&job->bcl, FLUSH, flush); 447ec681f3Smrg} 457ec681f3Smrg 467ec681f3Smrgvoid 477ec681f3Smrgv3dX(job_emit_binning_prolog)(struct v3dv_job *job, 487ec681f3Smrg const struct v3dv_frame_tiling *tiling, 497ec681f3Smrg uint32_t layers) 507ec681f3Smrg{ 517ec681f3Smrg /* This must go before the binning mode configuration. It is 527ec681f3Smrg * required for layered framebuffers to work. 537ec681f3Smrg */ 547ec681f3Smrg cl_emit(&job->bcl, NUMBER_OF_LAYERS, config) { 557ec681f3Smrg config.number_of_layers = layers; 567ec681f3Smrg } 577ec681f3Smrg 587ec681f3Smrg cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) { 597ec681f3Smrg config.width_in_pixels = tiling->width; 607ec681f3Smrg config.height_in_pixels = tiling->height; 617ec681f3Smrg config.number_of_render_targets = MAX2(tiling->render_target_count, 1); 627ec681f3Smrg config.multisample_mode_4x = tiling->msaa; 637ec681f3Smrg config.maximum_bpp_of_all_render_targets = tiling->internal_bpp; 647ec681f3Smrg } 657ec681f3Smrg 667ec681f3Smrg /* There's definitely nothing in the VCD cache we want. */ 677ec681f3Smrg cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin); 687ec681f3Smrg 697ec681f3Smrg /* "Binning mode lists must have a Start Tile Binning item (6) after 707ec681f3Smrg * any prefix state data before the binning list proper starts." 717ec681f3Smrg */ 727ec681f3Smrg cl_emit(&job->bcl, START_TILE_BINNING, bin); 737ec681f3Smrg} 747ec681f3Smrg 757ec681f3Smrgvoid 767ec681f3Smrgv3dX(cmd_buffer_end_render_pass_secondary)(struct v3dv_cmd_buffer *cmd_buffer) 777ec681f3Smrg{ 787ec681f3Smrg assert(cmd_buffer->state.job); 797ec681f3Smrg v3dv_cl_ensure_space_with_branch(&cmd_buffer->state.job->bcl, 807ec681f3Smrg cl_packet_length(RETURN_FROM_SUB_LIST)); 817ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 827ec681f3Smrg cl_emit(&cmd_buffer->state.job->bcl, RETURN_FROM_SUB_LIST, ret); 837ec681f3Smrg} 847ec681f3Smrg 857ec681f3Smrgvoid 867ec681f3Smrgv3dX(job_emit_clip_window)(struct v3dv_job *job, const VkRect2D *rect) 877ec681f3Smrg{ 887ec681f3Smrg assert(job); 897ec681f3Smrg 907ec681f3Smrg v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(CLIP_WINDOW)); 917ec681f3Smrg v3dv_return_if_oom(NULL, job); 927ec681f3Smrg 937ec681f3Smrg cl_emit(&job->bcl, CLIP_WINDOW, clip) { 947ec681f3Smrg clip.clip_window_left_pixel_coordinate = rect->offset.x; 957ec681f3Smrg clip.clip_window_bottom_pixel_coordinate = rect->offset.y; 967ec681f3Smrg clip.clip_window_width_in_pixels = rect->extent.width; 977ec681f3Smrg clip.clip_window_height_in_pixels = rect->extent.height; 987ec681f3Smrg } 997ec681f3Smrg} 1007ec681f3Smrg 1017ec681f3Smrgstatic void 1027ec681f3Smrgcmd_buffer_render_pass_emit_load(struct v3dv_cmd_buffer *cmd_buffer, 1037ec681f3Smrg struct v3dv_cl *cl, 1047ec681f3Smrg struct v3dv_image_view *iview, 1057ec681f3Smrg uint32_t layer, 1067ec681f3Smrg uint32_t buffer) 1077ec681f3Smrg{ 1087ec681f3Smrg const struct v3dv_image *image = (struct v3dv_image *) iview->vk.image; 1097ec681f3Smrg const struct v3d_resource_slice *slice = 1107ec681f3Smrg &image->slices[iview->vk.base_mip_level]; 1117ec681f3Smrg uint32_t layer_offset = 1127ec681f3Smrg v3dv_layer_offset(image, iview->vk.base_mip_level, 1137ec681f3Smrg iview->vk.base_array_layer + layer); 1147ec681f3Smrg 1157ec681f3Smrg cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) { 1167ec681f3Smrg load.buffer_to_load = buffer; 1177ec681f3Smrg load.address = v3dv_cl_address(image->mem->bo, layer_offset); 1187ec681f3Smrg 1197ec681f3Smrg load.input_image_format = iview->format->rt_type; 1207ec681f3Smrg load.r_b_swap = iview->swap_rb; 1217ec681f3Smrg load.memory_format = slice->tiling; 1227ec681f3Smrg 1237ec681f3Smrg if (slice->tiling == V3D_TILING_UIF_NO_XOR || 1247ec681f3Smrg slice->tiling == V3D_TILING_UIF_XOR) { 1257ec681f3Smrg load.height_in_ub_or_stride = 1267ec681f3Smrg slice->padded_height_of_output_image_in_uif_blocks; 1277ec681f3Smrg } else if (slice->tiling == V3D_TILING_RASTER) { 1287ec681f3Smrg load.height_in_ub_or_stride = slice->stride; 1297ec681f3Smrg } 1307ec681f3Smrg 1317ec681f3Smrg if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT) 1327ec681f3Smrg load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; 1337ec681f3Smrg else 1347ec681f3Smrg load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; 1357ec681f3Smrg } 1367ec681f3Smrg} 1377ec681f3Smrg 1387ec681f3Smrgstatic bool 1397ec681f3Smrgcheck_needs_load(const struct v3dv_cmd_buffer_state *state, 1407ec681f3Smrg VkImageAspectFlags aspect, 1417ec681f3Smrg uint32_t first_subpass_idx, 1427ec681f3Smrg VkAttachmentLoadOp load_op) 1437ec681f3Smrg{ 1447ec681f3Smrg /* We call this with image->vk.aspects & aspect, so 0 means the aspect we are 1457ec681f3Smrg * testing does not exist in the image. 1467ec681f3Smrg */ 1477ec681f3Smrg if (!aspect) 1487ec681f3Smrg return false; 1497ec681f3Smrg 1507ec681f3Smrg /* Attachment (or view) load operations apply on the first subpass that 1517ec681f3Smrg * uses the attachment (or view), otherwise we always need to load. 1527ec681f3Smrg */ 1537ec681f3Smrg if (state->job->first_subpass > first_subpass_idx) 1547ec681f3Smrg return true; 1557ec681f3Smrg 1567ec681f3Smrg /* If the job is continuing a subpass started in another job, we always 1577ec681f3Smrg * need to load. 1587ec681f3Smrg */ 1597ec681f3Smrg if (state->job->is_subpass_continue) 1607ec681f3Smrg return true; 1617ec681f3Smrg 1627ec681f3Smrg /* If the area is not aligned to tile boundaries, we always need to load */ 1637ec681f3Smrg if (!state->tile_aligned_render_area) 1647ec681f3Smrg return true; 1657ec681f3Smrg 1667ec681f3Smrg /* The attachment load operations must be LOAD */ 1677ec681f3Smrg return load_op == VK_ATTACHMENT_LOAD_OP_LOAD; 1687ec681f3Smrg} 1697ec681f3Smrg 1707ec681f3Smrgstatic inline uint32_t 1717ec681f3Smrgv3dv_zs_buffer(bool depth, bool stencil) 1727ec681f3Smrg{ 1737ec681f3Smrg if (depth && stencil) 1747ec681f3Smrg return ZSTENCIL; 1757ec681f3Smrg else if (depth) 1767ec681f3Smrg return Z; 1777ec681f3Smrg else if (stencil) 1787ec681f3Smrg return STENCIL; 1797ec681f3Smrg return NONE; 1807ec681f3Smrg} 1817ec681f3Smrg 1827ec681f3Smrgstatic void 1837ec681f3Smrgcmd_buffer_render_pass_emit_loads(struct v3dv_cmd_buffer *cmd_buffer, 1847ec681f3Smrg struct v3dv_cl *cl, 1857ec681f3Smrg uint32_t layer) 1867ec681f3Smrg{ 1877ec681f3Smrg const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 1887ec681f3Smrg const struct v3dv_framebuffer *framebuffer = state->framebuffer; 1897ec681f3Smrg const struct v3dv_render_pass *pass = state->pass; 1907ec681f3Smrg const struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx]; 1917ec681f3Smrg 1927ec681f3Smrg assert(!pass->multiview_enabled || layer < MAX_MULTIVIEW_VIEW_COUNT); 1937ec681f3Smrg 1947ec681f3Smrg for (uint32_t i = 0; i < subpass->color_count; i++) { 1957ec681f3Smrg uint32_t attachment_idx = subpass->color_attachments[i].attachment; 1967ec681f3Smrg 1977ec681f3Smrg if (attachment_idx == VK_ATTACHMENT_UNUSED) 1987ec681f3Smrg continue; 1997ec681f3Smrg 2007ec681f3Smrg const struct v3dv_render_pass_attachment *attachment = 2017ec681f3Smrg &state->pass->attachments[attachment_idx]; 2027ec681f3Smrg 2037ec681f3Smrg /* According to the Vulkan spec: 2047ec681f3Smrg * 2057ec681f3Smrg * "The load operation for each sample in an attachment happens before 2067ec681f3Smrg * any recorded command which accesses the sample in the first subpass 2077ec681f3Smrg * where the attachment is used." 2087ec681f3Smrg * 2097ec681f3Smrg * If the load operation is CLEAR, we must only clear once on the first 2107ec681f3Smrg * subpass that uses the attachment (and in that case we don't LOAD). 2117ec681f3Smrg * After that, we always want to load so we don't lose any rendering done 2127ec681f3Smrg * by a previous subpass to the same attachment. We also want to load 2137ec681f3Smrg * if the current job is continuing subpass work started by a previous 2147ec681f3Smrg * job, for the same reason. 2157ec681f3Smrg * 2167ec681f3Smrg * If the render area is not aligned to tile boundaries then we have 2177ec681f3Smrg * tiles which are partially covered by it. In this case, we need to 2187ec681f3Smrg * load the tiles so we can preserve the pixels that are outside the 2197ec681f3Smrg * render area for any such tiles. 2207ec681f3Smrg */ 2217ec681f3Smrg uint32_t first_subpass = !pass->multiview_enabled ? 2227ec681f3Smrg attachment->first_subpass : 2237ec681f3Smrg attachment->views[layer].first_subpass; 2247ec681f3Smrg 2257ec681f3Smrg bool needs_load = check_needs_load(state, 2267ec681f3Smrg VK_IMAGE_ASPECT_COLOR_BIT, 2277ec681f3Smrg first_subpass, 2287ec681f3Smrg attachment->desc.loadOp); 2297ec681f3Smrg if (needs_load) { 2307ec681f3Smrg struct v3dv_image_view *iview = framebuffer->attachments[attachment_idx]; 2317ec681f3Smrg cmd_buffer_render_pass_emit_load(cmd_buffer, cl, iview, 2327ec681f3Smrg layer, RENDER_TARGET_0 + i); 2337ec681f3Smrg } 2347ec681f3Smrg } 2357ec681f3Smrg 2367ec681f3Smrg uint32_t ds_attachment_idx = subpass->ds_attachment.attachment; 2377ec681f3Smrg if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) { 2387ec681f3Smrg const struct v3dv_render_pass_attachment *ds_attachment = 2397ec681f3Smrg &state->pass->attachments[ds_attachment_idx]; 2407ec681f3Smrg 2417ec681f3Smrg const VkImageAspectFlags ds_aspects = 2427ec681f3Smrg vk_format_aspects(ds_attachment->desc.format); 2437ec681f3Smrg 2447ec681f3Smrg uint32_t ds_first_subpass = !pass->multiview_enabled ? 2457ec681f3Smrg ds_attachment->first_subpass : 2467ec681f3Smrg ds_attachment->views[layer].first_subpass; 2477ec681f3Smrg 2487ec681f3Smrg const bool needs_depth_load = 2497ec681f3Smrg check_needs_load(state, 2507ec681f3Smrg ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT, 2517ec681f3Smrg ds_first_subpass, 2527ec681f3Smrg ds_attachment->desc.loadOp); 2537ec681f3Smrg 2547ec681f3Smrg const bool needs_stencil_load = 2557ec681f3Smrg check_needs_load(state, 2567ec681f3Smrg ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT, 2577ec681f3Smrg ds_first_subpass, 2587ec681f3Smrg ds_attachment->desc.stencilLoadOp); 2597ec681f3Smrg 2607ec681f3Smrg if (needs_depth_load || needs_stencil_load) { 2617ec681f3Smrg struct v3dv_image_view *iview = 2627ec681f3Smrg framebuffer->attachments[ds_attachment_idx]; 2637ec681f3Smrg /* From the Vulkan spec: 2647ec681f3Smrg * 2657ec681f3Smrg * "When an image view of a depth/stencil image is used as a 2667ec681f3Smrg * depth/stencil framebuffer attachment, the aspectMask is ignored 2677ec681f3Smrg * and both depth and stencil image subresources are used." 2687ec681f3Smrg * 2697ec681f3Smrg * So we ignore the aspects from the subresource range of the image 2707ec681f3Smrg * view for the depth/stencil attachment, but we still need to restrict 2717ec681f3Smrg * the to aspects compatible with the render pass and the image. 2727ec681f3Smrg */ 2737ec681f3Smrg const uint32_t zs_buffer = 2747ec681f3Smrg v3dv_zs_buffer(needs_depth_load, needs_stencil_load); 2757ec681f3Smrg cmd_buffer_render_pass_emit_load(cmd_buffer, cl, 2767ec681f3Smrg iview, layer, zs_buffer); 2777ec681f3Smrg } 2787ec681f3Smrg } 2797ec681f3Smrg 2807ec681f3Smrg cl_emit(cl, END_OF_LOADS, end); 2817ec681f3Smrg} 2827ec681f3Smrg 2837ec681f3Smrgstatic void 2847ec681f3Smrgcmd_buffer_render_pass_emit_store(struct v3dv_cmd_buffer *cmd_buffer, 2857ec681f3Smrg struct v3dv_cl *cl, 2867ec681f3Smrg uint32_t attachment_idx, 2877ec681f3Smrg uint32_t layer, 2887ec681f3Smrg uint32_t buffer, 2897ec681f3Smrg bool clear, 2907ec681f3Smrg bool is_multisample_resolve) 2917ec681f3Smrg{ 2927ec681f3Smrg const struct v3dv_image_view *iview = 2937ec681f3Smrg cmd_buffer->state.framebuffer->attachments[attachment_idx]; 2947ec681f3Smrg const struct v3dv_image *image = (struct v3dv_image *) iview->vk.image; 2957ec681f3Smrg const struct v3d_resource_slice *slice = 2967ec681f3Smrg &image->slices[iview->vk.base_mip_level]; 2977ec681f3Smrg uint32_t layer_offset = v3dv_layer_offset(image, 2987ec681f3Smrg iview->vk.base_mip_level, 2997ec681f3Smrg iview->vk.base_array_layer + layer); 3007ec681f3Smrg 3017ec681f3Smrg cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { 3027ec681f3Smrg store.buffer_to_store = buffer; 3037ec681f3Smrg store.address = v3dv_cl_address(image->mem->bo, layer_offset); 3047ec681f3Smrg store.clear_buffer_being_stored = clear; 3057ec681f3Smrg 3067ec681f3Smrg store.output_image_format = iview->format->rt_type; 3077ec681f3Smrg store.r_b_swap = iview->swap_rb; 3087ec681f3Smrg store.memory_format = slice->tiling; 3097ec681f3Smrg 3107ec681f3Smrg if (slice->tiling == V3D_TILING_UIF_NO_XOR || 3117ec681f3Smrg slice->tiling == V3D_TILING_UIF_XOR) { 3127ec681f3Smrg store.height_in_ub_or_stride = 3137ec681f3Smrg slice->padded_height_of_output_image_in_uif_blocks; 3147ec681f3Smrg } else if (slice->tiling == V3D_TILING_RASTER) { 3157ec681f3Smrg store.height_in_ub_or_stride = slice->stride; 3167ec681f3Smrg } 3177ec681f3Smrg 3187ec681f3Smrg if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT) 3197ec681f3Smrg store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; 3207ec681f3Smrg else if (is_multisample_resolve) 3217ec681f3Smrg store.decimate_mode = V3D_DECIMATE_MODE_4X; 3227ec681f3Smrg else 3237ec681f3Smrg store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; 3247ec681f3Smrg } 3257ec681f3Smrg} 3267ec681f3Smrg 3277ec681f3Smrgstatic bool 3287ec681f3Smrgcheck_needs_clear(const struct v3dv_cmd_buffer_state *state, 3297ec681f3Smrg VkImageAspectFlags aspect, 3307ec681f3Smrg uint32_t first_subpass_idx, 3317ec681f3Smrg VkAttachmentLoadOp load_op, 3327ec681f3Smrg bool do_clear_with_draw) 3337ec681f3Smrg{ 3347ec681f3Smrg /* We call this with image->vk.aspects & aspect, so 0 means the aspect we are 3357ec681f3Smrg * testing does not exist in the image. 3367ec681f3Smrg */ 3377ec681f3Smrg if (!aspect) 3387ec681f3Smrg return false; 3397ec681f3Smrg 3407ec681f3Smrg /* If the aspect needs to be cleared with a draw call then we won't emit 3417ec681f3Smrg * the clear here. 3427ec681f3Smrg */ 3437ec681f3Smrg if (do_clear_with_draw) 3447ec681f3Smrg return false; 3457ec681f3Smrg 3467ec681f3Smrg /* If this is resuming a subpass started with another job, then attachment 3477ec681f3Smrg * load operations don't apply. 3487ec681f3Smrg */ 3497ec681f3Smrg if (state->job->is_subpass_continue) 3507ec681f3Smrg return false; 3517ec681f3Smrg 3527ec681f3Smrg /* If the render area is not aligned to tile boudaries we can't use the 3537ec681f3Smrg * TLB for a clear. 3547ec681f3Smrg */ 3557ec681f3Smrg if (!state->tile_aligned_render_area) 3567ec681f3Smrg return false; 3577ec681f3Smrg 3587ec681f3Smrg /* If this job is running in a subpass other than the first subpass in 3597ec681f3Smrg * which this attachment (or view) is used then attachment load operations 3607ec681f3Smrg * don't apply. 3617ec681f3Smrg */ 3627ec681f3Smrg if (state->job->first_subpass != first_subpass_idx) 3637ec681f3Smrg return false; 3647ec681f3Smrg 3657ec681f3Smrg /* The attachment load operation must be CLEAR */ 3667ec681f3Smrg return load_op == VK_ATTACHMENT_LOAD_OP_CLEAR; 3677ec681f3Smrg} 3687ec681f3Smrg 3697ec681f3Smrgstatic bool 3707ec681f3Smrgcheck_needs_store(const struct v3dv_cmd_buffer_state *state, 3717ec681f3Smrg VkImageAspectFlags aspect, 3727ec681f3Smrg uint32_t last_subpass_idx, 3737ec681f3Smrg VkAttachmentStoreOp store_op) 3747ec681f3Smrg{ 3757ec681f3Smrg /* We call this with image->vk.aspects & aspect, so 0 means the aspect we are 3767ec681f3Smrg * testing does not exist in the image. 3777ec681f3Smrg */ 3787ec681f3Smrg if (!aspect) 3797ec681f3Smrg return false; 3807ec681f3Smrg 3817ec681f3Smrg /* Attachment (or view) store operations only apply on the last subpass 3827ec681f3Smrg * where the attachment (or view) is used, in other subpasses we always 3837ec681f3Smrg * need to store. 3847ec681f3Smrg */ 3857ec681f3Smrg if (state->subpass_idx < last_subpass_idx) 3867ec681f3Smrg return true; 3877ec681f3Smrg 3887ec681f3Smrg /* Attachment store operations only apply on the last job we emit on the the 3897ec681f3Smrg * last subpass where the attachment is used, otherwise we always need to 3907ec681f3Smrg * store. 3917ec681f3Smrg */ 3927ec681f3Smrg if (!state->job->is_subpass_finish) 3937ec681f3Smrg return true; 3947ec681f3Smrg 3957ec681f3Smrg /* The attachment store operation must be STORE */ 3967ec681f3Smrg return store_op == VK_ATTACHMENT_STORE_OP_STORE; 3977ec681f3Smrg} 3987ec681f3Smrg 3997ec681f3Smrgstatic void 4007ec681f3Smrgcmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer, 4017ec681f3Smrg struct v3dv_cl *cl, 4027ec681f3Smrg uint32_t layer) 4037ec681f3Smrg{ 4047ec681f3Smrg struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 4057ec681f3Smrg struct v3dv_render_pass *pass = state->pass; 4067ec681f3Smrg const struct v3dv_subpass *subpass = 4077ec681f3Smrg &pass->subpasses[state->subpass_idx]; 4087ec681f3Smrg 4097ec681f3Smrg bool has_stores = false; 4107ec681f3Smrg bool use_global_zs_clear = false; 4117ec681f3Smrg bool use_global_rt_clear = false; 4127ec681f3Smrg 4137ec681f3Smrg assert(!pass->multiview_enabled || layer < MAX_MULTIVIEW_VIEW_COUNT); 4147ec681f3Smrg 4157ec681f3Smrg /* FIXME: separate stencil */ 4167ec681f3Smrg uint32_t ds_attachment_idx = subpass->ds_attachment.attachment; 4177ec681f3Smrg if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) { 4187ec681f3Smrg const struct v3dv_render_pass_attachment *ds_attachment = 4197ec681f3Smrg &state->pass->attachments[ds_attachment_idx]; 4207ec681f3Smrg 4217ec681f3Smrg assert(state->job->first_subpass >= ds_attachment->first_subpass); 4227ec681f3Smrg assert(state->subpass_idx >= ds_attachment->first_subpass); 4237ec681f3Smrg assert(state->subpass_idx <= ds_attachment->last_subpass); 4247ec681f3Smrg 4257ec681f3Smrg /* From the Vulkan spec, VkImageSubresourceRange: 4267ec681f3Smrg * 4277ec681f3Smrg * "When an image view of a depth/stencil image is used as a 4287ec681f3Smrg * depth/stencil framebuffer attachment, the aspectMask is ignored 4297ec681f3Smrg * and both depth and stencil image subresources are used." 4307ec681f3Smrg * 4317ec681f3Smrg * So we ignore the aspects from the subresource range of the image 4327ec681f3Smrg * view for the depth/stencil attachment, but we still need to restrict 4337ec681f3Smrg * the to aspects compatible with the render pass and the image. 4347ec681f3Smrg */ 4357ec681f3Smrg const VkImageAspectFlags aspects = 4367ec681f3Smrg vk_format_aspects(ds_attachment->desc.format); 4377ec681f3Smrg 4387ec681f3Smrg /* Only clear once on the first subpass that uses the attachment */ 4397ec681f3Smrg uint32_t ds_first_subpass = !state->pass->multiview_enabled ? 4407ec681f3Smrg ds_attachment->first_subpass : 4417ec681f3Smrg ds_attachment->views[layer].first_subpass; 4427ec681f3Smrg 4437ec681f3Smrg bool needs_depth_clear = 4447ec681f3Smrg check_needs_clear(state, 4457ec681f3Smrg aspects & VK_IMAGE_ASPECT_DEPTH_BIT, 4467ec681f3Smrg ds_first_subpass, 4477ec681f3Smrg ds_attachment->desc.loadOp, 4487ec681f3Smrg subpass->do_depth_clear_with_draw); 4497ec681f3Smrg 4507ec681f3Smrg bool needs_stencil_clear = 4517ec681f3Smrg check_needs_clear(state, 4527ec681f3Smrg aspects & VK_IMAGE_ASPECT_STENCIL_BIT, 4537ec681f3Smrg ds_first_subpass, 4547ec681f3Smrg ds_attachment->desc.stencilLoadOp, 4557ec681f3Smrg subpass->do_stencil_clear_with_draw); 4567ec681f3Smrg 4577ec681f3Smrg /* Skip the last store if it is not required */ 4587ec681f3Smrg uint32_t ds_last_subpass = !pass->multiview_enabled ? 4597ec681f3Smrg ds_attachment->last_subpass : 4607ec681f3Smrg ds_attachment->views[layer].last_subpass; 4617ec681f3Smrg 4627ec681f3Smrg bool needs_depth_store = 4637ec681f3Smrg check_needs_store(state, 4647ec681f3Smrg aspects & VK_IMAGE_ASPECT_DEPTH_BIT, 4657ec681f3Smrg ds_last_subpass, 4667ec681f3Smrg ds_attachment->desc.storeOp); 4677ec681f3Smrg 4687ec681f3Smrg bool needs_stencil_store = 4697ec681f3Smrg check_needs_store(state, 4707ec681f3Smrg aspects & VK_IMAGE_ASPECT_STENCIL_BIT, 4717ec681f3Smrg ds_last_subpass, 4727ec681f3Smrg ds_attachment->desc.stencilStoreOp); 4737ec681f3Smrg 4747ec681f3Smrg /* GFXH-1689: The per-buffer store command's clear buffer bit is broken 4757ec681f3Smrg * for depth/stencil. 4767ec681f3Smrg * 4777ec681f3Smrg * There used to be some confusion regarding the Clear Tile Buffers 4787ec681f3Smrg * Z/S bit also being broken, but we confirmed with Broadcom that this 4797ec681f3Smrg * is not the case, it was just that some other hardware bugs (that we 4807ec681f3Smrg * need to work around, such as GFXH-1461) could cause this bit to behave 4817ec681f3Smrg * incorrectly. 4827ec681f3Smrg * 4837ec681f3Smrg * There used to be another issue where the RTs bit in the Clear Tile 4847ec681f3Smrg * Buffers packet also cleared Z/S, but Broadcom confirmed this is 4857ec681f3Smrg * fixed since V3D 4.1. 4867ec681f3Smrg * 4877ec681f3Smrg * So if we have to emit a clear of depth or stencil we don't use 4887ec681f3Smrg * the per-buffer store clear bit, even if we need to store the buffers, 4897ec681f3Smrg * instead we always have to use the Clear Tile Buffers Z/S bit. 4907ec681f3Smrg * If we have configured the job to do early Z/S clearing, then we 4917ec681f3Smrg * don't want to emit any Clear Tile Buffers command at all here. 4927ec681f3Smrg * 4937ec681f3Smrg * Note that GFXH-1689 is not reproduced in the simulator, where 4947ec681f3Smrg * using the clear buffer bit in depth/stencil stores works fine. 4957ec681f3Smrg */ 4967ec681f3Smrg use_global_zs_clear = !state->job->early_zs_clear && 4977ec681f3Smrg (needs_depth_clear || needs_stencil_clear); 4987ec681f3Smrg if (needs_depth_store || needs_stencil_store) { 4997ec681f3Smrg const uint32_t zs_buffer = 5007ec681f3Smrg v3dv_zs_buffer(needs_depth_store, needs_stencil_store); 5017ec681f3Smrg cmd_buffer_render_pass_emit_store(cmd_buffer, cl, 5027ec681f3Smrg ds_attachment_idx, layer, 5037ec681f3Smrg zs_buffer, false, false); 5047ec681f3Smrg has_stores = true; 5057ec681f3Smrg } 5067ec681f3Smrg } 5077ec681f3Smrg 5087ec681f3Smrg for (uint32_t i = 0; i < subpass->color_count; i++) { 5097ec681f3Smrg uint32_t attachment_idx = subpass->color_attachments[i].attachment; 5107ec681f3Smrg 5117ec681f3Smrg if (attachment_idx == VK_ATTACHMENT_UNUSED) 5127ec681f3Smrg continue; 5137ec681f3Smrg 5147ec681f3Smrg const struct v3dv_render_pass_attachment *attachment = 5157ec681f3Smrg &state->pass->attachments[attachment_idx]; 5167ec681f3Smrg 5177ec681f3Smrg assert(state->job->first_subpass >= attachment->first_subpass); 5187ec681f3Smrg assert(state->subpass_idx >= attachment->first_subpass); 5197ec681f3Smrg assert(state->subpass_idx <= attachment->last_subpass); 5207ec681f3Smrg 5217ec681f3Smrg /* Only clear once on the first subpass that uses the attachment */ 5227ec681f3Smrg uint32_t first_subpass = !pass->multiview_enabled ? 5237ec681f3Smrg attachment->first_subpass : 5247ec681f3Smrg attachment->views[layer].first_subpass; 5257ec681f3Smrg 5267ec681f3Smrg bool needs_clear = 5277ec681f3Smrg check_needs_clear(state, 5287ec681f3Smrg VK_IMAGE_ASPECT_COLOR_BIT, 5297ec681f3Smrg first_subpass, 5307ec681f3Smrg attachment->desc.loadOp, 5317ec681f3Smrg false); 5327ec681f3Smrg 5337ec681f3Smrg /* Skip the last store if it is not required */ 5347ec681f3Smrg uint32_t last_subpass = !pass->multiview_enabled ? 5357ec681f3Smrg attachment->last_subpass : 5367ec681f3Smrg attachment->views[layer].last_subpass; 5377ec681f3Smrg 5387ec681f3Smrg bool needs_store = 5397ec681f3Smrg check_needs_store(state, 5407ec681f3Smrg VK_IMAGE_ASPECT_COLOR_BIT, 5417ec681f3Smrg last_subpass, 5427ec681f3Smrg attachment->desc.storeOp); 5437ec681f3Smrg 5447ec681f3Smrg /* If we need to resolve this attachment emit that store first. Notice 5457ec681f3Smrg * that we must not request a tile buffer clear here in that case, since 5467ec681f3Smrg * that would clear the tile buffer before we get to emit the actual 5477ec681f3Smrg * color attachment store below, since the clear happens after the 5487ec681f3Smrg * store is completed. 5497ec681f3Smrg * 5507ec681f3Smrg * If the attachment doesn't support TLB resolves then we will have to 5517ec681f3Smrg * fallback to doing the resolve in a shader separately after this 5527ec681f3Smrg * job, so we will need to store the multisampled sttachment even if that 5537ec681f3Smrg * wansn't requested by the client. 5547ec681f3Smrg */ 5557ec681f3Smrg const bool needs_resolve = 5567ec681f3Smrg subpass->resolve_attachments && 5577ec681f3Smrg subpass->resolve_attachments[i].attachment != VK_ATTACHMENT_UNUSED; 5587ec681f3Smrg if (needs_resolve && attachment->use_tlb_resolve) { 5597ec681f3Smrg const uint32_t resolve_attachment_idx = 5607ec681f3Smrg subpass->resolve_attachments[i].attachment; 5617ec681f3Smrg cmd_buffer_render_pass_emit_store(cmd_buffer, cl, 5627ec681f3Smrg resolve_attachment_idx, layer, 5637ec681f3Smrg RENDER_TARGET_0 + i, 5647ec681f3Smrg false, true); 5657ec681f3Smrg has_stores = true; 5667ec681f3Smrg } else if (needs_resolve) { 5677ec681f3Smrg needs_store = true; 5687ec681f3Smrg } 5697ec681f3Smrg 5707ec681f3Smrg /* Emit the color attachment store if needed */ 5717ec681f3Smrg if (needs_store) { 5727ec681f3Smrg cmd_buffer_render_pass_emit_store(cmd_buffer, cl, 5737ec681f3Smrg attachment_idx, layer, 5747ec681f3Smrg RENDER_TARGET_0 + i, 5757ec681f3Smrg needs_clear && !use_global_rt_clear, 5767ec681f3Smrg false); 5777ec681f3Smrg has_stores = true; 5787ec681f3Smrg } else if (needs_clear) { 5797ec681f3Smrg use_global_rt_clear = true; 5807ec681f3Smrg } 5817ec681f3Smrg } 5827ec681f3Smrg 5837ec681f3Smrg /* We always need to emit at least one dummy store */ 5847ec681f3Smrg if (!has_stores) { 5857ec681f3Smrg cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { 5867ec681f3Smrg store.buffer_to_store = NONE; 5877ec681f3Smrg } 5887ec681f3Smrg } 5897ec681f3Smrg 5907ec681f3Smrg /* If we have any depth/stencil clears we can't use the per-buffer clear 5917ec681f3Smrg * bit and instead we have to emit a single clear of all tile buffers. 5927ec681f3Smrg */ 5937ec681f3Smrg if (use_global_zs_clear || use_global_rt_clear) { 5947ec681f3Smrg cl_emit(cl, CLEAR_TILE_BUFFERS, clear) { 5957ec681f3Smrg clear.clear_z_stencil_buffer = use_global_zs_clear; 5967ec681f3Smrg clear.clear_all_render_targets = use_global_rt_clear; 5977ec681f3Smrg } 5987ec681f3Smrg } 5997ec681f3Smrg} 6007ec681f3Smrg 6017ec681f3Smrgstatic void 6027ec681f3Smrgcmd_buffer_render_pass_emit_per_tile_rcl(struct v3dv_cmd_buffer *cmd_buffer, 6037ec681f3Smrg uint32_t layer) 6047ec681f3Smrg{ 6057ec681f3Smrg struct v3dv_job *job = cmd_buffer->state.job; 6067ec681f3Smrg assert(job); 6077ec681f3Smrg 6087ec681f3Smrg /* Emit the generic list in our indirect state -- the rcl will just 6097ec681f3Smrg * have pointers into it. 6107ec681f3Smrg */ 6117ec681f3Smrg struct v3dv_cl *cl = &job->indirect; 6127ec681f3Smrg v3dv_cl_ensure_space(cl, 200, 1); 6137ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 6147ec681f3Smrg 6157ec681f3Smrg struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); 6167ec681f3Smrg 6177ec681f3Smrg cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); 6187ec681f3Smrg 6197ec681f3Smrg cmd_buffer_render_pass_emit_loads(cmd_buffer, cl, layer); 6207ec681f3Smrg 6217ec681f3Smrg /* The binner starts out writing tiles assuming that the initial mode 6227ec681f3Smrg * is triangles, so make sure that's the case. 6237ec681f3Smrg */ 6247ec681f3Smrg cl_emit(cl, PRIM_LIST_FORMAT, fmt) { 6257ec681f3Smrg fmt.primitive_type = LIST_TRIANGLES; 6267ec681f3Smrg } 6277ec681f3Smrg 6287ec681f3Smrg /* PTB assumes that value to be 0, but hw will not set it. */ 6297ec681f3Smrg cl_emit(cl, SET_INSTANCEID, set) { 6307ec681f3Smrg set.instance_id = 0; 6317ec681f3Smrg } 6327ec681f3Smrg 6337ec681f3Smrg cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); 6347ec681f3Smrg 6357ec681f3Smrg cmd_buffer_render_pass_emit_stores(cmd_buffer, cl, layer); 6367ec681f3Smrg 6377ec681f3Smrg cl_emit(cl, END_OF_TILE_MARKER, end); 6387ec681f3Smrg 6397ec681f3Smrg cl_emit(cl, RETURN_FROM_SUB_LIST, ret); 6407ec681f3Smrg 6417ec681f3Smrg cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { 6427ec681f3Smrg branch.start = tile_list_start; 6437ec681f3Smrg branch.end = v3dv_cl_get_address(cl); 6447ec681f3Smrg } 6457ec681f3Smrg} 6467ec681f3Smrg 6477ec681f3Smrgstatic void 6487ec681f3Smrgcmd_buffer_emit_render_pass_layer_rcl(struct v3dv_cmd_buffer *cmd_buffer, 6497ec681f3Smrg uint32_t layer) 6507ec681f3Smrg{ 6517ec681f3Smrg const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 6527ec681f3Smrg 6537ec681f3Smrg struct v3dv_job *job = cmd_buffer->state.job; 6547ec681f3Smrg struct v3dv_cl *rcl = &job->rcl; 6557ec681f3Smrg 6567ec681f3Smrg /* If doing multicore binning, we would need to initialize each 6577ec681f3Smrg * core's tile list here. 6587ec681f3Smrg */ 6597ec681f3Smrg const struct v3dv_frame_tiling *tiling = &job->frame_tiling; 6607ec681f3Smrg const uint32_t tile_alloc_offset = 6617ec681f3Smrg 64 * layer * tiling->draw_tiles_x * tiling->draw_tiles_y; 6627ec681f3Smrg cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) { 6637ec681f3Smrg list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset); 6647ec681f3Smrg } 6657ec681f3Smrg 6667ec681f3Smrg cmd_buffer_render_pass_emit_per_tile_rcl(cmd_buffer, layer); 6677ec681f3Smrg 6687ec681f3Smrg uint32_t supertile_w_in_pixels = 6697ec681f3Smrg tiling->tile_width * tiling->supertile_width; 6707ec681f3Smrg uint32_t supertile_h_in_pixels = 6717ec681f3Smrg tiling->tile_height * tiling->supertile_height; 6727ec681f3Smrg const uint32_t min_x_supertile = 6737ec681f3Smrg state->render_area.offset.x / supertile_w_in_pixels; 6747ec681f3Smrg const uint32_t min_y_supertile = 6757ec681f3Smrg state->render_area.offset.y / supertile_h_in_pixels; 6767ec681f3Smrg 6777ec681f3Smrg uint32_t max_render_x = state->render_area.offset.x; 6787ec681f3Smrg if (state->render_area.extent.width > 0) 6797ec681f3Smrg max_render_x += state->render_area.extent.width - 1; 6807ec681f3Smrg uint32_t max_render_y = state->render_area.offset.y; 6817ec681f3Smrg if (state->render_area.extent.height > 0) 6827ec681f3Smrg max_render_y += state->render_area.extent.height - 1; 6837ec681f3Smrg const uint32_t max_x_supertile = max_render_x / supertile_w_in_pixels; 6847ec681f3Smrg const uint32_t max_y_supertile = max_render_y / supertile_h_in_pixels; 6857ec681f3Smrg 6867ec681f3Smrg for (int y = min_y_supertile; y <= max_y_supertile; y++) { 6877ec681f3Smrg for (int x = min_x_supertile; x <= max_x_supertile; x++) { 6887ec681f3Smrg cl_emit(rcl, SUPERTILE_COORDINATES, coords) { 6897ec681f3Smrg coords.column_number_in_supertiles = x; 6907ec681f3Smrg coords.row_number_in_supertiles = y; 6917ec681f3Smrg } 6927ec681f3Smrg } 6937ec681f3Smrg } 6947ec681f3Smrg} 6957ec681f3Smrg 6967ec681f3Smrgstatic void 6977ec681f3Smrgset_rcl_early_z_config(struct v3dv_job *job, 6987ec681f3Smrg bool *early_z_disable, 6997ec681f3Smrg uint32_t *early_z_test_and_update_direction) 7007ec681f3Smrg{ 7017ec681f3Smrg /* If this is true then we have not emitted any draw calls in this job 7027ec681f3Smrg * and we don't get any benefits form early Z. 7037ec681f3Smrg */ 7047ec681f3Smrg if (!job->decided_global_ez_enable) { 7057ec681f3Smrg assert(job->draw_count == 0); 7067ec681f3Smrg *early_z_disable = true; 7077ec681f3Smrg return; 7087ec681f3Smrg } 7097ec681f3Smrg 7107ec681f3Smrg switch (job->first_ez_state) { 7117ec681f3Smrg case V3D_EZ_UNDECIDED: 7127ec681f3Smrg case V3D_EZ_LT_LE: 7137ec681f3Smrg *early_z_disable = false; 7147ec681f3Smrg *early_z_test_and_update_direction = EARLY_Z_DIRECTION_LT_LE; 7157ec681f3Smrg break; 7167ec681f3Smrg case V3D_EZ_GT_GE: 7177ec681f3Smrg *early_z_disable = false; 7187ec681f3Smrg *early_z_test_and_update_direction = EARLY_Z_DIRECTION_GT_GE; 7197ec681f3Smrg break; 7207ec681f3Smrg case V3D_EZ_DISABLED: 7217ec681f3Smrg *early_z_disable = true; 7227ec681f3Smrg break; 7237ec681f3Smrg } 7247ec681f3Smrg} 7257ec681f3Smrg 7267ec681f3Smrgvoid 7277ec681f3Smrgv3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) 7287ec681f3Smrg{ 7297ec681f3Smrg struct v3dv_job *job = cmd_buffer->state.job; 7307ec681f3Smrg assert(job); 7317ec681f3Smrg 7327ec681f3Smrg const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 7337ec681f3Smrg const struct v3dv_framebuffer *framebuffer = state->framebuffer; 7347ec681f3Smrg 7357ec681f3Smrg /* We can't emit the RCL until we have a framebuffer, which we may not have 7367ec681f3Smrg * if we are recording a secondary command buffer. In that case, we will 7377ec681f3Smrg * have to wait until vkCmdExecuteCommands is called from a primary command 7387ec681f3Smrg * buffer. 7397ec681f3Smrg */ 7407ec681f3Smrg if (!framebuffer) { 7417ec681f3Smrg assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); 7427ec681f3Smrg return; 7437ec681f3Smrg } 7447ec681f3Smrg 7457ec681f3Smrg const struct v3dv_frame_tiling *tiling = &job->frame_tiling; 7467ec681f3Smrg 7477ec681f3Smrg const uint32_t fb_layers = job->frame_tiling.layers; 7487ec681f3Smrg 7497ec681f3Smrg v3dv_cl_ensure_space_with_branch(&job->rcl, 200 + 7507ec681f3Smrg MAX2(fb_layers, 1) * 256 * 7517ec681f3Smrg cl_packet_length(SUPERTILE_COORDINATES)); 7527ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 7537ec681f3Smrg 7547ec681f3Smrg assert(state->subpass_idx < state->pass->subpass_count); 7557ec681f3Smrg const struct v3dv_render_pass *pass = state->pass; 7567ec681f3Smrg const struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx]; 7577ec681f3Smrg struct v3dv_cl *rcl = &job->rcl; 7587ec681f3Smrg 7597ec681f3Smrg /* Comon config must be the first TILE_RENDERING_MODE_CFG and 7607ec681f3Smrg * Z_STENCIL_CLEAR_VALUES must be last. The ones in between are optional 7617ec681f3Smrg * updates to the previous HW state. 7627ec681f3Smrg */ 7637ec681f3Smrg bool do_early_zs_clear = false; 7647ec681f3Smrg const uint32_t ds_attachment_idx = subpass->ds_attachment.attachment; 7657ec681f3Smrg cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) { 7667ec681f3Smrg config.image_width_pixels = framebuffer->width; 7677ec681f3Smrg config.image_height_pixels = framebuffer->height; 7687ec681f3Smrg config.number_of_render_targets = MAX2(subpass->color_count, 1); 7697ec681f3Smrg config.multisample_mode_4x = tiling->msaa; 7707ec681f3Smrg config.maximum_bpp_of_all_render_targets = tiling->internal_bpp; 7717ec681f3Smrg 7727ec681f3Smrg if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) { 7737ec681f3Smrg const struct v3dv_image_view *iview = 7747ec681f3Smrg framebuffer->attachments[ds_attachment_idx]; 7757ec681f3Smrg config.internal_depth_type = iview->internal_type; 7767ec681f3Smrg 7777ec681f3Smrg set_rcl_early_z_config(job, 7787ec681f3Smrg &config.early_z_disable, 7797ec681f3Smrg &config.early_z_test_and_update_direction); 7807ec681f3Smrg 7817ec681f3Smrg /* Early-Z/S clear can be enabled if the job is clearing and not 7827ec681f3Smrg * storing (or loading) depth. If a stencil aspect is also present 7837ec681f3Smrg * we have the same requirements for it, however, in this case we 7847ec681f3Smrg * can accept stencil loadOp DONT_CARE as well, so instead of 7857ec681f3Smrg * checking that stencil is cleared we check that is not loaded. 7867ec681f3Smrg * 7877ec681f3Smrg * Early-Z/S clearing is independent of Early Z/S testing, so it is 7887ec681f3Smrg * possible to enable one but not the other so long as their 7897ec681f3Smrg * respective requirements are met. 7907ec681f3Smrg */ 7917ec681f3Smrg struct v3dv_render_pass_attachment *ds_attachment = 7927ec681f3Smrg &pass->attachments[ds_attachment_idx]; 7937ec681f3Smrg 7947ec681f3Smrg const VkImageAspectFlags ds_aspects = 7957ec681f3Smrg vk_format_aspects(ds_attachment->desc.format); 7967ec681f3Smrg 7977ec681f3Smrg bool needs_depth_clear = 7987ec681f3Smrg check_needs_clear(state, 7997ec681f3Smrg ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT, 8007ec681f3Smrg ds_attachment->first_subpass, 8017ec681f3Smrg ds_attachment->desc.loadOp, 8027ec681f3Smrg subpass->do_depth_clear_with_draw); 8037ec681f3Smrg 8047ec681f3Smrg bool needs_depth_store = 8057ec681f3Smrg check_needs_store(state, 8067ec681f3Smrg ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT, 8077ec681f3Smrg ds_attachment->last_subpass, 8087ec681f3Smrg ds_attachment->desc.storeOp); 8097ec681f3Smrg 8107ec681f3Smrg do_early_zs_clear = needs_depth_clear && !needs_depth_store; 8117ec681f3Smrg if (do_early_zs_clear && 8127ec681f3Smrg vk_format_has_stencil(ds_attachment->desc.format)) { 8137ec681f3Smrg bool needs_stencil_load = 8147ec681f3Smrg check_needs_load(state, 8157ec681f3Smrg ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT, 8167ec681f3Smrg ds_attachment->first_subpass, 8177ec681f3Smrg ds_attachment->desc.stencilLoadOp); 8187ec681f3Smrg 8197ec681f3Smrg bool needs_stencil_store = 8207ec681f3Smrg check_needs_store(state, 8217ec681f3Smrg ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT, 8227ec681f3Smrg ds_attachment->last_subpass, 8237ec681f3Smrg ds_attachment->desc.stencilStoreOp); 8247ec681f3Smrg 8257ec681f3Smrg do_early_zs_clear = !needs_stencil_load && !needs_stencil_store; 8267ec681f3Smrg } 8277ec681f3Smrg 8287ec681f3Smrg config.early_depth_stencil_clear = do_early_zs_clear; 8297ec681f3Smrg } else { 8307ec681f3Smrg config.early_z_disable = true; 8317ec681f3Smrg } 8327ec681f3Smrg } 8337ec681f3Smrg 8347ec681f3Smrg /* If we enabled early Z/S clear, then we can't emit any "Clear Tile Buffers" 8357ec681f3Smrg * commands with the Z/S bit set, so keep track of whether we enabled this 8367ec681f3Smrg * in the job so we can skip these later. 8377ec681f3Smrg */ 8387ec681f3Smrg job->early_zs_clear = do_early_zs_clear; 8397ec681f3Smrg 8407ec681f3Smrg for (uint32_t i = 0; i < subpass->color_count; i++) { 8417ec681f3Smrg uint32_t attachment_idx = subpass->color_attachments[i].attachment; 8427ec681f3Smrg if (attachment_idx == VK_ATTACHMENT_UNUSED) 8437ec681f3Smrg continue; 8447ec681f3Smrg 8457ec681f3Smrg struct v3dv_image_view *iview = 8467ec681f3Smrg state->framebuffer->attachments[attachment_idx]; 8477ec681f3Smrg 8487ec681f3Smrg const struct v3dv_image *image = (struct v3dv_image *) iview->vk.image; 8497ec681f3Smrg const struct v3d_resource_slice *slice = 8507ec681f3Smrg &image->slices[iview->vk.base_mip_level]; 8517ec681f3Smrg 8527ec681f3Smrg const uint32_t *clear_color = 8537ec681f3Smrg &state->attachments[attachment_idx].clear_value.color[0]; 8547ec681f3Smrg 8557ec681f3Smrg uint32_t clear_pad = 0; 8567ec681f3Smrg if (slice->tiling == V3D_TILING_UIF_NO_XOR || 8577ec681f3Smrg slice->tiling == V3D_TILING_UIF_XOR) { 8587ec681f3Smrg int uif_block_height = v3d_utile_height(image->cpp) * 2; 8597ec681f3Smrg 8607ec681f3Smrg uint32_t implicit_padded_height = 8617ec681f3Smrg align(framebuffer->height, uif_block_height) / uif_block_height; 8627ec681f3Smrg 8637ec681f3Smrg if (slice->padded_height_of_output_image_in_uif_blocks - 8647ec681f3Smrg implicit_padded_height >= 15) { 8657ec681f3Smrg clear_pad = slice->padded_height_of_output_image_in_uif_blocks; 8667ec681f3Smrg } 8677ec681f3Smrg } 8687ec681f3Smrg 8697ec681f3Smrg cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) { 8707ec681f3Smrg clear.clear_color_low_32_bits = clear_color[0]; 8717ec681f3Smrg clear.clear_color_next_24_bits = clear_color[1] & 0xffffff; 8727ec681f3Smrg clear.render_target_number = i; 8737ec681f3Smrg }; 8747ec681f3Smrg 8757ec681f3Smrg if (iview->internal_bpp >= V3D_INTERNAL_BPP_64) { 8767ec681f3Smrg cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) { 8777ec681f3Smrg clear.clear_color_mid_low_32_bits = 8787ec681f3Smrg ((clear_color[1] >> 24) | (clear_color[2] << 8)); 8797ec681f3Smrg clear.clear_color_mid_high_24_bits = 8807ec681f3Smrg ((clear_color[2] >> 24) | ((clear_color[3] & 0xffff) << 8)); 8817ec681f3Smrg clear.render_target_number = i; 8827ec681f3Smrg }; 8837ec681f3Smrg } 8847ec681f3Smrg 8857ec681f3Smrg if (iview->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) { 8867ec681f3Smrg cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) { 8877ec681f3Smrg clear.uif_padded_height_in_uif_blocks = clear_pad; 8887ec681f3Smrg clear.clear_color_high_16_bits = clear_color[3] >> 16; 8897ec681f3Smrg clear.render_target_number = i; 8907ec681f3Smrg }; 8917ec681f3Smrg } 8927ec681f3Smrg } 8937ec681f3Smrg 8947ec681f3Smrg cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) { 8957ec681f3Smrg v3dX(cmd_buffer_render_pass_setup_render_target) 8967ec681f3Smrg (cmd_buffer, 0, &rt.render_target_0_internal_bpp, 8977ec681f3Smrg &rt.render_target_0_internal_type, &rt.render_target_0_clamp); 8987ec681f3Smrg v3dX(cmd_buffer_render_pass_setup_render_target) 8997ec681f3Smrg (cmd_buffer, 1, &rt.render_target_1_internal_bpp, 9007ec681f3Smrg &rt.render_target_1_internal_type, &rt.render_target_1_clamp); 9017ec681f3Smrg v3dX(cmd_buffer_render_pass_setup_render_target) 9027ec681f3Smrg (cmd_buffer, 2, &rt.render_target_2_internal_bpp, 9037ec681f3Smrg &rt.render_target_2_internal_type, &rt.render_target_2_clamp); 9047ec681f3Smrg v3dX(cmd_buffer_render_pass_setup_render_target) 9057ec681f3Smrg (cmd_buffer, 3, &rt.render_target_3_internal_bpp, 9067ec681f3Smrg &rt.render_target_3_internal_type, &rt.render_target_3_clamp); 9077ec681f3Smrg } 9087ec681f3Smrg 9097ec681f3Smrg /* Ends rendering mode config. */ 9107ec681f3Smrg if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) { 9117ec681f3Smrg cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) { 9127ec681f3Smrg clear.z_clear_value = 9137ec681f3Smrg state->attachments[ds_attachment_idx].clear_value.z; 9147ec681f3Smrg clear.stencil_clear_value = 9157ec681f3Smrg state->attachments[ds_attachment_idx].clear_value.s; 9167ec681f3Smrg }; 9177ec681f3Smrg } else { 9187ec681f3Smrg cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) { 9197ec681f3Smrg clear.z_clear_value = 1.0f; 9207ec681f3Smrg clear.stencil_clear_value = 0; 9217ec681f3Smrg }; 9227ec681f3Smrg } 9237ec681f3Smrg 9247ec681f3Smrg /* Always set initial block size before the first branch, which needs 9257ec681f3Smrg * to match the value from binning mode config. 9267ec681f3Smrg */ 9277ec681f3Smrg cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) { 9287ec681f3Smrg init.use_auto_chained_tile_lists = true; 9297ec681f3Smrg init.size_of_first_block_in_chained_tile_lists = 9307ec681f3Smrg TILE_ALLOCATION_BLOCK_SIZE_64B; 9317ec681f3Smrg } 9327ec681f3Smrg 9337ec681f3Smrg cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) { 9347ec681f3Smrg config.number_of_bin_tile_lists = 1; 9357ec681f3Smrg config.total_frame_width_in_tiles = tiling->draw_tiles_x; 9367ec681f3Smrg config.total_frame_height_in_tiles = tiling->draw_tiles_y; 9377ec681f3Smrg 9387ec681f3Smrg config.supertile_width_in_tiles = tiling->supertile_width; 9397ec681f3Smrg config.supertile_height_in_tiles = tiling->supertile_height; 9407ec681f3Smrg 9417ec681f3Smrg config.total_frame_width_in_supertiles = 9427ec681f3Smrg tiling->frame_width_in_supertiles; 9437ec681f3Smrg config.total_frame_height_in_supertiles = 9447ec681f3Smrg tiling->frame_height_in_supertiles; 9457ec681f3Smrg } 9467ec681f3Smrg 9477ec681f3Smrg /* Start by clearing the tile buffer. */ 9487ec681f3Smrg cl_emit(rcl, TILE_COORDINATES, coords) { 9497ec681f3Smrg coords.tile_column_number = 0; 9507ec681f3Smrg coords.tile_row_number = 0; 9517ec681f3Smrg } 9527ec681f3Smrg 9537ec681f3Smrg /* Emit an initial clear of the tile buffers. This is necessary 9547ec681f3Smrg * for any buffers that should be cleared (since clearing 9557ec681f3Smrg * normally happens at the *end* of the generic tile list), but 9567ec681f3Smrg * it's also nice to clear everything so the first tile doesn't 9577ec681f3Smrg * inherit any contents from some previous frame. 9587ec681f3Smrg * 9597ec681f3Smrg * Also, implement the GFXH-1742 workaround. There's a race in 9607ec681f3Smrg * the HW between the RCL updating the TLB's internal type/size 9617ec681f3Smrg * and the spawning of the QPU instances using the TLB's current 9627ec681f3Smrg * internal type/size. To make sure the QPUs get the right 9637ec681f3Smrg * state, we need 1 dummy store in between internal type/size 9647ec681f3Smrg * changes on V3D 3.x, and 2 dummy stores on 4.x. 9657ec681f3Smrg */ 9667ec681f3Smrg for (int i = 0; i < 2; i++) { 9677ec681f3Smrg if (i > 0) 9687ec681f3Smrg cl_emit(rcl, TILE_COORDINATES, coords); 9697ec681f3Smrg cl_emit(rcl, END_OF_LOADS, end); 9707ec681f3Smrg cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) { 9717ec681f3Smrg store.buffer_to_store = NONE; 9727ec681f3Smrg } 9737ec681f3Smrg if (i == 0 && cmd_buffer->state.tile_aligned_render_area) { 9747ec681f3Smrg cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) { 9757ec681f3Smrg clear.clear_z_stencil_buffer = !job->early_zs_clear; 9767ec681f3Smrg clear.clear_all_render_targets = true; 9777ec681f3Smrg } 9787ec681f3Smrg } 9797ec681f3Smrg cl_emit(rcl, END_OF_TILE_MARKER, end); 9807ec681f3Smrg } 9817ec681f3Smrg 9827ec681f3Smrg cl_emit(rcl, FLUSH_VCD_CACHE, flush); 9837ec681f3Smrg 9847ec681f3Smrg for (int layer = 0; layer < MAX2(1, fb_layers); layer++) { 9857ec681f3Smrg if (subpass->view_mask == 0 || (subpass->view_mask & (1u << layer))) 9867ec681f3Smrg cmd_buffer_emit_render_pass_layer_rcl(cmd_buffer, layer); 9877ec681f3Smrg } 9887ec681f3Smrg 9897ec681f3Smrg cl_emit(rcl, END_OF_RENDERING, end); 9907ec681f3Smrg} 9917ec681f3Smrg 9927ec681f3Smrgvoid 9937ec681f3Smrgv3dX(cmd_buffer_emit_viewport)(struct v3dv_cmd_buffer *cmd_buffer) 9947ec681f3Smrg{ 9957ec681f3Smrg struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic; 9967ec681f3Smrg /* FIXME: right now we only support one viewport. viewporst[0] would work 9977ec681f3Smrg * now, would need to change if we allow multiple viewports 9987ec681f3Smrg */ 9997ec681f3Smrg float *vptranslate = dynamic->viewport.translate[0]; 10007ec681f3Smrg float *vpscale = dynamic->viewport.scale[0]; 10017ec681f3Smrg 10027ec681f3Smrg struct v3dv_job *job = cmd_buffer->state.job; 10037ec681f3Smrg assert(job); 10047ec681f3Smrg 10057ec681f3Smrg const uint32_t required_cl_size = 10067ec681f3Smrg cl_packet_length(CLIPPER_XY_SCALING) + 10077ec681f3Smrg cl_packet_length(CLIPPER_Z_SCALE_AND_OFFSET) + 10087ec681f3Smrg cl_packet_length(CLIPPER_Z_MIN_MAX_CLIPPING_PLANES) + 10097ec681f3Smrg cl_packet_length(VIEWPORT_OFFSET); 10107ec681f3Smrg v3dv_cl_ensure_space_with_branch(&job->bcl, required_cl_size); 10117ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 10127ec681f3Smrg 10137ec681f3Smrg cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) { 10147ec681f3Smrg clip.viewport_half_width_in_1_256th_of_pixel = vpscale[0] * 256.0f; 10157ec681f3Smrg clip.viewport_half_height_in_1_256th_of_pixel = vpscale[1] * 256.0f; 10167ec681f3Smrg } 10177ec681f3Smrg 10187ec681f3Smrg cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) { 10197ec681f3Smrg clip.viewport_z_offset_zc_to_zs = vptranslate[2]; 10207ec681f3Smrg clip.viewport_z_scale_zc_to_zs = vpscale[2]; 10217ec681f3Smrg } 10227ec681f3Smrg cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) { 10237ec681f3Smrg /* Vulkan's Z NDC is [0..1], unlile OpenGL which is [-1, 1] */ 10247ec681f3Smrg float z1 = vptranslate[2]; 10257ec681f3Smrg float z2 = vptranslate[2] + vpscale[2]; 10267ec681f3Smrg clip.minimum_zw = MIN2(z1, z2); 10277ec681f3Smrg clip.maximum_zw = MAX2(z1, z2); 10287ec681f3Smrg } 10297ec681f3Smrg 10307ec681f3Smrg cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) { 10317ec681f3Smrg vp.viewport_centre_x_coordinate = vptranslate[0]; 10327ec681f3Smrg vp.viewport_centre_y_coordinate = vptranslate[1]; 10337ec681f3Smrg } 10347ec681f3Smrg 10357ec681f3Smrg cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_VIEWPORT; 10367ec681f3Smrg} 10377ec681f3Smrg 10387ec681f3Smrgvoid 10397ec681f3Smrgv3dX(cmd_buffer_emit_stencil)(struct v3dv_cmd_buffer *cmd_buffer) 10407ec681f3Smrg{ 10417ec681f3Smrg struct v3dv_job *job = cmd_buffer->state.job; 10427ec681f3Smrg assert(job); 10437ec681f3Smrg 10447ec681f3Smrg struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; 10457ec681f3Smrg struct v3dv_dynamic_state *dynamic_state = &cmd_buffer->state.dynamic; 10467ec681f3Smrg 10477ec681f3Smrg const uint32_t dynamic_stencil_states = V3DV_DYNAMIC_STENCIL_COMPARE_MASK | 10487ec681f3Smrg V3DV_DYNAMIC_STENCIL_WRITE_MASK | 10497ec681f3Smrg V3DV_DYNAMIC_STENCIL_REFERENCE; 10507ec681f3Smrg 10517ec681f3Smrg v3dv_cl_ensure_space_with_branch(&job->bcl, 10527ec681f3Smrg 2 * cl_packet_length(STENCIL_CFG)); 10537ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 10547ec681f3Smrg 10557ec681f3Smrg bool emitted_stencil = false; 10567ec681f3Smrg for (uint32_t i = 0; i < 2; i++) { 10577ec681f3Smrg if (pipeline->emit_stencil_cfg[i]) { 10587ec681f3Smrg if (dynamic_state->mask & dynamic_stencil_states) { 10597ec681f3Smrg cl_emit_with_prepacked(&job->bcl, STENCIL_CFG, 10607ec681f3Smrg pipeline->stencil_cfg[i], config) { 10617ec681f3Smrg if (dynamic_state->mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK) { 10627ec681f3Smrg config.stencil_test_mask = 10637ec681f3Smrg i == 0 ? dynamic_state->stencil_compare_mask.front : 10647ec681f3Smrg dynamic_state->stencil_compare_mask.back; 10657ec681f3Smrg } 10667ec681f3Smrg if (dynamic_state->mask & V3DV_DYNAMIC_STENCIL_WRITE_MASK) { 10677ec681f3Smrg config.stencil_write_mask = 10687ec681f3Smrg i == 0 ? dynamic_state->stencil_write_mask.front : 10697ec681f3Smrg dynamic_state->stencil_write_mask.back; 10707ec681f3Smrg } 10717ec681f3Smrg if (dynamic_state->mask & V3DV_DYNAMIC_STENCIL_REFERENCE) { 10727ec681f3Smrg config.stencil_ref_value = 10737ec681f3Smrg i == 0 ? dynamic_state->stencil_reference.front : 10747ec681f3Smrg dynamic_state->stencil_reference.back; 10757ec681f3Smrg } 10767ec681f3Smrg } 10777ec681f3Smrg } else { 10787ec681f3Smrg cl_emit_prepacked(&job->bcl, &pipeline->stencil_cfg[i]); 10797ec681f3Smrg } 10807ec681f3Smrg 10817ec681f3Smrg emitted_stencil = true; 10827ec681f3Smrg } 10837ec681f3Smrg } 10847ec681f3Smrg 10857ec681f3Smrg if (emitted_stencil) { 10867ec681f3Smrg const uint32_t dynamic_stencil_dirty_flags = 10877ec681f3Smrg V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK | 10887ec681f3Smrg V3DV_CMD_DIRTY_STENCIL_WRITE_MASK | 10897ec681f3Smrg V3DV_CMD_DIRTY_STENCIL_REFERENCE; 10907ec681f3Smrg cmd_buffer->state.dirty &= ~dynamic_stencil_dirty_flags; 10917ec681f3Smrg } 10927ec681f3Smrg} 10937ec681f3Smrg 10947ec681f3Smrgvoid 10957ec681f3Smrgv3dX(cmd_buffer_emit_depth_bias)(struct v3dv_cmd_buffer *cmd_buffer) 10967ec681f3Smrg{ 10977ec681f3Smrg struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; 10987ec681f3Smrg assert(pipeline); 10997ec681f3Smrg 11007ec681f3Smrg if (!pipeline->depth_bias.enabled) 11017ec681f3Smrg return; 11027ec681f3Smrg 11037ec681f3Smrg struct v3dv_job *job = cmd_buffer->state.job; 11047ec681f3Smrg assert(job); 11057ec681f3Smrg 11067ec681f3Smrg v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(DEPTH_OFFSET)); 11077ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 11087ec681f3Smrg 11097ec681f3Smrg struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic; 11107ec681f3Smrg cl_emit(&job->bcl, DEPTH_OFFSET, bias) { 11117ec681f3Smrg bias.depth_offset_factor = dynamic->depth_bias.slope_factor; 11127ec681f3Smrg bias.depth_offset_units = dynamic->depth_bias.constant_factor; 11137ec681f3Smrg if (pipeline->depth_bias.is_z16) 11147ec681f3Smrg bias.depth_offset_units *= 256.0f; 11157ec681f3Smrg bias.limit = dynamic->depth_bias.depth_bias_clamp; 11167ec681f3Smrg } 11177ec681f3Smrg 11187ec681f3Smrg cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_DEPTH_BIAS; 11197ec681f3Smrg} 11207ec681f3Smrg 11217ec681f3Smrgvoid 11227ec681f3Smrgv3dX(cmd_buffer_emit_line_width)(struct v3dv_cmd_buffer *cmd_buffer) 11237ec681f3Smrg{ 11247ec681f3Smrg struct v3dv_job *job = cmd_buffer->state.job; 11257ec681f3Smrg assert(job); 11267ec681f3Smrg 11277ec681f3Smrg v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(LINE_WIDTH)); 11287ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 11297ec681f3Smrg 11307ec681f3Smrg cl_emit(&job->bcl, LINE_WIDTH, line) { 11317ec681f3Smrg line.line_width = cmd_buffer->state.dynamic.line_width; 11327ec681f3Smrg } 11337ec681f3Smrg 11347ec681f3Smrg cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_LINE_WIDTH; 11357ec681f3Smrg} 11367ec681f3Smrg 11377ec681f3Smrgvoid 11387ec681f3Smrgv3dX(cmd_buffer_emit_sample_state)(struct v3dv_cmd_buffer *cmd_buffer) 11397ec681f3Smrg{ 11407ec681f3Smrg struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; 11417ec681f3Smrg assert(pipeline); 11427ec681f3Smrg 11437ec681f3Smrg struct v3dv_job *job = cmd_buffer->state.job; 11447ec681f3Smrg assert(job); 11457ec681f3Smrg 11467ec681f3Smrg v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(SAMPLE_STATE)); 11477ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 11487ec681f3Smrg 11497ec681f3Smrg cl_emit(&job->bcl, SAMPLE_STATE, state) { 11507ec681f3Smrg state.coverage = 1.0f; 11517ec681f3Smrg state.mask = pipeline->sample_mask; 11527ec681f3Smrg } 11537ec681f3Smrg} 11547ec681f3Smrg 11557ec681f3Smrgvoid 11567ec681f3Smrgv3dX(cmd_buffer_emit_blend)(struct v3dv_cmd_buffer *cmd_buffer) 11577ec681f3Smrg{ 11587ec681f3Smrg struct v3dv_job *job = cmd_buffer->state.job; 11597ec681f3Smrg assert(job); 11607ec681f3Smrg 11617ec681f3Smrg struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; 11627ec681f3Smrg assert(pipeline); 11637ec681f3Smrg 11647ec681f3Smrg const uint32_t blend_packets_size = 11657ec681f3Smrg cl_packet_length(BLEND_ENABLES) + 11667ec681f3Smrg cl_packet_length(BLEND_CONSTANT_COLOR) + 11677ec681f3Smrg cl_packet_length(BLEND_CFG) * V3D_MAX_DRAW_BUFFERS; 11687ec681f3Smrg 11697ec681f3Smrg v3dv_cl_ensure_space_with_branch(&job->bcl, blend_packets_size); 11707ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 11717ec681f3Smrg 11727ec681f3Smrg if (cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PIPELINE) { 11737ec681f3Smrg if (pipeline->blend.enables) { 11747ec681f3Smrg cl_emit(&job->bcl, BLEND_ENABLES, enables) { 11757ec681f3Smrg enables.mask = pipeline->blend.enables; 11767ec681f3Smrg } 11777ec681f3Smrg } 11787ec681f3Smrg 11797ec681f3Smrg for (uint32_t i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { 11807ec681f3Smrg if (pipeline->blend.enables & (1 << i)) 11817ec681f3Smrg cl_emit_prepacked(&job->bcl, &pipeline->blend.cfg[i]); 11827ec681f3Smrg } 11837ec681f3Smrg } 11847ec681f3Smrg 11857ec681f3Smrg if (pipeline->blend.needs_color_constants && 11867ec681f3Smrg cmd_buffer->state.dirty & V3DV_CMD_DIRTY_BLEND_CONSTANTS) { 11877ec681f3Smrg struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic; 11887ec681f3Smrg cl_emit(&job->bcl, BLEND_CONSTANT_COLOR, color) { 11897ec681f3Smrg color.red_f16 = _mesa_float_to_half(dynamic->blend_constants[0]); 11907ec681f3Smrg color.green_f16 = _mesa_float_to_half(dynamic->blend_constants[1]); 11917ec681f3Smrg color.blue_f16 = _mesa_float_to_half(dynamic->blend_constants[2]); 11927ec681f3Smrg color.alpha_f16 = _mesa_float_to_half(dynamic->blend_constants[3]); 11937ec681f3Smrg } 11947ec681f3Smrg cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_BLEND_CONSTANTS; 11957ec681f3Smrg } 11967ec681f3Smrg} 11977ec681f3Smrg 11987ec681f3Smrgvoid 11997ec681f3Smrgv3dX(cmd_buffer_emit_color_write_mask)(struct v3dv_cmd_buffer *cmd_buffer) 12007ec681f3Smrg{ 12017ec681f3Smrg struct v3dv_job *job = cmd_buffer->state.job; 12027ec681f3Smrg v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(COLOR_WRITE_MASKS)); 12037ec681f3Smrg 12047ec681f3Smrg struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; 12057ec681f3Smrg struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic; 12067ec681f3Smrg cl_emit(&job->bcl, COLOR_WRITE_MASKS, mask) { 12077ec681f3Smrg mask.mask = (~dynamic->color_write_enable | 12087ec681f3Smrg pipeline->blend.color_write_masks) & 0xffff; 12097ec681f3Smrg } 12107ec681f3Smrg 12117ec681f3Smrg cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE; 12127ec681f3Smrg} 12137ec681f3Smrg 12147ec681f3Smrgstatic void 12157ec681f3Smrgemit_flat_shade_flags(struct v3dv_job *job, 12167ec681f3Smrg int varying_offset, 12177ec681f3Smrg uint32_t varyings, 12187ec681f3Smrg enum V3DX(Varying_Flags_Action) lower, 12197ec681f3Smrg enum V3DX(Varying_Flags_Action) higher) 12207ec681f3Smrg{ 12217ec681f3Smrg v3dv_cl_ensure_space_with_branch(&job->bcl, 12227ec681f3Smrg cl_packet_length(FLAT_SHADE_FLAGS)); 12237ec681f3Smrg v3dv_return_if_oom(NULL, job); 12247ec681f3Smrg 12257ec681f3Smrg cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) { 12267ec681f3Smrg flags.varying_offset_v0 = varying_offset; 12277ec681f3Smrg flags.flat_shade_flags_for_varyings_v024 = varyings; 12287ec681f3Smrg flags.action_for_flat_shade_flags_of_lower_numbered_varyings = lower; 12297ec681f3Smrg flags.action_for_flat_shade_flags_of_higher_numbered_varyings = higher; 12307ec681f3Smrg } 12317ec681f3Smrg} 12327ec681f3Smrg 12337ec681f3Smrgstatic void 12347ec681f3Smrgemit_noperspective_flags(struct v3dv_job *job, 12357ec681f3Smrg int varying_offset, 12367ec681f3Smrg uint32_t varyings, 12377ec681f3Smrg enum V3DX(Varying_Flags_Action) lower, 12387ec681f3Smrg enum V3DX(Varying_Flags_Action) higher) 12397ec681f3Smrg{ 12407ec681f3Smrg v3dv_cl_ensure_space_with_branch(&job->bcl, 12417ec681f3Smrg cl_packet_length(NON_PERSPECTIVE_FLAGS)); 12427ec681f3Smrg v3dv_return_if_oom(NULL, job); 12437ec681f3Smrg 12447ec681f3Smrg cl_emit(&job->bcl, NON_PERSPECTIVE_FLAGS, flags) { 12457ec681f3Smrg flags.varying_offset_v0 = varying_offset; 12467ec681f3Smrg flags.non_perspective_flags_for_varyings_v024 = varyings; 12477ec681f3Smrg flags.action_for_non_perspective_flags_of_lower_numbered_varyings = lower; 12487ec681f3Smrg flags.action_for_non_perspective_flags_of_higher_numbered_varyings = higher; 12497ec681f3Smrg } 12507ec681f3Smrg} 12517ec681f3Smrg 12527ec681f3Smrgstatic void 12537ec681f3Smrgemit_centroid_flags(struct v3dv_job *job, 12547ec681f3Smrg int varying_offset, 12557ec681f3Smrg uint32_t varyings, 12567ec681f3Smrg enum V3DX(Varying_Flags_Action) lower, 12577ec681f3Smrg enum V3DX(Varying_Flags_Action) higher) 12587ec681f3Smrg{ 12597ec681f3Smrg v3dv_cl_ensure_space_with_branch(&job->bcl, 12607ec681f3Smrg cl_packet_length(CENTROID_FLAGS)); 12617ec681f3Smrg v3dv_return_if_oom(NULL, job); 12627ec681f3Smrg 12637ec681f3Smrg cl_emit(&job->bcl, CENTROID_FLAGS, flags) { 12647ec681f3Smrg flags.varying_offset_v0 = varying_offset; 12657ec681f3Smrg flags.centroid_flags_for_varyings_v024 = varyings; 12667ec681f3Smrg flags.action_for_centroid_flags_of_lower_numbered_varyings = lower; 12677ec681f3Smrg flags.action_for_centroid_flags_of_higher_numbered_varyings = higher; 12687ec681f3Smrg } 12697ec681f3Smrg} 12707ec681f3Smrg 12717ec681f3Smrgstatic bool 12727ec681f3Smrgemit_varying_flags(struct v3dv_job *job, 12737ec681f3Smrg uint32_t num_flags, 12747ec681f3Smrg const uint32_t *flags, 12757ec681f3Smrg void (*flag_emit_callback)(struct v3dv_job *job, 12767ec681f3Smrg int varying_offset, 12777ec681f3Smrg uint32_t flags, 12787ec681f3Smrg enum V3DX(Varying_Flags_Action) lower, 12797ec681f3Smrg enum V3DX(Varying_Flags_Action) higher)) 12807ec681f3Smrg{ 12817ec681f3Smrg bool emitted_any = false; 12827ec681f3Smrg for (int i = 0; i < num_flags; i++) { 12837ec681f3Smrg if (!flags[i]) 12847ec681f3Smrg continue; 12857ec681f3Smrg 12867ec681f3Smrg if (emitted_any) { 12877ec681f3Smrg flag_emit_callback(job, i, flags[i], 12887ec681f3Smrg V3D_VARYING_FLAGS_ACTION_UNCHANGED, 12897ec681f3Smrg V3D_VARYING_FLAGS_ACTION_UNCHANGED); 12907ec681f3Smrg } else if (i == 0) { 12917ec681f3Smrg flag_emit_callback(job, i, flags[i], 12927ec681f3Smrg V3D_VARYING_FLAGS_ACTION_UNCHANGED, 12937ec681f3Smrg V3D_VARYING_FLAGS_ACTION_ZEROED); 12947ec681f3Smrg } else { 12957ec681f3Smrg flag_emit_callback(job, i, flags[i], 12967ec681f3Smrg V3D_VARYING_FLAGS_ACTION_ZEROED, 12977ec681f3Smrg V3D_VARYING_FLAGS_ACTION_ZEROED); 12987ec681f3Smrg } 12997ec681f3Smrg 13007ec681f3Smrg emitted_any = true; 13017ec681f3Smrg } 13027ec681f3Smrg 13037ec681f3Smrg return emitted_any; 13047ec681f3Smrg} 13057ec681f3Smrg 13067ec681f3Smrgvoid 13077ec681f3Smrgv3dX(cmd_buffer_emit_varyings_state)(struct v3dv_cmd_buffer *cmd_buffer) 13087ec681f3Smrg{ 13097ec681f3Smrg struct v3dv_job *job = cmd_buffer->state.job; 13107ec681f3Smrg struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; 13117ec681f3Smrg 13127ec681f3Smrg struct v3d_fs_prog_data *prog_data_fs = 13137ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]->prog_data.fs; 13147ec681f3Smrg 13157ec681f3Smrg const uint32_t num_flags = 13167ec681f3Smrg ARRAY_SIZE(prog_data_fs->flat_shade_flags); 13177ec681f3Smrg const uint32_t *flat_shade_flags = prog_data_fs->flat_shade_flags; 13187ec681f3Smrg const uint32_t *noperspective_flags = prog_data_fs->noperspective_flags; 13197ec681f3Smrg const uint32_t *centroid_flags = prog_data_fs->centroid_flags; 13207ec681f3Smrg 13217ec681f3Smrg if (!emit_varying_flags(job, num_flags, flat_shade_flags, 13227ec681f3Smrg emit_flat_shade_flags)) { 13237ec681f3Smrg v3dv_cl_ensure_space_with_branch( 13247ec681f3Smrg &job->bcl, cl_packet_length(ZERO_ALL_FLAT_SHADE_FLAGS)); 13257ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 13267ec681f3Smrg 13277ec681f3Smrg cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags); 13287ec681f3Smrg } 13297ec681f3Smrg 13307ec681f3Smrg if (!emit_varying_flags(job, num_flags, noperspective_flags, 13317ec681f3Smrg emit_noperspective_flags)) { 13327ec681f3Smrg v3dv_cl_ensure_space_with_branch( 13337ec681f3Smrg &job->bcl, cl_packet_length(ZERO_ALL_NON_PERSPECTIVE_FLAGS)); 13347ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 13357ec681f3Smrg 13367ec681f3Smrg cl_emit(&job->bcl, ZERO_ALL_NON_PERSPECTIVE_FLAGS, flags); 13377ec681f3Smrg } 13387ec681f3Smrg 13397ec681f3Smrg if (!emit_varying_flags(job, num_flags, centroid_flags, 13407ec681f3Smrg emit_centroid_flags)) { 13417ec681f3Smrg v3dv_cl_ensure_space_with_branch( 13427ec681f3Smrg &job->bcl, cl_packet_length(ZERO_ALL_CENTROID_FLAGS)); 13437ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 13447ec681f3Smrg 13457ec681f3Smrg cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags); 13467ec681f3Smrg } 13477ec681f3Smrg} 13487ec681f3Smrg 13497ec681f3Smrgstatic void 13507ec681f3Smrgjob_update_ez_state(struct v3dv_job *job, 13517ec681f3Smrg struct v3dv_pipeline *pipeline, 13527ec681f3Smrg struct v3dv_cmd_buffer *cmd_buffer) 13537ec681f3Smrg{ 13547ec681f3Smrg /* If first_ez_state is V3D_EZ_DISABLED it means that we have already 13557ec681f3Smrg * determined that we should disable EZ completely for all draw calls in 13567ec681f3Smrg * this job. This will cause us to disable EZ for the entire job in the 13577ec681f3Smrg * Tile Rendering Mode RCL packet and when we do that we need to make sure 13587ec681f3Smrg * we never emit a draw call in the job with EZ enabled in the CFG_BITS 13597ec681f3Smrg * packet, so ez_state must also be V3D_EZ_DISABLED; 13607ec681f3Smrg */ 13617ec681f3Smrg if (job->first_ez_state == V3D_EZ_DISABLED) { 13627ec681f3Smrg assert(job->ez_state == V3D_EZ_DISABLED); 13637ec681f3Smrg return; 13647ec681f3Smrg } 13657ec681f3Smrg 13667ec681f3Smrg /* This is part of the pre draw call handling, so we should be inside a 13677ec681f3Smrg * render pass. 13687ec681f3Smrg */ 13697ec681f3Smrg assert(cmd_buffer->state.pass); 13707ec681f3Smrg 13717ec681f3Smrg /* If this is the first time we update EZ state for this job we first check 13727ec681f3Smrg * if there is anything that requires disabling it completely for the entire 13737ec681f3Smrg * job (based on state that is not related to the current draw call and 13747ec681f3Smrg * pipeline state). 13757ec681f3Smrg */ 13767ec681f3Smrg if (!job->decided_global_ez_enable) { 13777ec681f3Smrg job->decided_global_ez_enable = true; 13787ec681f3Smrg 13797ec681f3Smrg struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 13807ec681f3Smrg assert(state->subpass_idx < state->pass->subpass_count); 13817ec681f3Smrg struct v3dv_subpass *subpass = &state->pass->subpasses[state->subpass_idx]; 13827ec681f3Smrg if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED) { 13837ec681f3Smrg job->first_ez_state = V3D_EZ_DISABLED; 13847ec681f3Smrg job->ez_state = V3D_EZ_DISABLED; 13857ec681f3Smrg return; 13867ec681f3Smrg } 13877ec681f3Smrg 13887ec681f3Smrg /* GFXH-1918: the early-z buffer may load incorrect depth values 13897ec681f3Smrg * if the frame has odd width or height. 13907ec681f3Smrg * 13917ec681f3Smrg * So we need to disable EZ in this case. 13927ec681f3Smrg */ 13937ec681f3Smrg const struct v3dv_render_pass_attachment *ds_attachment = 13947ec681f3Smrg &state->pass->attachments[subpass->ds_attachment.attachment]; 13957ec681f3Smrg 13967ec681f3Smrg const VkImageAspectFlags ds_aspects = 13977ec681f3Smrg vk_format_aspects(ds_attachment->desc.format); 13987ec681f3Smrg 13997ec681f3Smrg bool needs_depth_load = 14007ec681f3Smrg check_needs_load(state, 14017ec681f3Smrg ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT, 14027ec681f3Smrg ds_attachment->first_subpass, 14037ec681f3Smrg ds_attachment->desc.loadOp); 14047ec681f3Smrg 14057ec681f3Smrg if (needs_depth_load) { 14067ec681f3Smrg struct v3dv_framebuffer *fb = state->framebuffer; 14077ec681f3Smrg 14087ec681f3Smrg if (!fb) { 14097ec681f3Smrg assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); 14107ec681f3Smrg perf_debug("Loading depth aspect in a secondary command buffer " 14117ec681f3Smrg "without framebuffer info disables early-z tests.\n"); 14127ec681f3Smrg job->first_ez_state = V3D_EZ_DISABLED; 14137ec681f3Smrg job->ez_state = V3D_EZ_DISABLED; 14147ec681f3Smrg return; 14157ec681f3Smrg } 14167ec681f3Smrg 14177ec681f3Smrg if (((fb->width % 2) != 0 || (fb->height % 2) != 0)) { 14187ec681f3Smrg perf_debug("Loading depth aspect for framebuffer with odd width " 14197ec681f3Smrg "or height disables early-Z tests.\n"); 14207ec681f3Smrg job->first_ez_state = V3D_EZ_DISABLED; 14217ec681f3Smrg job->ez_state = V3D_EZ_DISABLED; 14227ec681f3Smrg return; 14237ec681f3Smrg } 14247ec681f3Smrg } 14257ec681f3Smrg } 14267ec681f3Smrg 14277ec681f3Smrg /* Otherwise, we can decide to selectively enable or disable EZ for draw 14287ec681f3Smrg * calls using the CFG_BITS packet based on the bound pipeline state. 14297ec681f3Smrg */ 14307ec681f3Smrg 14317ec681f3Smrg /* If the FS writes Z, then it may update against the chosen EZ direction */ 14327ec681f3Smrg struct v3dv_shader_variant *fs_variant = 14337ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]; 14347ec681f3Smrg if (fs_variant->prog_data.fs->writes_z) { 14357ec681f3Smrg job->ez_state = V3D_EZ_DISABLED; 14367ec681f3Smrg return; 14377ec681f3Smrg } 14387ec681f3Smrg 14397ec681f3Smrg switch (pipeline->ez_state) { 14407ec681f3Smrg case V3D_EZ_UNDECIDED: 14417ec681f3Smrg /* If the pipeline didn't pick a direction but didn't disable, then go 14427ec681f3Smrg * along with the current EZ state. This allows EZ optimization for Z 14437ec681f3Smrg * func == EQUAL or NEVER. 14447ec681f3Smrg */ 14457ec681f3Smrg break; 14467ec681f3Smrg 14477ec681f3Smrg case V3D_EZ_LT_LE: 14487ec681f3Smrg case V3D_EZ_GT_GE: 14497ec681f3Smrg /* If the pipeline picked a direction, then it needs to match the current 14507ec681f3Smrg * direction if we've decided on one. 14517ec681f3Smrg */ 14527ec681f3Smrg if (job->ez_state == V3D_EZ_UNDECIDED) 14537ec681f3Smrg job->ez_state = pipeline->ez_state; 14547ec681f3Smrg else if (job->ez_state != pipeline->ez_state) 14557ec681f3Smrg job->ez_state = V3D_EZ_DISABLED; 14567ec681f3Smrg break; 14577ec681f3Smrg 14587ec681f3Smrg case V3D_EZ_DISABLED: 14597ec681f3Smrg /* If the pipeline disables EZ because of a bad Z func or stencil 14607ec681f3Smrg * operation, then we can't do any more EZ in this frame. 14617ec681f3Smrg */ 14627ec681f3Smrg job->ez_state = V3D_EZ_DISABLED; 14637ec681f3Smrg break; 14647ec681f3Smrg } 14657ec681f3Smrg 14667ec681f3Smrg if (job->first_ez_state == V3D_EZ_UNDECIDED && 14677ec681f3Smrg job->ez_state != V3D_EZ_DISABLED) { 14687ec681f3Smrg job->first_ez_state = job->ez_state; 14697ec681f3Smrg } 14707ec681f3Smrg} 14717ec681f3Smrg 14727ec681f3Smrgvoid 14737ec681f3Smrgv3dX(cmd_buffer_emit_configuration_bits)(struct v3dv_cmd_buffer *cmd_buffer) 14747ec681f3Smrg{ 14757ec681f3Smrg struct v3dv_job *job = cmd_buffer->state.job; 14767ec681f3Smrg assert(job); 14777ec681f3Smrg 14787ec681f3Smrg struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; 14797ec681f3Smrg assert(pipeline); 14807ec681f3Smrg 14817ec681f3Smrg job_update_ez_state(job, pipeline, cmd_buffer); 14827ec681f3Smrg 14837ec681f3Smrg v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(CFG_BITS)); 14847ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 14857ec681f3Smrg 14867ec681f3Smrg cl_emit_with_prepacked(&job->bcl, CFG_BITS, pipeline->cfg_bits, config) { 14877ec681f3Smrg config.early_z_enable = job->ez_state != V3D_EZ_DISABLED; 14887ec681f3Smrg config.early_z_updates_enable = config.early_z_enable && 14897ec681f3Smrg pipeline->z_updates_enable; 14907ec681f3Smrg } 14917ec681f3Smrg} 14927ec681f3Smrg 14937ec681f3Smrgvoid 14947ec681f3Smrgv3dX(cmd_buffer_emit_occlusion_query)(struct v3dv_cmd_buffer *cmd_buffer) 14957ec681f3Smrg{ 14967ec681f3Smrg struct v3dv_job *job = cmd_buffer->state.job; 14977ec681f3Smrg assert(job); 14987ec681f3Smrg 14997ec681f3Smrg v3dv_cl_ensure_space_with_branch(&job->bcl, 15007ec681f3Smrg cl_packet_length(OCCLUSION_QUERY_COUNTER)); 15017ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 15027ec681f3Smrg 15037ec681f3Smrg cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) { 15047ec681f3Smrg if (cmd_buffer->state.query.active_query.bo) { 15057ec681f3Smrg counter.address = 15067ec681f3Smrg v3dv_cl_address(cmd_buffer->state.query.active_query.bo, 15077ec681f3Smrg cmd_buffer->state.query.active_query.offset); 15087ec681f3Smrg } 15097ec681f3Smrg } 15107ec681f3Smrg 15117ec681f3Smrg cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_OCCLUSION_QUERY; 15127ec681f3Smrg} 15137ec681f3Smrg 15147ec681f3Smrgstatic struct v3dv_job * 15157ec681f3Smrgcmd_buffer_subpass_split_for_barrier(struct v3dv_cmd_buffer *cmd_buffer, 15167ec681f3Smrg bool is_bcl_barrier) 15177ec681f3Smrg{ 15187ec681f3Smrg assert(cmd_buffer->state.subpass_idx != -1); 15197ec681f3Smrg v3dv_cmd_buffer_finish_job(cmd_buffer); 15207ec681f3Smrg struct v3dv_job *job = 15217ec681f3Smrg v3dv_cmd_buffer_subpass_resume(cmd_buffer, 15227ec681f3Smrg cmd_buffer->state.subpass_idx); 15237ec681f3Smrg if (!job) 15247ec681f3Smrg return NULL; 15257ec681f3Smrg 15267ec681f3Smrg job->serialize = true; 15277ec681f3Smrg job->needs_bcl_sync = is_bcl_barrier; 15287ec681f3Smrg return job; 15297ec681f3Smrg} 15307ec681f3Smrg 15317ec681f3Smrgstatic void 15327ec681f3Smrgcmd_buffer_copy_secondary_end_query_state(struct v3dv_cmd_buffer *primary, 15337ec681f3Smrg struct v3dv_cmd_buffer *secondary) 15347ec681f3Smrg{ 15357ec681f3Smrg struct v3dv_cmd_buffer_state *p_state = &primary->state; 15367ec681f3Smrg struct v3dv_cmd_buffer_state *s_state = &secondary->state; 15377ec681f3Smrg 15387ec681f3Smrg const uint32_t total_state_count = 15397ec681f3Smrg p_state->query.end.used_count + s_state->query.end.used_count; 15407ec681f3Smrg v3dv_cmd_buffer_ensure_array_state(primary, 15417ec681f3Smrg sizeof(struct v3dv_end_query_cpu_job_info), 15427ec681f3Smrg total_state_count, 15437ec681f3Smrg &p_state->query.end.alloc_count, 15447ec681f3Smrg (void **) &p_state->query.end.states); 15457ec681f3Smrg v3dv_return_if_oom(primary, NULL); 15467ec681f3Smrg 15477ec681f3Smrg for (uint32_t i = 0; i < s_state->query.end.used_count; i++) { 15487ec681f3Smrg const struct v3dv_end_query_cpu_job_info *s_qstate = 15497ec681f3Smrg &secondary->state.query.end.states[i]; 15507ec681f3Smrg 15517ec681f3Smrg struct v3dv_end_query_cpu_job_info *p_qstate = 15527ec681f3Smrg &p_state->query.end.states[p_state->query.end.used_count++]; 15537ec681f3Smrg 15547ec681f3Smrg p_qstate->pool = s_qstate->pool; 15557ec681f3Smrg p_qstate->query = s_qstate->query; 15567ec681f3Smrg } 15577ec681f3Smrg} 15587ec681f3Smrg 15597ec681f3Smrgvoid 15607ec681f3Smrgv3dX(cmd_buffer_execute_inside_pass)(struct v3dv_cmd_buffer *primary, 15617ec681f3Smrg uint32_t cmd_buffer_count, 15627ec681f3Smrg const VkCommandBuffer *cmd_buffers) 15637ec681f3Smrg{ 15647ec681f3Smrg assert(primary->state.job); 15657ec681f3Smrg 15667ec681f3Smrg /* Emit occlusion query state if needed so the draw calls inside our 15677ec681f3Smrg * secondaries update the counters. 15687ec681f3Smrg */ 15697ec681f3Smrg bool has_occlusion_query = 15707ec681f3Smrg primary->state.dirty & V3DV_CMD_DIRTY_OCCLUSION_QUERY; 15717ec681f3Smrg if (has_occlusion_query) 15727ec681f3Smrg v3dX(cmd_buffer_emit_occlusion_query)(primary); 15737ec681f3Smrg 15747ec681f3Smrg /* FIXME: if our primary job tiling doesn't enable MSSA but any of the 15757ec681f3Smrg * pipelines used by the secondaries do, we need to re-start the primary 15767ec681f3Smrg * job to enable MSAA. See cmd_buffer_restart_job_for_msaa_if_needed. 15777ec681f3Smrg */ 15787ec681f3Smrg bool pending_barrier = false; 15797ec681f3Smrg bool pending_bcl_barrier = false; 15807ec681f3Smrg for (uint32_t i = 0; i < cmd_buffer_count; i++) { 15817ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, secondary, cmd_buffers[i]); 15827ec681f3Smrg 15837ec681f3Smrg assert(secondary->usage_flags & 15847ec681f3Smrg VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT); 15857ec681f3Smrg 15867ec681f3Smrg list_for_each_entry(struct v3dv_job, secondary_job, 15877ec681f3Smrg &secondary->jobs, list_link) { 15887ec681f3Smrg if (secondary_job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY) { 15897ec681f3Smrg /* If the job is a CL, then we branch to it from the primary BCL. 15907ec681f3Smrg * In this case the secondary's BCL is finished with a 15917ec681f3Smrg * RETURN_FROM_SUB_LIST command to return back to the primary BCL 15927ec681f3Smrg * once we are done executing it. 15937ec681f3Smrg */ 15947ec681f3Smrg assert(v3dv_cl_offset(&secondary_job->rcl) == 0); 15957ec681f3Smrg assert(secondary_job->bcl.bo); 15967ec681f3Smrg 15977ec681f3Smrg /* Sanity check that secondary BCL ends with RETURN_FROM_SUB_LIST */ 15987ec681f3Smrg STATIC_ASSERT(cl_packet_length(RETURN_FROM_SUB_LIST) == 1); 15997ec681f3Smrg assert(v3dv_cl_offset(&secondary_job->bcl) >= 1); 16007ec681f3Smrg assert(*(((uint8_t *)secondary_job->bcl.next) - 1) == 16017ec681f3Smrg V3DX(RETURN_FROM_SUB_LIST_opcode)); 16027ec681f3Smrg 16037ec681f3Smrg /* If this secondary has any barriers (or we had any pending barrier 16047ec681f3Smrg * to apply), then we can't just branch to it from the primary, we 16057ec681f3Smrg * need to split the primary to create a new job that can consume 16067ec681f3Smrg * the barriers first. 16077ec681f3Smrg * 16087ec681f3Smrg * FIXME: in this case, maybe just copy the secondary BCL without 16097ec681f3Smrg * the RETURN_FROM_SUB_LIST into the primary job to skip the 16107ec681f3Smrg * branch? 16117ec681f3Smrg */ 16127ec681f3Smrg struct v3dv_job *primary_job = primary->state.job; 16137ec681f3Smrg if (!primary_job || secondary_job->serialize || pending_barrier) { 16147ec681f3Smrg const bool needs_bcl_barrier = 16157ec681f3Smrg secondary_job->needs_bcl_sync || pending_bcl_barrier; 16167ec681f3Smrg primary_job = 16177ec681f3Smrg cmd_buffer_subpass_split_for_barrier(primary, 16187ec681f3Smrg needs_bcl_barrier); 16197ec681f3Smrg v3dv_return_if_oom(primary, NULL); 16207ec681f3Smrg 16217ec681f3Smrg /* Since we have created a new primary we need to re-emit 16227ec681f3Smrg * occlusion query state. 16237ec681f3Smrg */ 16247ec681f3Smrg if (has_occlusion_query) 16257ec681f3Smrg v3dX(cmd_buffer_emit_occlusion_query)(primary); 16267ec681f3Smrg } 16277ec681f3Smrg 16287ec681f3Smrg /* Make sure our primary job has all required BO references */ 16297ec681f3Smrg set_foreach(secondary_job->bos, entry) { 16307ec681f3Smrg struct v3dv_bo *bo = (struct v3dv_bo *)entry->key; 16317ec681f3Smrg v3dv_job_add_bo(primary_job, bo); 16327ec681f3Smrg } 16337ec681f3Smrg 16347ec681f3Smrg /* Emit required branch instructions. We expect each of these 16357ec681f3Smrg * to end with a corresponding 'return from sub list' item. 16367ec681f3Smrg */ 16377ec681f3Smrg list_for_each_entry(struct v3dv_bo, bcl_bo, 16387ec681f3Smrg &secondary_job->bcl.bo_list, list_link) { 16397ec681f3Smrg v3dv_cl_ensure_space_with_branch(&primary_job->bcl, 16407ec681f3Smrg cl_packet_length(BRANCH_TO_SUB_LIST)); 16417ec681f3Smrg v3dv_return_if_oom(primary, NULL); 16427ec681f3Smrg cl_emit(&primary_job->bcl, BRANCH_TO_SUB_LIST, branch) { 16437ec681f3Smrg branch.address = v3dv_cl_address(bcl_bo, 0); 16447ec681f3Smrg } 16457ec681f3Smrg } 16467ec681f3Smrg 16477ec681f3Smrg primary_job->tmu_dirty_rcl |= secondary_job->tmu_dirty_rcl; 16487ec681f3Smrg } else { 16497ec681f3Smrg /* This is a regular job (CPU or GPU), so just finish the current 16507ec681f3Smrg * primary job (if any) and then add the secondary job to the 16517ec681f3Smrg * primary's job list right after it. 16527ec681f3Smrg */ 16537ec681f3Smrg v3dv_cmd_buffer_finish_job(primary); 16547ec681f3Smrg v3dv_job_clone_in_cmd_buffer(secondary_job, primary); 16557ec681f3Smrg if (pending_barrier) { 16567ec681f3Smrg secondary_job->serialize = true; 16577ec681f3Smrg if (pending_bcl_barrier) 16587ec681f3Smrg secondary_job->needs_bcl_sync = true; 16597ec681f3Smrg } 16607ec681f3Smrg } 16617ec681f3Smrg 16627ec681f3Smrg pending_barrier = false; 16637ec681f3Smrg pending_bcl_barrier = false; 16647ec681f3Smrg } 16657ec681f3Smrg 16667ec681f3Smrg /* If the secondary has recorded any vkCmdEndQuery commands, we need to 16677ec681f3Smrg * copy this state to the primary so it is processed properly when the 16687ec681f3Smrg * current primary job is finished. 16697ec681f3Smrg */ 16707ec681f3Smrg cmd_buffer_copy_secondary_end_query_state(primary, secondary); 16717ec681f3Smrg 16727ec681f3Smrg /* If this secondary had any pending barrier state we will need that 16737ec681f3Smrg * barrier state consumed with whatever comes next in the primary. 16747ec681f3Smrg */ 16757ec681f3Smrg assert(secondary->state.has_barrier || !secondary->state.has_bcl_barrier); 16767ec681f3Smrg pending_barrier = secondary->state.has_barrier; 16777ec681f3Smrg pending_bcl_barrier = secondary->state.has_bcl_barrier; 16787ec681f3Smrg } 16797ec681f3Smrg 16807ec681f3Smrg if (pending_barrier) { 16817ec681f3Smrg primary->state.has_barrier = true; 16827ec681f3Smrg primary->state.has_bcl_barrier |= pending_bcl_barrier; 16837ec681f3Smrg } 16847ec681f3Smrg} 16857ec681f3Smrg 16867ec681f3Smrgstatic void 16877ec681f3Smrgemit_gs_shader_state_record(struct v3dv_job *job, 16887ec681f3Smrg struct v3dv_bo *assembly_bo, 16897ec681f3Smrg struct v3dv_shader_variant *gs_bin, 16907ec681f3Smrg struct v3dv_cl_reloc gs_bin_uniforms, 16917ec681f3Smrg struct v3dv_shader_variant *gs, 16927ec681f3Smrg struct v3dv_cl_reloc gs_render_uniforms) 16937ec681f3Smrg{ 16947ec681f3Smrg cl_emit(&job->indirect, GEOMETRY_SHADER_STATE_RECORD, shader) { 16957ec681f3Smrg shader.geometry_bin_mode_shader_code_address = 16967ec681f3Smrg v3dv_cl_address(assembly_bo, gs_bin->assembly_offset); 16977ec681f3Smrg shader.geometry_bin_mode_shader_4_way_threadable = 16987ec681f3Smrg gs_bin->prog_data.gs->base.threads == 4; 16997ec681f3Smrg shader.geometry_bin_mode_shader_start_in_final_thread_section = 17007ec681f3Smrg gs_bin->prog_data.gs->base.single_seg; 17017ec681f3Smrg shader.geometry_bin_mode_shader_propagate_nans = true; 17027ec681f3Smrg shader.geometry_bin_mode_shader_uniforms_address = 17037ec681f3Smrg gs_bin_uniforms; 17047ec681f3Smrg 17057ec681f3Smrg shader.geometry_render_mode_shader_code_address = 17067ec681f3Smrg v3dv_cl_address(assembly_bo, gs->assembly_offset); 17077ec681f3Smrg shader.geometry_render_mode_shader_4_way_threadable = 17087ec681f3Smrg gs->prog_data.gs->base.threads == 4; 17097ec681f3Smrg shader.geometry_render_mode_shader_start_in_final_thread_section = 17107ec681f3Smrg gs->prog_data.gs->base.single_seg; 17117ec681f3Smrg shader.geometry_render_mode_shader_propagate_nans = true; 17127ec681f3Smrg shader.geometry_render_mode_shader_uniforms_address = 17137ec681f3Smrg gs_render_uniforms; 17147ec681f3Smrg } 17157ec681f3Smrg} 17167ec681f3Smrg 17177ec681f3Smrgstatic uint8_t 17187ec681f3Smrgv3d_gs_output_primitive(uint32_t prim_type) 17197ec681f3Smrg{ 17207ec681f3Smrg switch (prim_type) { 17217ec681f3Smrg case GL_POINTS: 17227ec681f3Smrg return GEOMETRY_SHADER_POINTS; 17237ec681f3Smrg case GL_LINE_STRIP: 17247ec681f3Smrg return GEOMETRY_SHADER_LINE_STRIP; 17257ec681f3Smrg case GL_TRIANGLE_STRIP: 17267ec681f3Smrg return GEOMETRY_SHADER_TRI_STRIP; 17277ec681f3Smrg default: 17287ec681f3Smrg unreachable("Unsupported primitive type"); 17297ec681f3Smrg } 17307ec681f3Smrg} 17317ec681f3Smrg 17327ec681f3Smrgstatic void 17337ec681f3Smrgemit_tes_gs_common_params(struct v3dv_job *job, 17347ec681f3Smrg uint8_t gs_out_prim_type, 17357ec681f3Smrg uint8_t gs_num_invocations) 17367ec681f3Smrg{ 17377ec681f3Smrg cl_emit(&job->indirect, TESSELLATION_GEOMETRY_COMMON_PARAMS, shader) { 17387ec681f3Smrg shader.tessellation_type = TESSELLATION_TYPE_TRIANGLE; 17397ec681f3Smrg shader.tessellation_point_mode = false; 17407ec681f3Smrg shader.tessellation_edge_spacing = TESSELLATION_EDGE_SPACING_EVEN; 17417ec681f3Smrg shader.tessellation_clockwise = true; 17427ec681f3Smrg shader.tessellation_invocations = 1; 17437ec681f3Smrg 17447ec681f3Smrg shader.geometry_shader_output_format = 17457ec681f3Smrg v3d_gs_output_primitive(gs_out_prim_type); 17467ec681f3Smrg shader.geometry_shader_instances = gs_num_invocations & 0x1F; 17477ec681f3Smrg } 17487ec681f3Smrg} 17497ec681f3Smrg 17507ec681f3Smrgstatic uint8_t 17517ec681f3Smrgsimd_width_to_gs_pack_mode(uint32_t width) 17527ec681f3Smrg{ 17537ec681f3Smrg switch (width) { 17547ec681f3Smrg case 16: 17557ec681f3Smrg return V3D_PACK_MODE_16_WAY; 17567ec681f3Smrg case 8: 17577ec681f3Smrg return V3D_PACK_MODE_8_WAY; 17587ec681f3Smrg case 4: 17597ec681f3Smrg return V3D_PACK_MODE_4_WAY; 17607ec681f3Smrg case 1: 17617ec681f3Smrg return V3D_PACK_MODE_1_WAY; 17627ec681f3Smrg default: 17637ec681f3Smrg unreachable("Invalid SIMD width"); 17647ec681f3Smrg }; 17657ec681f3Smrg} 17667ec681f3Smrg 17677ec681f3Smrgstatic void 17687ec681f3Smrgemit_tes_gs_shader_params(struct v3dv_job *job, 17697ec681f3Smrg uint32_t gs_simd, 17707ec681f3Smrg uint32_t gs_vpm_output_size, 17717ec681f3Smrg uint32_t gs_max_vpm_input_size_per_batch) 17727ec681f3Smrg{ 17737ec681f3Smrg cl_emit(&job->indirect, TESSELLATION_GEOMETRY_SHADER_PARAMS, shader) { 17747ec681f3Smrg shader.tcs_batch_flush_mode = V3D_TCS_FLUSH_MODE_FULLY_PACKED; 17757ec681f3Smrg shader.per_patch_data_column_depth = 1; 17767ec681f3Smrg shader.tcs_output_segment_size_in_sectors = 1; 17777ec681f3Smrg shader.tcs_output_segment_pack_mode = V3D_PACK_MODE_16_WAY; 17787ec681f3Smrg shader.tes_output_segment_size_in_sectors = 1; 17797ec681f3Smrg shader.tes_output_segment_pack_mode = V3D_PACK_MODE_16_WAY; 17807ec681f3Smrg shader.gs_output_segment_size_in_sectors = gs_vpm_output_size; 17817ec681f3Smrg shader.gs_output_segment_pack_mode = 17827ec681f3Smrg simd_width_to_gs_pack_mode(gs_simd); 17837ec681f3Smrg shader.tbg_max_patches_per_tcs_batch = 1; 17847ec681f3Smrg shader.tbg_max_extra_vertex_segs_for_patches_after_first = 0; 17857ec681f3Smrg shader.tbg_min_tcs_output_segments_required_in_play = 1; 17867ec681f3Smrg shader.tbg_min_per_patch_data_segments_required_in_play = 1; 17877ec681f3Smrg shader.tpg_max_patches_per_tes_batch = 1; 17887ec681f3Smrg shader.tpg_max_vertex_segments_per_tes_batch = 0; 17897ec681f3Smrg shader.tpg_max_tcs_output_segments_per_tes_batch = 1; 17907ec681f3Smrg shader.tpg_min_tes_output_segments_required_in_play = 1; 17917ec681f3Smrg shader.gbg_max_tes_output_vertex_segments_per_gs_batch = 17927ec681f3Smrg gs_max_vpm_input_size_per_batch; 17937ec681f3Smrg shader.gbg_min_gs_output_segments_required_in_play = 1; 17947ec681f3Smrg } 17957ec681f3Smrg} 17967ec681f3Smrg 17977ec681f3Smrgvoid 17987ec681f3Smrgv3dX(cmd_buffer_emit_gl_shader_state)(struct v3dv_cmd_buffer *cmd_buffer) 17997ec681f3Smrg{ 18007ec681f3Smrg struct v3dv_job *job = cmd_buffer->state.job; 18017ec681f3Smrg assert(job); 18027ec681f3Smrg 18037ec681f3Smrg struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 18047ec681f3Smrg struct v3dv_pipeline *pipeline = state->gfx.pipeline; 18057ec681f3Smrg assert(pipeline); 18067ec681f3Smrg 18077ec681f3Smrg struct v3dv_shader_variant *vs_variant = 18087ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]; 18097ec681f3Smrg struct v3d_vs_prog_data *prog_data_vs = vs_variant->prog_data.vs; 18107ec681f3Smrg 18117ec681f3Smrg struct v3dv_shader_variant *vs_bin_variant = 18127ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]; 18137ec681f3Smrg struct v3d_vs_prog_data *prog_data_vs_bin = vs_bin_variant->prog_data.vs; 18147ec681f3Smrg 18157ec681f3Smrg struct v3dv_shader_variant *fs_variant = 18167ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]; 18177ec681f3Smrg struct v3d_fs_prog_data *prog_data_fs = fs_variant->prog_data.fs; 18187ec681f3Smrg 18197ec681f3Smrg struct v3dv_shader_variant *gs_variant = NULL; 18207ec681f3Smrg struct v3dv_shader_variant *gs_bin_variant = NULL; 18217ec681f3Smrg struct v3d_gs_prog_data *prog_data_gs = NULL; 18227ec681f3Smrg struct v3d_gs_prog_data *prog_data_gs_bin = NULL; 18237ec681f3Smrg if (pipeline->has_gs) { 18247ec681f3Smrg gs_variant = 18257ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]; 18267ec681f3Smrg prog_data_gs = gs_variant->prog_data.gs; 18277ec681f3Smrg 18287ec681f3Smrg gs_bin_variant = 18297ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]; 18307ec681f3Smrg prog_data_gs_bin = gs_bin_variant->prog_data.gs; 18317ec681f3Smrg } 18327ec681f3Smrg 18337ec681f3Smrg /* Update the cache dirty flag based on the shader progs data */ 18347ec681f3Smrg job->tmu_dirty_rcl |= prog_data_vs_bin->base.tmu_dirty_rcl; 18357ec681f3Smrg job->tmu_dirty_rcl |= prog_data_vs->base.tmu_dirty_rcl; 18367ec681f3Smrg job->tmu_dirty_rcl |= prog_data_fs->base.tmu_dirty_rcl; 18377ec681f3Smrg if (pipeline->has_gs) { 18387ec681f3Smrg job->tmu_dirty_rcl |= prog_data_gs_bin->base.tmu_dirty_rcl; 18397ec681f3Smrg job->tmu_dirty_rcl |= prog_data_gs->base.tmu_dirty_rcl; 18407ec681f3Smrg } 18417ec681f3Smrg 18427ec681f3Smrg /* See GFXH-930 workaround below */ 18437ec681f3Smrg uint32_t num_elements_to_emit = MAX2(pipeline->va_count, 1); 18447ec681f3Smrg 18457ec681f3Smrg uint32_t shader_state_record_length = 18467ec681f3Smrg cl_packet_length(GL_SHADER_STATE_RECORD); 18477ec681f3Smrg if (pipeline->has_gs) { 18487ec681f3Smrg shader_state_record_length += 18497ec681f3Smrg cl_packet_length(GEOMETRY_SHADER_STATE_RECORD) + 18507ec681f3Smrg cl_packet_length(TESSELLATION_GEOMETRY_COMMON_PARAMS) + 18517ec681f3Smrg 2 * cl_packet_length(TESSELLATION_GEOMETRY_SHADER_PARAMS); 18527ec681f3Smrg } 18537ec681f3Smrg 18547ec681f3Smrg uint32_t shader_rec_offset = 18557ec681f3Smrg v3dv_cl_ensure_space(&job->indirect, 18567ec681f3Smrg shader_state_record_length + 18577ec681f3Smrg num_elements_to_emit * 18587ec681f3Smrg cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD), 18597ec681f3Smrg 32); 18607ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 18617ec681f3Smrg 18627ec681f3Smrg struct v3dv_bo *assembly_bo = pipeline->shared_data->assembly_bo; 18637ec681f3Smrg 18647ec681f3Smrg if (pipeline->has_gs) { 18657ec681f3Smrg emit_gs_shader_state_record(job, 18667ec681f3Smrg assembly_bo, 18677ec681f3Smrg gs_bin_variant, 18687ec681f3Smrg cmd_buffer->state.uniforms.gs_bin, 18697ec681f3Smrg gs_variant, 18707ec681f3Smrg cmd_buffer->state.uniforms.gs); 18717ec681f3Smrg 18727ec681f3Smrg emit_tes_gs_common_params(job, 18737ec681f3Smrg prog_data_gs->out_prim_type, 18747ec681f3Smrg prog_data_gs->num_invocations); 18757ec681f3Smrg 18767ec681f3Smrg emit_tes_gs_shader_params(job, 18777ec681f3Smrg pipeline->vpm_cfg_bin.gs_width, 18787ec681f3Smrg pipeline->vpm_cfg_bin.Gd, 18797ec681f3Smrg pipeline->vpm_cfg_bin.Gv); 18807ec681f3Smrg 18817ec681f3Smrg emit_tes_gs_shader_params(job, 18827ec681f3Smrg pipeline->vpm_cfg.gs_width, 18837ec681f3Smrg pipeline->vpm_cfg.Gd, 18847ec681f3Smrg pipeline->vpm_cfg.Gv); 18857ec681f3Smrg } 18867ec681f3Smrg 18877ec681f3Smrg struct v3dv_bo *default_attribute_values = 18887ec681f3Smrg pipeline->default_attribute_values != NULL ? 18897ec681f3Smrg pipeline->default_attribute_values : 18907ec681f3Smrg pipeline->device->default_attribute_float; 18917ec681f3Smrg 18927ec681f3Smrg cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_RECORD, 18937ec681f3Smrg pipeline->shader_state_record, shader) { 18947ec681f3Smrg 18957ec681f3Smrg /* FIXME: we are setting this values here and during the 18967ec681f3Smrg * prepacking. This is because both cl_emit_with_prepacked and v3dvx_pack 18977ec681f3Smrg * asserts for minimum values of these. It would be good to get 18987ec681f3Smrg * v3dvx_pack to assert on the final value if possible 18997ec681f3Smrg */ 19007ec681f3Smrg shader.min_coord_shader_input_segments_required_in_play = 19017ec681f3Smrg pipeline->vpm_cfg_bin.As; 19027ec681f3Smrg shader.min_vertex_shader_input_segments_required_in_play = 19037ec681f3Smrg pipeline->vpm_cfg.As; 19047ec681f3Smrg 19057ec681f3Smrg shader.coordinate_shader_code_address = 19067ec681f3Smrg v3dv_cl_address(assembly_bo, vs_bin_variant->assembly_offset); 19077ec681f3Smrg shader.vertex_shader_code_address = 19087ec681f3Smrg v3dv_cl_address(assembly_bo, vs_variant->assembly_offset); 19097ec681f3Smrg shader.fragment_shader_code_address = 19107ec681f3Smrg v3dv_cl_address(assembly_bo, fs_variant->assembly_offset); 19117ec681f3Smrg 19127ec681f3Smrg shader.coordinate_shader_uniforms_address = cmd_buffer->state.uniforms.vs_bin; 19137ec681f3Smrg shader.vertex_shader_uniforms_address = cmd_buffer->state.uniforms.vs; 19147ec681f3Smrg shader.fragment_shader_uniforms_address = cmd_buffer->state.uniforms.fs; 19157ec681f3Smrg 19167ec681f3Smrg shader.address_of_default_attribute_values = 19177ec681f3Smrg v3dv_cl_address(default_attribute_values, 0); 19187ec681f3Smrg 19197ec681f3Smrg shader.any_shader_reads_hardware_written_primitive_id = 19207ec681f3Smrg (pipeline->has_gs && prog_data_gs->uses_pid) || prog_data_fs->uses_pid; 19217ec681f3Smrg shader.insert_primitive_id_as_first_varying_to_fragment_shader = 19227ec681f3Smrg !pipeline->has_gs && prog_data_fs->uses_pid; 19237ec681f3Smrg } 19247ec681f3Smrg 19257ec681f3Smrg /* Upload vertex element attributes (SHADER_STATE_ATTRIBUTE_RECORD) */ 19267ec681f3Smrg bool cs_loaded_any = false; 19277ec681f3Smrg const bool cs_uses_builtins = prog_data_vs_bin->uses_iid || 19287ec681f3Smrg prog_data_vs_bin->uses_biid || 19297ec681f3Smrg prog_data_vs_bin->uses_vid; 19307ec681f3Smrg const uint32_t packet_length = 19317ec681f3Smrg cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD); 19327ec681f3Smrg 19337ec681f3Smrg uint32_t emitted_va_count = 0; 19347ec681f3Smrg for (uint32_t i = 0; emitted_va_count < pipeline->va_count; i++) { 19357ec681f3Smrg assert(i < MAX_VERTEX_ATTRIBS); 19367ec681f3Smrg 19377ec681f3Smrg if (pipeline->va[i].vk_format == VK_FORMAT_UNDEFINED) 19387ec681f3Smrg continue; 19397ec681f3Smrg 19407ec681f3Smrg const uint32_t binding = pipeline->va[i].binding; 19417ec681f3Smrg 19427ec681f3Smrg /* We store each vertex attribute in the array using its driver location 19437ec681f3Smrg * as index. 19447ec681f3Smrg */ 19457ec681f3Smrg const uint32_t location = i; 19467ec681f3Smrg 19477ec681f3Smrg struct v3dv_vertex_binding *c_vb = &cmd_buffer->state.vertex_bindings[binding]; 19487ec681f3Smrg 19497ec681f3Smrg cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, 19507ec681f3Smrg &pipeline->vertex_attrs[i * packet_length], attr) { 19517ec681f3Smrg 19527ec681f3Smrg assert(c_vb->buffer->mem->bo); 19537ec681f3Smrg attr.address = v3dv_cl_address(c_vb->buffer->mem->bo, 19547ec681f3Smrg c_vb->buffer->mem_offset + 19557ec681f3Smrg pipeline->va[i].offset + 19567ec681f3Smrg c_vb->offset); 19577ec681f3Smrg 19587ec681f3Smrg attr.number_of_values_read_by_coordinate_shader = 19597ec681f3Smrg prog_data_vs_bin->vattr_sizes[location]; 19607ec681f3Smrg attr.number_of_values_read_by_vertex_shader = 19617ec681f3Smrg prog_data_vs->vattr_sizes[location]; 19627ec681f3Smrg 19637ec681f3Smrg /* GFXH-930: At least one attribute must be enabled and read by CS 19647ec681f3Smrg * and VS. If we have attributes being consumed by the VS but not 19657ec681f3Smrg * the CS, then set up a dummy load of the last attribute into the 19667ec681f3Smrg * CS's VPM inputs. (Since CS is just dead-code-elimination compared 19677ec681f3Smrg * to VS, we can't have CS loading but not VS). 19687ec681f3Smrg * 19697ec681f3Smrg * GFXH-1602: first attribute must be active if using builtins. 19707ec681f3Smrg */ 19717ec681f3Smrg if (prog_data_vs_bin->vattr_sizes[location]) 19727ec681f3Smrg cs_loaded_any = true; 19737ec681f3Smrg 19747ec681f3Smrg if (i == 0 && cs_uses_builtins && !cs_loaded_any) { 19757ec681f3Smrg attr.number_of_values_read_by_coordinate_shader = 1; 19767ec681f3Smrg cs_loaded_any = true; 19777ec681f3Smrg } else if (i == pipeline->va_count - 1 && !cs_loaded_any) { 19787ec681f3Smrg attr.number_of_values_read_by_coordinate_shader = 1; 19797ec681f3Smrg cs_loaded_any = true; 19807ec681f3Smrg } 19817ec681f3Smrg 19827ec681f3Smrg attr.maximum_index = 0xffffff; 19837ec681f3Smrg } 19847ec681f3Smrg 19857ec681f3Smrg emitted_va_count++; 19867ec681f3Smrg } 19877ec681f3Smrg 19887ec681f3Smrg if (pipeline->va_count == 0) { 19897ec681f3Smrg /* GFXH-930: At least one attribute must be enabled and read 19907ec681f3Smrg * by CS and VS. If we have no attributes being consumed by 19917ec681f3Smrg * the shader, set up a dummy to be loaded into the VPM. 19927ec681f3Smrg */ 19937ec681f3Smrg cl_emit(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) { 19947ec681f3Smrg /* Valid address of data whose value will be unused. */ 19957ec681f3Smrg attr.address = v3dv_cl_address(job->indirect.bo, 0); 19967ec681f3Smrg 19977ec681f3Smrg attr.type = ATTRIBUTE_FLOAT; 19987ec681f3Smrg attr.stride = 0; 19997ec681f3Smrg attr.vec_size = 1; 20007ec681f3Smrg 20017ec681f3Smrg attr.number_of_values_read_by_coordinate_shader = 1; 20027ec681f3Smrg attr.number_of_values_read_by_vertex_shader = 1; 20037ec681f3Smrg } 20047ec681f3Smrg } 20057ec681f3Smrg 20067ec681f3Smrg if (cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PIPELINE) { 20077ec681f3Smrg v3dv_cl_ensure_space_with_branch(&job->bcl, 20087ec681f3Smrg sizeof(pipeline->vcm_cache_size)); 20097ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 20107ec681f3Smrg 20117ec681f3Smrg cl_emit_prepacked(&job->bcl, &pipeline->vcm_cache_size); 20127ec681f3Smrg } 20137ec681f3Smrg 20147ec681f3Smrg v3dv_cl_ensure_space_with_branch(&job->bcl, 20157ec681f3Smrg cl_packet_length(GL_SHADER_STATE)); 20167ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 20177ec681f3Smrg 20187ec681f3Smrg if (pipeline->has_gs) { 20197ec681f3Smrg cl_emit(&job->bcl, GL_SHADER_STATE_INCLUDING_GS, state) { 20207ec681f3Smrg state.address = v3dv_cl_address(job->indirect.bo, shader_rec_offset); 20217ec681f3Smrg state.number_of_attribute_arrays = num_elements_to_emit; 20227ec681f3Smrg } 20237ec681f3Smrg } else { 20247ec681f3Smrg cl_emit(&job->bcl, GL_SHADER_STATE, state) { 20257ec681f3Smrg state.address = v3dv_cl_address(job->indirect.bo, shader_rec_offset); 20267ec681f3Smrg state.number_of_attribute_arrays = num_elements_to_emit; 20277ec681f3Smrg } 20287ec681f3Smrg } 20297ec681f3Smrg 20307ec681f3Smrg cmd_buffer->state.dirty &= ~(V3DV_CMD_DIRTY_VERTEX_BUFFER | 20317ec681f3Smrg V3DV_CMD_DIRTY_DESCRIPTOR_SETS | 20327ec681f3Smrg V3DV_CMD_DIRTY_PUSH_CONSTANTS); 20337ec681f3Smrg cmd_buffer->state.dirty_descriptor_stages &= ~VK_SHADER_STAGE_ALL_GRAPHICS; 20347ec681f3Smrg cmd_buffer->state.dirty_push_constants_stages &= ~VK_SHADER_STAGE_ALL_GRAPHICS; 20357ec681f3Smrg} 20367ec681f3Smrg 20377ec681f3Smrg/* FIXME: C&P from v3dx_draw. Refactor to common place? */ 20387ec681f3Smrgstatic uint32_t 20397ec681f3Smrgv3d_hw_prim_type(enum pipe_prim_type prim_type) 20407ec681f3Smrg{ 20417ec681f3Smrg switch (prim_type) { 20427ec681f3Smrg case PIPE_PRIM_POINTS: 20437ec681f3Smrg case PIPE_PRIM_LINES: 20447ec681f3Smrg case PIPE_PRIM_LINE_LOOP: 20457ec681f3Smrg case PIPE_PRIM_LINE_STRIP: 20467ec681f3Smrg case PIPE_PRIM_TRIANGLES: 20477ec681f3Smrg case PIPE_PRIM_TRIANGLE_STRIP: 20487ec681f3Smrg case PIPE_PRIM_TRIANGLE_FAN: 20497ec681f3Smrg return prim_type; 20507ec681f3Smrg 20517ec681f3Smrg case PIPE_PRIM_LINES_ADJACENCY: 20527ec681f3Smrg case PIPE_PRIM_LINE_STRIP_ADJACENCY: 20537ec681f3Smrg case PIPE_PRIM_TRIANGLES_ADJACENCY: 20547ec681f3Smrg case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: 20557ec681f3Smrg return 8 + (prim_type - PIPE_PRIM_LINES_ADJACENCY); 20567ec681f3Smrg 20577ec681f3Smrg default: 20587ec681f3Smrg unreachable("Unsupported primitive type"); 20597ec681f3Smrg } 20607ec681f3Smrg} 20617ec681f3Smrg 20627ec681f3Smrgvoid 20637ec681f3Smrgv3dX(cmd_buffer_emit_draw)(struct v3dv_cmd_buffer *cmd_buffer, 20647ec681f3Smrg struct v3dv_draw_info *info) 20657ec681f3Smrg{ 20667ec681f3Smrg struct v3dv_job *job = cmd_buffer->state.job; 20677ec681f3Smrg assert(job); 20687ec681f3Smrg 20697ec681f3Smrg struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 20707ec681f3Smrg struct v3dv_pipeline *pipeline = state->gfx.pipeline; 20717ec681f3Smrg 20727ec681f3Smrg assert(pipeline); 20737ec681f3Smrg 20747ec681f3Smrg uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology); 20757ec681f3Smrg 20767ec681f3Smrg if (info->first_instance > 0) { 20777ec681f3Smrg v3dv_cl_ensure_space_with_branch( 20787ec681f3Smrg &job->bcl, cl_packet_length(BASE_VERTEX_BASE_INSTANCE)); 20797ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 20807ec681f3Smrg 20817ec681f3Smrg cl_emit(&job->bcl, BASE_VERTEX_BASE_INSTANCE, base) { 20827ec681f3Smrg base.base_instance = info->first_instance; 20837ec681f3Smrg base.base_vertex = 0; 20847ec681f3Smrg } 20857ec681f3Smrg } 20867ec681f3Smrg 20877ec681f3Smrg if (info->instance_count > 1) { 20887ec681f3Smrg v3dv_cl_ensure_space_with_branch( 20897ec681f3Smrg &job->bcl, cl_packet_length(VERTEX_ARRAY_INSTANCED_PRIMS)); 20907ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 20917ec681f3Smrg 20927ec681f3Smrg cl_emit(&job->bcl, VERTEX_ARRAY_INSTANCED_PRIMS, prim) { 20937ec681f3Smrg prim.mode = hw_prim_type; 20947ec681f3Smrg prim.index_of_first_vertex = info->first_vertex; 20957ec681f3Smrg prim.number_of_instances = info->instance_count; 20967ec681f3Smrg prim.instance_length = info->vertex_count; 20977ec681f3Smrg } 20987ec681f3Smrg } else { 20997ec681f3Smrg v3dv_cl_ensure_space_with_branch( 21007ec681f3Smrg &job->bcl, cl_packet_length(VERTEX_ARRAY_PRIMS)); 21017ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 21027ec681f3Smrg cl_emit(&job->bcl, VERTEX_ARRAY_PRIMS, prim) { 21037ec681f3Smrg prim.mode = hw_prim_type; 21047ec681f3Smrg prim.length = info->vertex_count; 21057ec681f3Smrg prim.index_of_first_vertex = info->first_vertex; 21067ec681f3Smrg } 21077ec681f3Smrg } 21087ec681f3Smrg} 21097ec681f3Smrg 21107ec681f3Smrgvoid 21117ec681f3Smrgv3dX(cmd_buffer_emit_index_buffer)(struct v3dv_cmd_buffer *cmd_buffer) 21127ec681f3Smrg{ 21137ec681f3Smrg struct v3dv_job *job = cmd_buffer->state.job; 21147ec681f3Smrg assert(job); 21157ec681f3Smrg 21167ec681f3Smrg /* We flag all state as dirty when we create a new job so make sure we 21177ec681f3Smrg * have a valid index buffer before attempting to emit state for it. 21187ec681f3Smrg */ 21197ec681f3Smrg struct v3dv_buffer *ibuffer = 21207ec681f3Smrg v3dv_buffer_from_handle(cmd_buffer->state.index_buffer.buffer); 21217ec681f3Smrg if (ibuffer) { 21227ec681f3Smrg v3dv_cl_ensure_space_with_branch( 21237ec681f3Smrg &job->bcl, cl_packet_length(INDEX_BUFFER_SETUP)); 21247ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 21257ec681f3Smrg 21267ec681f3Smrg const uint32_t offset = cmd_buffer->state.index_buffer.offset; 21277ec681f3Smrg cl_emit(&job->bcl, INDEX_BUFFER_SETUP, ib) { 21287ec681f3Smrg ib.address = v3dv_cl_address(ibuffer->mem->bo, 21297ec681f3Smrg ibuffer->mem_offset + offset); 21307ec681f3Smrg ib.size = ibuffer->mem->bo->size; 21317ec681f3Smrg } 21327ec681f3Smrg } 21337ec681f3Smrg 21347ec681f3Smrg cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_INDEX_BUFFER; 21357ec681f3Smrg} 21367ec681f3Smrg 21377ec681f3Smrgvoid 21387ec681f3Smrgv3dX(cmd_buffer_emit_draw_indexed)(struct v3dv_cmd_buffer *cmd_buffer, 21397ec681f3Smrg uint32_t indexCount, 21407ec681f3Smrg uint32_t instanceCount, 21417ec681f3Smrg uint32_t firstIndex, 21427ec681f3Smrg int32_t vertexOffset, 21437ec681f3Smrg uint32_t firstInstance) 21447ec681f3Smrg{ 21457ec681f3Smrg struct v3dv_job *job = cmd_buffer->state.job; 21467ec681f3Smrg assert(job); 21477ec681f3Smrg 21487ec681f3Smrg const struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; 21497ec681f3Smrg uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology); 21507ec681f3Smrg uint8_t index_type = ffs(cmd_buffer->state.index_buffer.index_size) - 1; 21517ec681f3Smrg uint32_t index_offset = firstIndex * cmd_buffer->state.index_buffer.index_size; 21527ec681f3Smrg 21537ec681f3Smrg if (vertexOffset != 0 || firstInstance != 0) { 21547ec681f3Smrg v3dv_cl_ensure_space_with_branch( 21557ec681f3Smrg &job->bcl, cl_packet_length(BASE_VERTEX_BASE_INSTANCE)); 21567ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 21577ec681f3Smrg 21587ec681f3Smrg cl_emit(&job->bcl, BASE_VERTEX_BASE_INSTANCE, base) { 21597ec681f3Smrg base.base_instance = firstInstance; 21607ec681f3Smrg base.base_vertex = vertexOffset; 21617ec681f3Smrg } 21627ec681f3Smrg } 21637ec681f3Smrg 21647ec681f3Smrg if (instanceCount == 1) { 21657ec681f3Smrg v3dv_cl_ensure_space_with_branch( 21667ec681f3Smrg &job->bcl, cl_packet_length(INDEXED_PRIM_LIST)); 21677ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 21687ec681f3Smrg 21697ec681f3Smrg cl_emit(&job->bcl, INDEXED_PRIM_LIST, prim) { 21707ec681f3Smrg prim.index_type = index_type; 21717ec681f3Smrg prim.length = indexCount; 21727ec681f3Smrg prim.index_offset = index_offset; 21737ec681f3Smrg prim.mode = hw_prim_type; 21747ec681f3Smrg prim.enable_primitive_restarts = pipeline->primitive_restart; 21757ec681f3Smrg } 21767ec681f3Smrg } else if (instanceCount > 1) { 21777ec681f3Smrg v3dv_cl_ensure_space_with_branch( 21787ec681f3Smrg &job->bcl, cl_packet_length(INDEXED_INSTANCED_PRIM_LIST)); 21797ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 21807ec681f3Smrg 21817ec681f3Smrg cl_emit(&job->bcl, INDEXED_INSTANCED_PRIM_LIST, prim) { 21827ec681f3Smrg prim.index_type = index_type; 21837ec681f3Smrg prim.index_offset = index_offset; 21847ec681f3Smrg prim.mode = hw_prim_type; 21857ec681f3Smrg prim.enable_primitive_restarts = pipeline->primitive_restart; 21867ec681f3Smrg prim.number_of_instances = instanceCount; 21877ec681f3Smrg prim.instance_length = indexCount; 21887ec681f3Smrg } 21897ec681f3Smrg } 21907ec681f3Smrg} 21917ec681f3Smrg 21927ec681f3Smrgvoid 21937ec681f3Smrgv3dX(cmd_buffer_emit_draw_indirect)(struct v3dv_cmd_buffer *cmd_buffer, 21947ec681f3Smrg struct v3dv_buffer *buffer, 21957ec681f3Smrg VkDeviceSize offset, 21967ec681f3Smrg uint32_t drawCount, 21977ec681f3Smrg uint32_t stride) 21987ec681f3Smrg{ 21997ec681f3Smrg struct v3dv_job *job = cmd_buffer->state.job; 22007ec681f3Smrg assert(job); 22017ec681f3Smrg 22027ec681f3Smrg const struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; 22037ec681f3Smrg uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology); 22047ec681f3Smrg 22057ec681f3Smrg v3dv_cl_ensure_space_with_branch( 22067ec681f3Smrg &job->bcl, cl_packet_length(INDIRECT_VERTEX_ARRAY_INSTANCED_PRIMS)); 22077ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 22087ec681f3Smrg 22097ec681f3Smrg cl_emit(&job->bcl, INDIRECT_VERTEX_ARRAY_INSTANCED_PRIMS, prim) { 22107ec681f3Smrg prim.mode = hw_prim_type; 22117ec681f3Smrg prim.number_of_draw_indirect_array_records = drawCount; 22127ec681f3Smrg prim.stride_in_multiples_of_4_bytes = stride >> 2; 22137ec681f3Smrg prim.address = v3dv_cl_address(buffer->mem->bo, 22147ec681f3Smrg buffer->mem_offset + offset); 22157ec681f3Smrg } 22167ec681f3Smrg} 22177ec681f3Smrg 22187ec681f3Smrgvoid 22197ec681f3Smrgv3dX(cmd_buffer_emit_indexed_indirect)(struct v3dv_cmd_buffer *cmd_buffer, 22207ec681f3Smrg struct v3dv_buffer *buffer, 22217ec681f3Smrg VkDeviceSize offset, 22227ec681f3Smrg uint32_t drawCount, 22237ec681f3Smrg uint32_t stride) 22247ec681f3Smrg{ 22257ec681f3Smrg struct v3dv_job *job = cmd_buffer->state.job; 22267ec681f3Smrg assert(job); 22277ec681f3Smrg 22287ec681f3Smrg const struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; 22297ec681f3Smrg uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology); 22307ec681f3Smrg uint8_t index_type = ffs(cmd_buffer->state.index_buffer.index_size) - 1; 22317ec681f3Smrg 22327ec681f3Smrg v3dv_cl_ensure_space_with_branch( 22337ec681f3Smrg &job->bcl, cl_packet_length(INDIRECT_INDEXED_INSTANCED_PRIM_LIST)); 22347ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 22357ec681f3Smrg 22367ec681f3Smrg cl_emit(&job->bcl, INDIRECT_INDEXED_INSTANCED_PRIM_LIST, prim) { 22377ec681f3Smrg prim.index_type = index_type; 22387ec681f3Smrg prim.mode = hw_prim_type; 22397ec681f3Smrg prim.enable_primitive_restarts = pipeline->primitive_restart; 22407ec681f3Smrg prim.number_of_draw_indirect_indexed_records = drawCount; 22417ec681f3Smrg prim.stride_in_multiples_of_4_bytes = stride >> 2; 22427ec681f3Smrg prim.address = v3dv_cl_address(buffer->mem->bo, 22437ec681f3Smrg buffer->mem_offset + offset); 22447ec681f3Smrg } 22457ec681f3Smrg} 22467ec681f3Smrg 22477ec681f3Smrgvoid 22487ec681f3Smrgv3dX(cmd_buffer_render_pass_setup_render_target)(struct v3dv_cmd_buffer *cmd_buffer, 22497ec681f3Smrg int rt, 22507ec681f3Smrg uint32_t *rt_bpp, 22517ec681f3Smrg uint32_t *rt_type, 22527ec681f3Smrg uint32_t *rt_clamp) 22537ec681f3Smrg{ 22547ec681f3Smrg const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 22557ec681f3Smrg 22567ec681f3Smrg assert(state->subpass_idx < state->pass->subpass_count); 22577ec681f3Smrg const struct v3dv_subpass *subpass = 22587ec681f3Smrg &state->pass->subpasses[state->subpass_idx]; 22597ec681f3Smrg 22607ec681f3Smrg if (rt >= subpass->color_count) 22617ec681f3Smrg return; 22627ec681f3Smrg 22637ec681f3Smrg struct v3dv_subpass_attachment *attachment = &subpass->color_attachments[rt]; 22647ec681f3Smrg const uint32_t attachment_idx = attachment->attachment; 22657ec681f3Smrg if (attachment_idx == VK_ATTACHMENT_UNUSED) 22667ec681f3Smrg return; 22677ec681f3Smrg 22687ec681f3Smrg const struct v3dv_framebuffer *framebuffer = state->framebuffer; 22697ec681f3Smrg assert(attachment_idx < framebuffer->attachment_count); 22707ec681f3Smrg struct v3dv_image_view *iview = framebuffer->attachments[attachment_idx]; 22717ec681f3Smrg assert(iview->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT); 22727ec681f3Smrg 22737ec681f3Smrg *rt_bpp = iview->internal_bpp; 22747ec681f3Smrg *rt_type = iview->internal_type; 22757ec681f3Smrg if (vk_format_is_int(iview->vk.format)) 22767ec681f3Smrg *rt_clamp = V3D_RENDER_TARGET_CLAMP_INT; 22777ec681f3Smrg else if (vk_format_is_srgb(iview->vk.format)) 22787ec681f3Smrg *rt_clamp = V3D_RENDER_TARGET_CLAMP_NORM; 22797ec681f3Smrg else 22807ec681f3Smrg *rt_clamp = V3D_RENDER_TARGET_CLAMP_NONE; 22817ec681f3Smrg} 2282