/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
26b8e80941Smrg */ 27b8e80941Smrg 28b8e80941Smrg#include "radv_private.h" 29b8e80941Smrg#include "radv_radeon_winsys.h" 30b8e80941Smrg#include "radv_shader.h" 31b8e80941Smrg#include "radv_cs.h" 32b8e80941Smrg#include "sid.h" 33b8e80941Smrg#include "gfx9d.h" 34b8e80941Smrg#include "vk_format.h" 35b8e80941Smrg#include "radv_debug.h" 36b8e80941Smrg#include "radv_meta.h" 37b8e80941Smrg 38b8e80941Smrg#include "ac_debug.h" 39b8e80941Smrg 40b8e80941Smrgenum { 41b8e80941Smrg RADV_PREFETCH_VBO_DESCRIPTORS = (1 << 0), 42b8e80941Smrg RADV_PREFETCH_VS = (1 << 1), 43b8e80941Smrg RADV_PREFETCH_TCS = (1 << 2), 44b8e80941Smrg RADV_PREFETCH_TES = (1 << 3), 45b8e80941Smrg RADV_PREFETCH_GS = (1 << 4), 46b8e80941Smrg RADV_PREFETCH_PS = (1 << 5), 47b8e80941Smrg RADV_PREFETCH_SHADERS = (RADV_PREFETCH_VS | 48b8e80941Smrg RADV_PREFETCH_TCS | 49b8e80941Smrg RADV_PREFETCH_TES | 50b8e80941Smrg RADV_PREFETCH_GS | 51b8e80941Smrg RADV_PREFETCH_PS) 52b8e80941Smrg}; 53b8e80941Smrg 54b8e80941Smrgstatic void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, 55b8e80941Smrg struct radv_image *image, 56b8e80941Smrg VkImageLayout src_layout, 57b8e80941Smrg VkImageLayout dst_layout, 58b8e80941Smrg uint32_t src_family, 59b8e80941Smrg uint32_t dst_family, 60b8e80941Smrg const VkImageSubresourceRange *range); 61b8e80941Smrg 62b8e80941Smrgconst struct radv_dynamic_state default_dynamic_state = { 63b8e80941Smrg .viewport = { 64b8e80941Smrg .count = 0, 65b8e80941Smrg }, 66b8e80941Smrg .scissor = { 67b8e80941Smrg .count = 0, 68b8e80941Smrg }, 69b8e80941Smrg .line_width = 1.0f, 70b8e80941Smrg .depth_bias = { 71b8e80941Smrg .bias = 0.0f, 72b8e80941Smrg .clamp = 0.0f, 73b8e80941Smrg .slope = 0.0f, 74b8e80941Smrg }, 75b8e80941Smrg .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f }, 76b8e80941Smrg .depth_bounds = { 77b8e80941Smrg .min = 0.0f, 78b8e80941Smrg .max = 1.0f, 79b8e80941Smrg }, 80b8e80941Smrg .stencil_compare_mask = { 81b8e80941Smrg .front = ~0u, 82b8e80941Smrg .back = ~0u, 83b8e80941Smrg }, 
84b8e80941Smrg .stencil_write_mask = { 85b8e80941Smrg .front = ~0u, 86b8e80941Smrg .back = ~0u, 87b8e80941Smrg }, 88b8e80941Smrg .stencil_reference = { 89b8e80941Smrg .front = 0u, 90b8e80941Smrg .back = 0u, 91b8e80941Smrg }, 92b8e80941Smrg}; 93b8e80941Smrg 94b8e80941Smrgstatic void 95b8e80941Smrgradv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, 96b8e80941Smrg const struct radv_dynamic_state *src) 97b8e80941Smrg{ 98b8e80941Smrg struct radv_dynamic_state *dest = &cmd_buffer->state.dynamic; 99b8e80941Smrg uint32_t copy_mask = src->mask; 100b8e80941Smrg uint32_t dest_mask = 0; 101b8e80941Smrg 102b8e80941Smrg /* Make sure to copy the number of viewports/scissors because they can 103b8e80941Smrg * only be specified at pipeline creation time. 104b8e80941Smrg */ 105b8e80941Smrg dest->viewport.count = src->viewport.count; 106b8e80941Smrg dest->scissor.count = src->scissor.count; 107b8e80941Smrg dest->discard_rectangle.count = src->discard_rectangle.count; 108b8e80941Smrg 109b8e80941Smrg if (copy_mask & RADV_DYNAMIC_VIEWPORT) { 110b8e80941Smrg if (memcmp(&dest->viewport.viewports, &src->viewport.viewports, 111b8e80941Smrg src->viewport.count * sizeof(VkViewport))) { 112b8e80941Smrg typed_memcpy(dest->viewport.viewports, 113b8e80941Smrg src->viewport.viewports, 114b8e80941Smrg src->viewport.count); 115b8e80941Smrg dest_mask |= RADV_DYNAMIC_VIEWPORT; 116b8e80941Smrg } 117b8e80941Smrg } 118b8e80941Smrg 119b8e80941Smrg if (copy_mask & RADV_DYNAMIC_SCISSOR) { 120b8e80941Smrg if (memcmp(&dest->scissor.scissors, &src->scissor.scissors, 121b8e80941Smrg src->scissor.count * sizeof(VkRect2D))) { 122b8e80941Smrg typed_memcpy(dest->scissor.scissors, 123b8e80941Smrg src->scissor.scissors, src->scissor.count); 124b8e80941Smrg dest_mask |= RADV_DYNAMIC_SCISSOR; 125b8e80941Smrg } 126b8e80941Smrg } 127b8e80941Smrg 128b8e80941Smrg if (copy_mask & RADV_DYNAMIC_LINE_WIDTH) { 129b8e80941Smrg if (dest->line_width != src->line_width) { 130b8e80941Smrg dest->line_width = src->line_width; 
131b8e80941Smrg dest_mask |= RADV_DYNAMIC_LINE_WIDTH; 132b8e80941Smrg } 133b8e80941Smrg } 134b8e80941Smrg 135b8e80941Smrg if (copy_mask & RADV_DYNAMIC_DEPTH_BIAS) { 136b8e80941Smrg if (memcmp(&dest->depth_bias, &src->depth_bias, 137b8e80941Smrg sizeof(src->depth_bias))) { 138b8e80941Smrg dest->depth_bias = src->depth_bias; 139b8e80941Smrg dest_mask |= RADV_DYNAMIC_DEPTH_BIAS; 140b8e80941Smrg } 141b8e80941Smrg } 142b8e80941Smrg 143b8e80941Smrg if (copy_mask & RADV_DYNAMIC_BLEND_CONSTANTS) { 144b8e80941Smrg if (memcmp(&dest->blend_constants, &src->blend_constants, 145b8e80941Smrg sizeof(src->blend_constants))) { 146b8e80941Smrg typed_memcpy(dest->blend_constants, 147b8e80941Smrg src->blend_constants, 4); 148b8e80941Smrg dest_mask |= RADV_DYNAMIC_BLEND_CONSTANTS; 149b8e80941Smrg } 150b8e80941Smrg } 151b8e80941Smrg 152b8e80941Smrg if (copy_mask & RADV_DYNAMIC_DEPTH_BOUNDS) { 153b8e80941Smrg if (memcmp(&dest->depth_bounds, &src->depth_bounds, 154b8e80941Smrg sizeof(src->depth_bounds))) { 155b8e80941Smrg dest->depth_bounds = src->depth_bounds; 156b8e80941Smrg dest_mask |= RADV_DYNAMIC_DEPTH_BOUNDS; 157b8e80941Smrg } 158b8e80941Smrg } 159b8e80941Smrg 160b8e80941Smrg if (copy_mask & RADV_DYNAMIC_STENCIL_COMPARE_MASK) { 161b8e80941Smrg if (memcmp(&dest->stencil_compare_mask, 162b8e80941Smrg &src->stencil_compare_mask, 163b8e80941Smrg sizeof(src->stencil_compare_mask))) { 164b8e80941Smrg dest->stencil_compare_mask = src->stencil_compare_mask; 165b8e80941Smrg dest_mask |= RADV_DYNAMIC_STENCIL_COMPARE_MASK; 166b8e80941Smrg } 167b8e80941Smrg } 168b8e80941Smrg 169b8e80941Smrg if (copy_mask & RADV_DYNAMIC_STENCIL_WRITE_MASK) { 170b8e80941Smrg if (memcmp(&dest->stencil_write_mask, &src->stencil_write_mask, 171b8e80941Smrg sizeof(src->stencil_write_mask))) { 172b8e80941Smrg dest->stencil_write_mask = src->stencil_write_mask; 173b8e80941Smrg dest_mask |= RADV_DYNAMIC_STENCIL_WRITE_MASK; 174b8e80941Smrg } 175b8e80941Smrg } 176b8e80941Smrg 177b8e80941Smrg if (copy_mask & 
RADV_DYNAMIC_STENCIL_REFERENCE) { 178b8e80941Smrg if (memcmp(&dest->stencil_reference, &src->stencil_reference, 179b8e80941Smrg sizeof(src->stencil_reference))) { 180b8e80941Smrg dest->stencil_reference = src->stencil_reference; 181b8e80941Smrg dest_mask |= RADV_DYNAMIC_STENCIL_REFERENCE; 182b8e80941Smrg } 183b8e80941Smrg } 184b8e80941Smrg 185b8e80941Smrg if (copy_mask & RADV_DYNAMIC_DISCARD_RECTANGLE) { 186b8e80941Smrg if (memcmp(&dest->discard_rectangle.rectangles, &src->discard_rectangle.rectangles, 187b8e80941Smrg src->discard_rectangle.count * sizeof(VkRect2D))) { 188b8e80941Smrg typed_memcpy(dest->discard_rectangle.rectangles, 189b8e80941Smrg src->discard_rectangle.rectangles, 190b8e80941Smrg src->discard_rectangle.count); 191b8e80941Smrg dest_mask |= RADV_DYNAMIC_DISCARD_RECTANGLE; 192b8e80941Smrg } 193b8e80941Smrg } 194b8e80941Smrg 195b8e80941Smrg cmd_buffer->state.dirty |= dest_mask; 196b8e80941Smrg} 197b8e80941Smrg 198b8e80941Smrgstatic void 199b8e80941Smrgradv_bind_streamout_state(struct radv_cmd_buffer *cmd_buffer, 200b8e80941Smrg struct radv_pipeline *pipeline) 201b8e80941Smrg{ 202b8e80941Smrg struct radv_streamout_state *so = &cmd_buffer->state.streamout; 203b8e80941Smrg struct radv_shader_info *info; 204b8e80941Smrg 205b8e80941Smrg if (!pipeline->streamout_shader) 206b8e80941Smrg return; 207b8e80941Smrg 208b8e80941Smrg info = &pipeline->streamout_shader->info.info; 209b8e80941Smrg for (int i = 0; i < MAX_SO_BUFFERS; i++) 210b8e80941Smrg so->stride_in_dw[i] = info->so.strides[i]; 211b8e80941Smrg 212b8e80941Smrg so->enabled_stream_buffers_mask = info->so.enabled_stream_buffers_mask; 213b8e80941Smrg} 214b8e80941Smrg 215b8e80941Smrgbool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer) 216b8e80941Smrg{ 217b8e80941Smrg return cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE && 218b8e80941Smrg cmd_buffer->device->physical_device->rad_info.chip_class >= CIK; 219b8e80941Smrg} 220b8e80941Smrg 221b8e80941Smrgenum ring_type 
radv_queue_family_to_ring(int f) { 222b8e80941Smrg switch (f) { 223b8e80941Smrg case RADV_QUEUE_GENERAL: 224b8e80941Smrg return RING_GFX; 225b8e80941Smrg case RADV_QUEUE_COMPUTE: 226b8e80941Smrg return RING_COMPUTE; 227b8e80941Smrg case RADV_QUEUE_TRANSFER: 228b8e80941Smrg return RING_DMA; 229b8e80941Smrg default: 230b8e80941Smrg unreachable("Unknown queue family"); 231b8e80941Smrg } 232b8e80941Smrg} 233b8e80941Smrg 234b8e80941Smrgstatic VkResult radv_create_cmd_buffer( 235b8e80941Smrg struct radv_device * device, 236b8e80941Smrg struct radv_cmd_pool * pool, 237b8e80941Smrg VkCommandBufferLevel level, 238b8e80941Smrg VkCommandBuffer* pCommandBuffer) 239b8e80941Smrg{ 240b8e80941Smrg struct radv_cmd_buffer *cmd_buffer; 241b8e80941Smrg unsigned ring; 242b8e80941Smrg cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8, 243b8e80941Smrg VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 244b8e80941Smrg if (cmd_buffer == NULL) 245b8e80941Smrg return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); 246b8e80941Smrg 247b8e80941Smrg cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC; 248b8e80941Smrg cmd_buffer->device = device; 249b8e80941Smrg cmd_buffer->pool = pool; 250b8e80941Smrg cmd_buffer->level = level; 251b8e80941Smrg 252b8e80941Smrg if (pool) { 253b8e80941Smrg list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); 254b8e80941Smrg cmd_buffer->queue_family_index = pool->queue_family_index; 255b8e80941Smrg 256b8e80941Smrg } else { 257b8e80941Smrg /* Init the pool_link so we can safely call list_del when we destroy 258b8e80941Smrg * the command buffer 259b8e80941Smrg */ 260b8e80941Smrg list_inithead(&cmd_buffer->pool_link); 261b8e80941Smrg cmd_buffer->queue_family_index = RADV_QUEUE_GENERAL; 262b8e80941Smrg } 263b8e80941Smrg 264b8e80941Smrg ring = radv_queue_family_to_ring(cmd_buffer->queue_family_index); 265b8e80941Smrg 266b8e80941Smrg cmd_buffer->cs = device->ws->cs_create(device->ws, ring); 267b8e80941Smrg if (!cmd_buffer->cs) { 268b8e80941Smrg 
vk_free(&cmd_buffer->pool->alloc, cmd_buffer); 269b8e80941Smrg return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); 270b8e80941Smrg } 271b8e80941Smrg 272b8e80941Smrg *pCommandBuffer = radv_cmd_buffer_to_handle(cmd_buffer); 273b8e80941Smrg 274b8e80941Smrg list_inithead(&cmd_buffer->upload.list); 275b8e80941Smrg 276b8e80941Smrg return VK_SUCCESS; 277b8e80941Smrg} 278b8e80941Smrg 279b8e80941Smrgstatic void 280b8e80941Smrgradv_cmd_buffer_destroy(struct radv_cmd_buffer *cmd_buffer) 281b8e80941Smrg{ 282b8e80941Smrg list_del(&cmd_buffer->pool_link); 283b8e80941Smrg 284b8e80941Smrg list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, 285b8e80941Smrg &cmd_buffer->upload.list, list) { 286b8e80941Smrg cmd_buffer->device->ws->buffer_destroy(up->upload_bo); 287b8e80941Smrg list_del(&up->list); 288b8e80941Smrg free(up); 289b8e80941Smrg } 290b8e80941Smrg 291b8e80941Smrg if (cmd_buffer->upload.upload_bo) 292b8e80941Smrg cmd_buffer->device->ws->buffer_destroy(cmd_buffer->upload.upload_bo); 293b8e80941Smrg cmd_buffer->device->ws->cs_destroy(cmd_buffer->cs); 294b8e80941Smrg 295b8e80941Smrg for (unsigned i = 0; i < VK_PIPELINE_BIND_POINT_RANGE_SIZE; i++) 296b8e80941Smrg free(cmd_buffer->descriptors[i].push_set.set.mapped_ptr); 297b8e80941Smrg 298b8e80941Smrg vk_free(&cmd_buffer->pool->alloc, cmd_buffer); 299b8e80941Smrg} 300b8e80941Smrg 301b8e80941Smrgstatic VkResult 302b8e80941Smrgradv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) 303b8e80941Smrg{ 304b8e80941Smrg cmd_buffer->device->ws->cs_reset(cmd_buffer->cs); 305b8e80941Smrg 306b8e80941Smrg list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, 307b8e80941Smrg &cmd_buffer->upload.list, list) { 308b8e80941Smrg cmd_buffer->device->ws->buffer_destroy(up->upload_bo); 309b8e80941Smrg list_del(&up->list); 310b8e80941Smrg free(up); 311b8e80941Smrg } 312b8e80941Smrg 313b8e80941Smrg cmd_buffer->push_constant_stages = 0; 314b8e80941Smrg cmd_buffer->scratch_size_needed = 0; 315b8e80941Smrg 
cmd_buffer->compute_scratch_size_needed = 0; 316b8e80941Smrg cmd_buffer->esgs_ring_size_needed = 0; 317b8e80941Smrg cmd_buffer->gsvs_ring_size_needed = 0; 318b8e80941Smrg cmd_buffer->tess_rings_needed = false; 319b8e80941Smrg cmd_buffer->sample_positions_needed = false; 320b8e80941Smrg 321b8e80941Smrg if (cmd_buffer->upload.upload_bo) 322b8e80941Smrg radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, 323b8e80941Smrg cmd_buffer->upload.upload_bo); 324b8e80941Smrg cmd_buffer->upload.offset = 0; 325b8e80941Smrg 326b8e80941Smrg cmd_buffer->record_result = VK_SUCCESS; 327b8e80941Smrg 328b8e80941Smrg memset(cmd_buffer->vertex_bindings, 0, sizeof(cmd_buffer->vertex_bindings)); 329b8e80941Smrg 330b8e80941Smrg for (unsigned i = 0; i < VK_PIPELINE_BIND_POINT_RANGE_SIZE; i++) { 331b8e80941Smrg cmd_buffer->descriptors[i].dirty = 0; 332b8e80941Smrg cmd_buffer->descriptors[i].valid = 0; 333b8e80941Smrg cmd_buffer->descriptors[i].push_dirty = false; 334b8e80941Smrg } 335b8e80941Smrg 336b8e80941Smrg if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 && 337b8e80941Smrg cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) { 338b8e80941Smrg unsigned num_db = cmd_buffer->device->physical_device->rad_info.num_render_backends; 339b8e80941Smrg unsigned fence_offset, eop_bug_offset; 340b8e80941Smrg void *fence_ptr; 341b8e80941Smrg 342b8e80941Smrg radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 8, &fence_offset, 343b8e80941Smrg &fence_ptr); 344b8e80941Smrg 345b8e80941Smrg cmd_buffer->gfx9_fence_va = 346b8e80941Smrg radv_buffer_get_va(cmd_buffer->upload.upload_bo); 347b8e80941Smrg cmd_buffer->gfx9_fence_va += fence_offset; 348b8e80941Smrg 349b8e80941Smrg /* Allocate a buffer for the EOP bug on GFX9. 
*/ 350b8e80941Smrg radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 8, 351b8e80941Smrg &eop_bug_offset, &fence_ptr); 352b8e80941Smrg cmd_buffer->gfx9_eop_bug_va = 353b8e80941Smrg radv_buffer_get_va(cmd_buffer->upload.upload_bo); 354b8e80941Smrg cmd_buffer->gfx9_eop_bug_va += eop_bug_offset; 355b8e80941Smrg } 356b8e80941Smrg 357b8e80941Smrg cmd_buffer->status = RADV_CMD_BUFFER_STATUS_INITIAL; 358b8e80941Smrg 359b8e80941Smrg return cmd_buffer->record_result; 360b8e80941Smrg} 361b8e80941Smrg 362b8e80941Smrgstatic bool 363b8e80941Smrgradv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer *cmd_buffer, 364b8e80941Smrg uint64_t min_needed) 365b8e80941Smrg{ 366b8e80941Smrg uint64_t new_size; 367b8e80941Smrg struct radeon_winsys_bo *bo; 368b8e80941Smrg struct radv_cmd_buffer_upload *upload; 369b8e80941Smrg struct radv_device *device = cmd_buffer->device; 370b8e80941Smrg 371b8e80941Smrg new_size = MAX2(min_needed, 16 * 1024); 372b8e80941Smrg new_size = MAX2(new_size, 2 * cmd_buffer->upload.size); 373b8e80941Smrg 374b8e80941Smrg bo = device->ws->buffer_create(device->ws, 375b8e80941Smrg new_size, 4096, 376b8e80941Smrg RADEON_DOMAIN_GTT, 377b8e80941Smrg RADEON_FLAG_CPU_ACCESS| 378b8e80941Smrg RADEON_FLAG_NO_INTERPROCESS_SHARING | 379b8e80941Smrg RADEON_FLAG_32BIT, 380b8e80941Smrg RADV_BO_PRIORITY_UPLOAD_BUFFER); 381b8e80941Smrg 382b8e80941Smrg if (!bo) { 383b8e80941Smrg cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY; 384b8e80941Smrg return false; 385b8e80941Smrg } 386b8e80941Smrg 387b8e80941Smrg radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo); 388b8e80941Smrg if (cmd_buffer->upload.upload_bo) { 389b8e80941Smrg upload = malloc(sizeof(*upload)); 390b8e80941Smrg 391b8e80941Smrg if (!upload) { 392b8e80941Smrg cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY; 393b8e80941Smrg device->ws->buffer_destroy(bo); 394b8e80941Smrg return false; 395b8e80941Smrg } 396b8e80941Smrg 397b8e80941Smrg memcpy(upload, &cmd_buffer->upload, sizeof(*upload)); 398b8e80941Smrg 
list_add(&upload->list, &cmd_buffer->upload.list); 399b8e80941Smrg } 400b8e80941Smrg 401b8e80941Smrg cmd_buffer->upload.upload_bo = bo; 402b8e80941Smrg cmd_buffer->upload.size = new_size; 403b8e80941Smrg cmd_buffer->upload.offset = 0; 404b8e80941Smrg cmd_buffer->upload.map = device->ws->buffer_map(cmd_buffer->upload.upload_bo); 405b8e80941Smrg 406b8e80941Smrg if (!cmd_buffer->upload.map) { 407b8e80941Smrg cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY; 408b8e80941Smrg return false; 409b8e80941Smrg } 410b8e80941Smrg 411b8e80941Smrg return true; 412b8e80941Smrg} 413b8e80941Smrg 414b8e80941Smrgbool 415b8e80941Smrgradv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, 416b8e80941Smrg unsigned size, 417b8e80941Smrg unsigned alignment, 418b8e80941Smrg unsigned *out_offset, 419b8e80941Smrg void **ptr) 420b8e80941Smrg{ 421b8e80941Smrg assert(util_is_power_of_two_nonzero(alignment)); 422b8e80941Smrg 423b8e80941Smrg uint64_t offset = align(cmd_buffer->upload.offset, alignment); 424b8e80941Smrg if (offset + size > cmd_buffer->upload.size) { 425b8e80941Smrg if (!radv_cmd_buffer_resize_upload_buf(cmd_buffer, size)) 426b8e80941Smrg return false; 427b8e80941Smrg offset = 0; 428b8e80941Smrg } 429b8e80941Smrg 430b8e80941Smrg *out_offset = offset; 431b8e80941Smrg *ptr = cmd_buffer->upload.map + offset; 432b8e80941Smrg 433b8e80941Smrg cmd_buffer->upload.offset = offset + size; 434b8e80941Smrg return true; 435b8e80941Smrg} 436b8e80941Smrg 437b8e80941Smrgbool 438b8e80941Smrgradv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, 439b8e80941Smrg unsigned size, unsigned alignment, 440b8e80941Smrg const void *data, unsigned *out_offset) 441b8e80941Smrg{ 442b8e80941Smrg uint8_t *ptr; 443b8e80941Smrg 444b8e80941Smrg if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, alignment, 445b8e80941Smrg out_offset, (void **)&ptr)) 446b8e80941Smrg return false; 447b8e80941Smrg 448b8e80941Smrg if (ptr) 449b8e80941Smrg memcpy(ptr, data, size); 450b8e80941Smrg 451b8e80941Smrg 
return true; 452b8e80941Smrg} 453b8e80941Smrg 454b8e80941Smrgstatic void 455b8e80941Smrgradv_emit_write_data_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t va, 456b8e80941Smrg unsigned count, const uint32_t *data) 457b8e80941Smrg{ 458b8e80941Smrg struct radeon_cmdbuf *cs = cmd_buffer->cs; 459b8e80941Smrg 460b8e80941Smrg radeon_check_space(cmd_buffer->device->ws, cs, 4 + count); 461b8e80941Smrg 462b8e80941Smrg radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0)); 463b8e80941Smrg radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | 464b8e80941Smrg S_370_WR_CONFIRM(1) | 465b8e80941Smrg S_370_ENGINE_SEL(V_370_ME)); 466b8e80941Smrg radeon_emit(cs, va); 467b8e80941Smrg radeon_emit(cs, va >> 32); 468b8e80941Smrg radeon_emit_array(cs, data, count); 469b8e80941Smrg} 470b8e80941Smrg 471b8e80941Smrgvoid radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer) 472b8e80941Smrg{ 473b8e80941Smrg struct radv_device *device = cmd_buffer->device; 474b8e80941Smrg struct radeon_cmdbuf *cs = cmd_buffer->cs; 475b8e80941Smrg uint64_t va; 476b8e80941Smrg 477b8e80941Smrg va = radv_buffer_get_va(device->trace_bo); 478b8e80941Smrg if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) 479b8e80941Smrg va += 4; 480b8e80941Smrg 481b8e80941Smrg ++cmd_buffer->state.trace_id; 482b8e80941Smrg radv_emit_write_data_packet(cmd_buffer, va, 1, 483b8e80941Smrg &cmd_buffer->state.trace_id); 484b8e80941Smrg 485b8e80941Smrg radeon_check_space(cmd_buffer->device->ws, cs, 2); 486b8e80941Smrg 487b8e80941Smrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 488b8e80941Smrg radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id)); 489b8e80941Smrg} 490b8e80941Smrg 491b8e80941Smrgstatic void 492b8e80941Smrgradv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer, 493b8e80941Smrg enum radv_cmd_flush_bits flags) 494b8e80941Smrg{ 495b8e80941Smrg if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_SYNC_SHADERS) { 496b8e80941Smrg assert(flags & (RADV_CMD_FLAG_PS_PARTIAL_FLUSH | 497b8e80941Smrg 
RADV_CMD_FLAG_CS_PARTIAL_FLUSH)); 498b8e80941Smrg 499b8e80941Smrg radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4); 500b8e80941Smrg 501b8e80941Smrg /* Force wait for graphics or compute engines to be idle. */ 502b8e80941Smrg si_cs_emit_cache_flush(cmd_buffer->cs, 503b8e80941Smrg cmd_buffer->device->physical_device->rad_info.chip_class, 504b8e80941Smrg &cmd_buffer->gfx9_fence_idx, 505b8e80941Smrg cmd_buffer->gfx9_fence_va, 506b8e80941Smrg radv_cmd_buffer_uses_mec(cmd_buffer), 507b8e80941Smrg flags, cmd_buffer->gfx9_eop_bug_va); 508b8e80941Smrg } 509b8e80941Smrg 510b8e80941Smrg if (unlikely(cmd_buffer->device->trace_bo)) 511b8e80941Smrg radv_cmd_buffer_trace_emit(cmd_buffer); 512b8e80941Smrg} 513b8e80941Smrg 514b8e80941Smrgstatic void 515b8e80941Smrgradv_save_pipeline(struct radv_cmd_buffer *cmd_buffer, 516b8e80941Smrg struct radv_pipeline *pipeline, enum ring_type ring) 517b8e80941Smrg{ 518b8e80941Smrg struct radv_device *device = cmd_buffer->device; 519b8e80941Smrg uint32_t data[2]; 520b8e80941Smrg uint64_t va; 521b8e80941Smrg 522b8e80941Smrg va = radv_buffer_get_va(device->trace_bo); 523b8e80941Smrg 524b8e80941Smrg switch (ring) { 525b8e80941Smrg case RING_GFX: 526b8e80941Smrg va += 8; 527b8e80941Smrg break; 528b8e80941Smrg case RING_COMPUTE: 529b8e80941Smrg va += 16; 530b8e80941Smrg break; 531b8e80941Smrg default: 532b8e80941Smrg assert(!"invalid ring type"); 533b8e80941Smrg } 534b8e80941Smrg 535b8e80941Smrg data[0] = (uintptr_t)pipeline; 536b8e80941Smrg data[1] = (uintptr_t)pipeline >> 32; 537b8e80941Smrg 538b8e80941Smrg radv_emit_write_data_packet(cmd_buffer, va, 2, data); 539b8e80941Smrg} 540b8e80941Smrg 541b8e80941Smrgvoid radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, 542b8e80941Smrg VkPipelineBindPoint bind_point, 543b8e80941Smrg struct radv_descriptor_set *set, 544b8e80941Smrg unsigned idx) 545b8e80941Smrg{ 546b8e80941Smrg struct radv_descriptor_state *descriptors_state = 547b8e80941Smrg radv_get_descriptors_state(cmd_buffer, 
bind_point); 548b8e80941Smrg 549b8e80941Smrg descriptors_state->sets[idx] = set; 550b8e80941Smrg 551b8e80941Smrg descriptors_state->valid |= (1u << idx); /* active descriptors */ 552b8e80941Smrg descriptors_state->dirty |= (1u << idx); 553b8e80941Smrg} 554b8e80941Smrg 555b8e80941Smrgstatic void 556b8e80941Smrgradv_save_descriptors(struct radv_cmd_buffer *cmd_buffer, 557b8e80941Smrg VkPipelineBindPoint bind_point) 558b8e80941Smrg{ 559b8e80941Smrg struct radv_descriptor_state *descriptors_state = 560b8e80941Smrg radv_get_descriptors_state(cmd_buffer, bind_point); 561b8e80941Smrg struct radv_device *device = cmd_buffer->device; 562b8e80941Smrg uint32_t data[MAX_SETS * 2] = {}; 563b8e80941Smrg uint64_t va; 564b8e80941Smrg unsigned i; 565b8e80941Smrg va = radv_buffer_get_va(device->trace_bo) + 24; 566b8e80941Smrg 567b8e80941Smrg for_each_bit(i, descriptors_state->valid) { 568b8e80941Smrg struct radv_descriptor_set *set = descriptors_state->sets[i]; 569b8e80941Smrg data[i * 2] = (uint64_t)(uintptr_t)set; 570b8e80941Smrg data[i * 2 + 1] = (uint64_t)(uintptr_t)set >> 32; 571b8e80941Smrg } 572b8e80941Smrg 573b8e80941Smrg radv_emit_write_data_packet(cmd_buffer, va, MAX_SETS * 2, data); 574b8e80941Smrg} 575b8e80941Smrg 576b8e80941Smrgstruct radv_userdata_info * 577b8e80941Smrgradv_lookup_user_sgpr(struct radv_pipeline *pipeline, 578b8e80941Smrg gl_shader_stage stage, 579b8e80941Smrg int idx) 580b8e80941Smrg{ 581b8e80941Smrg struct radv_shader_variant *shader = radv_get_shader(pipeline, stage); 582b8e80941Smrg return &shader->info.user_sgprs_locs.shader_data[idx]; 583b8e80941Smrg} 584b8e80941Smrg 585b8e80941Smrgstatic void 586b8e80941Smrgradv_emit_userdata_address(struct radv_cmd_buffer *cmd_buffer, 587b8e80941Smrg struct radv_pipeline *pipeline, 588b8e80941Smrg gl_shader_stage stage, 589b8e80941Smrg int idx, uint64_t va) 590b8e80941Smrg{ 591b8e80941Smrg struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx); 592b8e80941Smrg uint32_t base_reg = 
pipeline->user_data_0[stage]; 593b8e80941Smrg if (loc->sgpr_idx == -1) 594b8e80941Smrg return; 595b8e80941Smrg 596b8e80941Smrg assert(loc->num_sgprs == 1); 597b8e80941Smrg 598b8e80941Smrg radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, 599b8e80941Smrg base_reg + loc->sgpr_idx * 4, va, false); 600b8e80941Smrg} 601b8e80941Smrg 602b8e80941Smrgstatic void 603b8e80941Smrgradv_emit_descriptor_pointers(struct radv_cmd_buffer *cmd_buffer, 604b8e80941Smrg struct radv_pipeline *pipeline, 605b8e80941Smrg struct radv_descriptor_state *descriptors_state, 606b8e80941Smrg gl_shader_stage stage) 607b8e80941Smrg{ 608b8e80941Smrg struct radv_device *device = cmd_buffer->device; 609b8e80941Smrg struct radeon_cmdbuf *cs = cmd_buffer->cs; 610b8e80941Smrg uint32_t sh_base = pipeline->user_data_0[stage]; 611b8e80941Smrg struct radv_userdata_locations *locs = 612b8e80941Smrg &pipeline->shaders[stage]->info.user_sgprs_locs; 613b8e80941Smrg unsigned mask = locs->descriptor_sets_enabled; 614b8e80941Smrg 615b8e80941Smrg mask &= descriptors_state->dirty & descriptors_state->valid; 616b8e80941Smrg 617b8e80941Smrg while (mask) { 618b8e80941Smrg int start, count; 619b8e80941Smrg 620b8e80941Smrg u_bit_scan_consecutive_range(&mask, &start, &count); 621b8e80941Smrg 622b8e80941Smrg struct radv_userdata_info *loc = &locs->descriptor_sets[start]; 623b8e80941Smrg unsigned sh_offset = sh_base + loc->sgpr_idx * 4; 624b8e80941Smrg 625b8e80941Smrg radv_emit_shader_pointer_head(cs, sh_offset, count, true); 626b8e80941Smrg for (int i = 0; i < count; i++) { 627b8e80941Smrg struct radv_descriptor_set *set = 628b8e80941Smrg descriptors_state->sets[start + i]; 629b8e80941Smrg 630b8e80941Smrg radv_emit_shader_pointer_body(device, cs, set->va, true); 631b8e80941Smrg } 632b8e80941Smrg } 633b8e80941Smrg} 634b8e80941Smrg 635b8e80941Smrgstatic void 636b8e80941Smrgradv_emit_inline_push_consts(struct radv_cmd_buffer *cmd_buffer, 637b8e80941Smrg struct radv_pipeline *pipeline, 638b8e80941Smrg gl_shader_stage 
stage, 639b8e80941Smrg int idx, int count, uint32_t *values) 640b8e80941Smrg{ 641b8e80941Smrg struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx); 642b8e80941Smrg uint32_t base_reg = pipeline->user_data_0[stage]; 643b8e80941Smrg if (loc->sgpr_idx == -1) 644b8e80941Smrg return; 645b8e80941Smrg 646b8e80941Smrg assert(loc->num_sgprs == count); 647b8e80941Smrg 648b8e80941Smrg radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, count); 649b8e80941Smrg radeon_emit_array(cmd_buffer->cs, values, count); 650b8e80941Smrg} 651b8e80941Smrg 652b8e80941Smrgstatic void 653b8e80941Smrgradv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer, 654b8e80941Smrg struct radv_pipeline *pipeline) 655b8e80941Smrg{ 656b8e80941Smrg int num_samples = pipeline->graphics.ms.num_samples; 657b8e80941Smrg struct radv_multisample_state *ms = &pipeline->graphics.ms; 658b8e80941Smrg struct radv_pipeline *old_pipeline = cmd_buffer->state.emitted_pipeline; 659b8e80941Smrg 660b8e80941Smrg if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.needs_sample_positions) 661b8e80941Smrg cmd_buffer->sample_positions_needed = true; 662b8e80941Smrg 663b8e80941Smrg if (old_pipeline && num_samples == old_pipeline->graphics.ms.num_samples) 664b8e80941Smrg return; 665b8e80941Smrg 666b8e80941Smrg radeon_set_context_reg_seq(cmd_buffer->cs, R_028BDC_PA_SC_LINE_CNTL, 2); 667b8e80941Smrg radeon_emit(cmd_buffer->cs, ms->pa_sc_line_cntl); 668b8e80941Smrg radeon_emit(cmd_buffer->cs, ms->pa_sc_aa_config); 669b8e80941Smrg 670b8e80941Smrg radeon_set_context_reg(cmd_buffer->cs, R_028A48_PA_SC_MODE_CNTL_0, ms->pa_sc_mode_cntl_0); 671b8e80941Smrg 672b8e80941Smrg radv_cayman_emit_msaa_sample_locs(cmd_buffer->cs, num_samples); 673b8e80941Smrg 674b8e80941Smrg /* GFX9: Flush DFSM when the AA mode changes. 
 */
	if (cmd_buffer->device->dfsm_allowed) {
		radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
	}

	cmd_buffer->state.context_roll_without_scissor_emitted = true;
}

/* Prefetch a shader's code into L2 through a CP DMA request. No-op when
 * the stage has no shader bound (shader == NULL).
 */
static void
radv_emit_shader_prefetch(struct radv_cmd_buffer *cmd_buffer,
			  struct radv_shader_variant *shader)
{
	uint64_t va;

	if (!shader)
		return;

	va = radv_buffer_get_va(shader->bo) + shader->bo_offset;

	si_cp_dma_prefetch(cmd_buffer, va, shader->code_size);
}

/* Prefetch into L2 everything still pending in state->prefetch_L2_mask:
 * shader binaries for each active stage and the vertex buffer descriptors.
 * When vertex_stage_only is set, only the VS binary and the VBO descriptors
 * are prefetched so a draw can start as early as possible; the remaining
 * bits stay pending. Serviced bits are cleared from the mask on return.
 */
static void
radv_emit_prefetch_L2(struct radv_cmd_buffer *cmd_buffer,
		      struct radv_pipeline *pipeline,
		      bool vertex_stage_only)
{
	struct radv_cmd_state *state = &cmd_buffer->state;
	uint32_t mask = state->prefetch_L2_mask;

	if (vertex_stage_only) {
		/* Fast prefetch path for starting draws as soon as possible. */
		mask = state->prefetch_L2_mask & (RADV_PREFETCH_VS |
						  RADV_PREFETCH_VBO_DESCRIPTORS);
	}

	if (mask & RADV_PREFETCH_VS)
		radv_emit_shader_prefetch(cmd_buffer,
					  pipeline->shaders[MESA_SHADER_VERTEX]);

	if (mask & RADV_PREFETCH_VBO_DESCRIPTORS)
		si_cp_dma_prefetch(cmd_buffer, state->vb_va, state->vb_size);

	if (mask & RADV_PREFETCH_TCS)
		radv_emit_shader_prefetch(cmd_buffer,
					  pipeline->shaders[MESA_SHADER_TESS_CTRL]);

	if (mask & RADV_PREFETCH_TES)
		radv_emit_shader_prefetch(cmd_buffer,
					  pipeline->shaders[MESA_SHADER_TESS_EVAL]);

	if (mask & RADV_PREFETCH_GS) {
		radv_emit_shader_prefetch(cmd_buffer,
					  pipeline->shaders[MESA_SHADER_GEOMETRY]);
		/* The GS copy shader is a separate binary with its own BO. */
		radv_emit_shader_prefetch(cmd_buffer, pipeline->gs_copy_shader);
	}

	if (mask & RADV_PREFETCH_PS)
		radv_emit_shader_prefetch(cmd_buffer,
					  pipeline->shaders[MESA_SHADER_FRAGMENT]);

	state->prefetch_L2_mask &= ~mask;
}

/* Program the RB+ (render backend plus) registers SX_PS_DOWNCONVERT,
 * SX_BLEND_OPT_EPSILON and SX_BLEND_OPT_CONTROL from the bound pipeline's
 * export formats and the current subpass' color attachments. Only runs when
 * the physical device allows RB+.
 */
static void
radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer)
{
	if (!cmd_buffer->device->physical_device->rbplus_allowed)
		return;

	struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
	struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
	const struct radv_subpass *subpass = cmd_buffer->state.subpass;

	unsigned sx_ps_downconvert = 0;
	unsigned sx_blend_opt_epsilon = 0;
	unsigned sx_blend_opt_control = 0;

	/* Each MRT uses a 4-bit field in each of the three registers. */
	for (unsigned i = 0; i < subpass->color_count; ++i) {
		if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) {
			sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
			sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
			continue;
		}

		int idx = subpass->color_attachments[i].attachment;
		struct radv_color_buffer_info *cb = &framebuffer->attachments[idx].cb;

		unsigned format = G_028C70_FORMAT(cb->cb_color_info);
		unsigned swap = G_028C70_COMP_SWAP(cb->cb_color_info);
		uint32_t spi_format = (pipeline->graphics.col_format >> (i * 4)) & 0xf;
		uint32_t colormask = (pipeline->graphics.cb_target_mask >> (i * 4)) & 0xf;

		bool has_alpha, has_rgb;

		/* Set if RGB and A are present. */
		has_alpha = !G_028C74_FORCE_DST_ALPHA_1(cb->cb_color_attrib);

		if (format == V_028C70_COLOR_8 ||
		    format == V_028C70_COLOR_16 ||
		    format == V_028C70_COLOR_32)
			has_rgb = !has_alpha;
		else
			has_rgb = true;

		/* Check the colormask and export format. */
		if (!(colormask & 0x7))
			has_rgb = false;
		if (!(colormask & 0x8))
			has_alpha = false;

		if (spi_format == V_028714_SPI_SHADER_ZERO) {
			has_rgb = false;
			has_alpha = false;
		}

		/* Disable value checking for disabled channels. */
		if (!has_rgb)
			sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
		if (!has_alpha)
			sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);

		/* Enable down-conversion for 32bpp and smaller formats. */
		switch (format) {
		case V_028C70_COLOR_8:
		case V_028C70_COLOR_8_8:
		case V_028C70_COLOR_8_8_8_8:
			/* For 1 and 2-channel formats, use the superset thereof. */
			if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
			    spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
			    spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
				sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
				sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4);
			}
			break;

		case V_028C70_COLOR_5_6_5:
			if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
				sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);
				sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4);
			}
			break;

		case V_028C70_COLOR_1_5_5_5:
			if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
				sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);
				sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4);
			}
			break;

		case V_028C70_COLOR_4_4_4_4:
			if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
				sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);
				sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4);
			}
			break;

		case V_028C70_COLOR_32:
			if (swap == V_028C70_SWAP_STD &&
			    spi_format == V_028714_SPI_SHADER_32_R)
				sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
			else if (swap == V_028C70_SWAP_ALT_REV &&
				 spi_format == V_028714_SPI_SHADER_32_AR)
				sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);
			break;

		case V_028C70_COLOR_16:
		case V_028C70_COLOR_16_16:
			/* For 1-channel formats, use the superset thereof. */
			if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
			    spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
			    spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
			    spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
				if (swap == V_028C70_SWAP_STD ||
				    swap == V_028C70_SWAP_STD_REV)
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
				else
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
			}
			break;

		case V_028C70_COLOR_10_11_11:
			if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
				sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);
				sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4);
			}
			break;

		case V_028C70_COLOR_2_10_10_10:
			if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
				sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
				sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
			}
			break;
		}
	}

	/* Disable the blend optimizations for the remaining, unused MRTs. */
	for (unsigned i = subpass->color_count; i < 8; ++i) {
		sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
		sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
	}
	/* TODO: avoid redundantly setting context registers */
	radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3);
	radeon_emit(cmd_buffer->cs, sx_ps_downconvert);
	radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon);
	radeon_emit(cmd_buffer->cs, sx_blend_opt_control);

	cmd_buffer->state.context_roll_without_scissor_emitted = true;
}

/* Emit the bound graphics pipeline's pre-built command streams. Skipped when
 * the same pipeline object is already emitted; the context part (ctx_cs) is
 * additionally skipped when its size, hash and contents all match the
 * previously emitted pipeline's, to avoid needless context rolls.
 */
static void
radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
{
	struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;

	if (!pipeline || cmd_buffer->state.emitted_pipeline == pipeline)
		return;

	radv_update_multisample_state(cmd_buffer, pipeline);

	cmd_buffer->scratch_size_needed =
	        MAX2(cmd_buffer->scratch_size_needed,
	             pipeline->max_waves * pipeline->scratch_bytes_per_wave);

	/* The guardband is computed from the scissor state, so a change in
	 * guardband usage forces a scissor re-emit.
	 */
	if (!cmd_buffer->state.emitted_pipeline ||
	    cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband !=
	     pipeline->graphics.can_use_guardband)
		cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR;

	radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw);

	if (!cmd_buffer->state.emitted_pipeline ||
	    cmd_buffer->state.emitted_pipeline->ctx_cs.cdw != pipeline->ctx_cs.cdw ||
	    cmd_buffer->state.emitted_pipeline->ctx_cs_hash != pipeline->ctx_cs_hash ||
	    memcmp(cmd_buffer->state.emitted_pipeline->ctx_cs.buf,
	           pipeline->ctx_cs.buf, pipeline->ctx_cs.cdw * 4)) {
		radeon_emit_array(cmd_buffer->cs, pipeline->ctx_cs.buf, pipeline->ctx_cs.cdw);
		cmd_buffer->state.context_roll_without_scissor_emitted = true;
	}

	/* Reference every stage's shader BO so the kernel keeps it resident. */
	for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
		if (!pipeline->shaders[i])
			continue;

		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
				   pipeline->shaders[i]->bo);
	}

	if (radv_pipeline_has_gs(pipeline))
		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
				   pipeline->gs_copy_shader->bo);

	if (unlikely(cmd_buffer->device->trace_bo))
		radv_save_pipeline(cmd_buffer, pipeline, RING_GFX);

	cmd_buffer->state.emitted_pipeline = pipeline;

	cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_PIPELINE;
}

/* Emit the current dynamic viewport state. */
static void
radv_emit_viewport(struct radv_cmd_buffer *cmd_buffer)
{
	si_write_viewport(cmd_buffer->cs, 0, cmd_buffer->state.dynamic.viewport.count,
			  cmd_buffer->state.dynamic.viewport.viewports);
}

/* Emit the current dynamic scissor state; the viewports are needed to
 * compute the guardband.
 */
static void
radv_emit_scissor(struct radv_cmd_buffer *cmd_buffer)
{
	uint32_t count = cmd_buffer->state.dynamic.scissor.count;

	si_write_scissors(cmd_buffer->cs, 0, count,
			  cmd_buffer->state.dynamic.scissor.scissors,
			  cmd_buffer->state.dynamic.viewport.viewports,
			  cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband);

	cmd_buffer->state.context_roll_without_scissor_emitted = false;
}

/* Emit the VK_EXT_discard_rectangles state as PA_SC_CLIPRECT registers
 * (one TL/BR register pair per rectangle).
 */
static void
radv_emit_discard_rectangle(struct radv_cmd_buffer *cmd_buffer)
{
	if (!cmd_buffer->state.dynamic.discard_rectangle.count)
		return;

	radeon_set_context_reg_seq(cmd_buffer->cs, R_028210_PA_SC_CLIPRECT_0_TL,
				   cmd_buffer->state.dynamic.discard_rectangle.count * 2);
	for (unsigned i = 0; i < cmd_buffer->state.dynamic.discard_rectangle.count; ++i) {
		VkRect2D rect = cmd_buffer->state.dynamic.discard_rectangle.rectangles[i];
		radeon_emit(cmd_buffer->cs, S_028210_TL_X(rect.offset.x) | S_028210_TL_Y(rect.offset.y));
		radeon_emit(cmd_buffer->cs, S_028214_BR_X(rect.offset.x + rect.extent.width) |
					    S_028214_BR_Y(rect.offset.y + rect.extent.height));
	}
}

/* Emit the dynamic line width. The register takes the width in units of
 * 1/8th pixel, clamped to the 12-bit field.
 */
static void
radv_emit_line_width(struct radv_cmd_buffer *cmd_buffer)
{
	unsigned width = cmd_buffer->state.dynamic.line_width * 8;

	radeon_set_context_reg(cmd_buffer->cs, R_028A08_PA_SU_LINE_CNTL,
			       S_028A08_WIDTH(CLAMP(width, 0, 0xFFF)));
}

/* Emit the four dynamic blend constants (R, G, B, A as raw float bits). */
static void
radv_emit_blend_constants(struct radv_cmd_buffer *cmd_buffer)
{
	struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;

	radeon_set_context_reg_seq(cmd_buffer->cs, R_028414_CB_BLEND_RED, 4);
	radeon_emit_array(cmd_buffer->cs, (uint32_t *)d->blend_constants, 4);
}

/* Emit the dynamic stencil reference/compare/write masks for the front
 * (DB_STENCILREFMASK) and back (DB_STENCILREFMASK_BF) faces.
 */
static void
radv_emit_stencil(struct radv_cmd_buffer *cmd_buffer)
{
	struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;

	radeon_set_context_reg_seq(cmd_buffer->cs,
				   R_028430_DB_STENCILREFMASK, 2);
	radeon_emit(cmd_buffer->cs,
		    S_028430_STENCILTESTVAL(d->stencil_reference.front) |
		    S_028430_STENCILMASK(d->stencil_compare_mask.front) |
		    S_028430_STENCILWRITEMASK(d->stencil_write_mask.front) |
		    S_028430_STENCILOPVAL(1));
	radeon_emit(cmd_buffer->cs,
		    S_028434_STENCILTESTVAL_BF(d->stencil_reference.back) |
		    S_028434_STENCILMASK_BF(d->stencil_compare_mask.back) |
		    S_028434_STENCILWRITEMASK_BF(d->stencil_write_mask.back) |
		    S_028434_STENCILOPVAL_BF(1));
}

/* Emit the dynamic depth-bounds test limits. */
static void
radv_emit_depth_bounds(struct radv_cmd_buffer *cmd_buffer)
{
	struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;

	radeon_set_context_reg(cmd_buffer->cs, R_028020_DB_DEPTH_BOUNDS_MIN,
			       fui(d->depth_bounds.min));
	radeon_set_context_reg(cmd_buffer->cs, R_028024_DB_DEPTH_BOUNDS_MAX,
			       fui(d->depth_bounds.max));
}

/* Emit the dynamic depth-bias (polygon offset) state. The slope scale is
 * in 1/16th units and the constant bias is pre-scaled by the depth-surface
 * dependent offset_scale; front and back faces get identical values.
 */
static void
radv_emit_depth_bias(struct radv_cmd_buffer *cmd_buffer)
{
	struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
	unsigned slope = fui(d->depth_bias.slope * 16.0f);
	unsigned bias = fui(d->depth_bias.bias * cmd_buffer->state.offset_scale);

	radeon_set_context_reg_seq(cmd_buffer->cs,
				   R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 5);
	radeon_emit(cmd_buffer->cs, fui(d->depth_bias.clamp)); /* CLAMP */
	radeon_emit(cmd_buffer->cs, slope); /* FRONT SCALE */
	radeon_emit(cmd_buffer->cs, bias); /* FRONT OFFSET */
	radeon_emit(cmd_buffer->cs, slope); /* BACK SCALE */
	radeon_emit(cmd_buffer->cs, bias); /* BACK OFFSET */
}
/* Emit the color-buffer registers for framebuffer attachment 'index'.
 * DCC_ENABLE is masked out of CB_COLOR_INFO when the image layout does not
 * allow DCC compression. Register layout differs per generation: GFX9+ uses
 * 64-bit base addresses and CB_MRT*_EPITCH, older chips use pitch/slice
 * registers (with a DCC base only on VI+).
 */
static void
radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer,
			 int index,
			 struct radv_attachment_info *att,
			 struct radv_image *image,
			 VkImageLayout layout)
{
	bool is_vi = cmd_buffer->device->physical_device->rad_info.chip_class >= VI;
	struct radv_color_buffer_info *cb = &att->cb;
	uint32_t cb_color_info = cb->cb_color_info;

	if (!radv_layout_dcc_compressed(image, layout,
	                                radv_image_queue_family_mask(image,
	                                                             cmd_buffer->queue_family_index,
	                                                             cmd_buffer->queue_family_index))) {
		cb_color_info &= C_028C70_DCC_ENABLE;
	}

	if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
		radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
		radeon_emit(cmd_buffer->cs, cb->cb_color_base);
		radeon_emit(cmd_buffer->cs, S_028C64_BASE_256B(cb->cb_color_base >> 32));
		radeon_emit(cmd_buffer->cs, cb->cb_color_attrib2);
		radeon_emit(cmd_buffer->cs, cb->cb_color_view);
		radeon_emit(cmd_buffer->cs, cb_color_info);
		radeon_emit(cmd_buffer->cs, cb->cb_color_attrib);
		radeon_emit(cmd_buffer->cs, cb->cb_dcc_control);
		radeon_emit(cmd_buffer->cs, cb->cb_color_cmask);
		radeon_emit(cmd_buffer->cs, S_028C80_BASE_256B(cb->cb_color_cmask >> 32));
		radeon_emit(cmd_buffer->cs, cb->cb_color_fmask);
		radeon_emit(cmd_buffer->cs, S_028C88_BASE_256B(cb->cb_color_fmask >> 32));

		radeon_set_context_reg_seq(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, 2);
		radeon_emit(cmd_buffer->cs, cb->cb_dcc_base);
		radeon_emit(cmd_buffer->cs, S_028C98_BASE_256B(cb->cb_dcc_base >> 32));

		radeon_set_context_reg(cmd_buffer->cs, R_0287A0_CB_MRT0_EPITCH + index * 4,
				       cb->cb_mrt_epitch);
	} else {
		radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
		radeon_emit(cmd_buffer->cs, cb->cb_color_base);
		radeon_emit(cmd_buffer->cs, cb->cb_color_pitch);
		radeon_emit(cmd_buffer->cs, cb->cb_color_slice);
		radeon_emit(cmd_buffer->cs, cb->cb_color_view);
		radeon_emit(cmd_buffer->cs, cb_color_info);
		radeon_emit(cmd_buffer->cs, cb->cb_color_attrib);
		radeon_emit(cmd_buffer->cs, cb->cb_dcc_control);
		radeon_emit(cmd_buffer->cs, cb->cb_color_cmask);
		radeon_emit(cmd_buffer->cs, cb->cb_color_cmask_slice);
		radeon_emit(cmd_buffer->cs, cb->cb_color_fmask);
		radeon_emit(cmd_buffer->cs, cb->cb_color_fmask_slice);

		if (is_vi) { /* DCC BASE */
			radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, cb->cb_dcc_base);
		}
	}

	if (radv_image_has_dcc(image)) {
		/* Drawing with DCC enabled also compresses colorbuffers. */
		radv_update_dcc_metadata(cmd_buffer, image, true);
	}
}

/* Re-emit DB_Z_INFO with ZRANGE_PRECISION cleared for TC-compatible HTILE
 * images (workaround for the TC-compat ZRANGE_PRECISION bug). When
 * requires_cond_exec is set, the SET_CONTEXT_REG is guarded by a COND_EXEC
 * on the image's tc_compat_zrange metadata word, so the register is only
 * written when that predicate is non-zero.
 */
static void
radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer,
			     struct radv_ds_buffer_info *ds,
			     struct radv_image *image, VkImageLayout layout,
			     bool requires_cond_exec)
{
	uint32_t db_z_info = ds->db_z_info;
	uint32_t db_z_info_reg;

	if (!radv_image_is_tc_compat_htile(image))
		return;

	if (!radv_layout_has_htile(image, layout,
	                           radv_image_queue_family_mask(image,
	                                                        cmd_buffer->queue_family_index,
	                                                        cmd_buffer->queue_family_index))) {
		db_z_info &= C_028040_TILE_SURFACE_ENABLE;
	}

	db_z_info &= C_028040_ZRANGE_PRECISION;

	/* DB_Z_INFO lives at a different offset on GFX9+. */
	if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
		db_z_info_reg = R_028038_DB_Z_INFO;
	} else {
		db_z_info_reg = R_028040_DB_Z_INFO;
	}

	/* When we don't know the last fast clear value we need to emit a
	 * conditional packet that will eventually skip the following
	 * SET_CONTEXT_REG packet.
	 */
	if (requires_cond_exec) {
		uint64_t va = radv_buffer_get_va(image->bo);
		va += image->offset + image->tc_compat_zrange_offset;

		radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_EXEC, 3, 0));
		radeon_emit(cmd_buffer->cs, va);
		radeon_emit(cmd_buffer->cs, va >> 32);
		radeon_emit(cmd_buffer->cs, 0);
		radeon_emit(cmd_buffer->cs, 3); /* SET_CONTEXT_REG size */
	}

	radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info);
}

/* Emit the depth/stencil buffer registers for the bound attachment. HTILE
 * access is disabled (TILE_SURFACE_ENABLE cleared, TILE_STENCIL_DISABLE set)
 * when the image layout does not allow HTILE. GFX9+ programs 64-bit
 * read/write base addresses plus DB_Z_INFO2/DB_STENCIL_INFO2; older chips
 * use the 32-bit DB_DEPTH_INFO sequence.
 */
static void
radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer,
		      struct radv_ds_buffer_info *ds,
		      struct radv_image *image,
		      VkImageLayout layout)
{
	uint32_t db_z_info = ds->db_z_info;
	uint32_t db_stencil_info = ds->db_stencil_info;

	if (!radv_layout_has_htile(image, layout,
	                           radv_image_queue_family_mask(image,
	                                                        cmd_buffer->queue_family_index,
	                                                        cmd_buffer->queue_family_index))) {
		db_z_info &= C_028040_TILE_SURFACE_ENABLE;
		db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
	}

	radeon_set_context_reg(cmd_buffer->cs, R_028008_DB_DEPTH_VIEW, ds->db_depth_view);
	radeon_set_context_reg(cmd_buffer->cs, R_028ABC_DB_HTILE_SURFACE, ds->db_htile_surface);

	if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
		radeon_set_context_reg_seq(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, 3);
		radeon_emit(cmd_buffer->cs, ds->db_htile_data_base);
		radeon_emit(cmd_buffer->cs, S_028018_BASE_HI(ds->db_htile_data_base >> 32));
		radeon_emit(cmd_buffer->cs, ds->db_depth_size);

		radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 10);
		radeon_emit(cmd_buffer->cs, db_z_info);			/* DB_Z_INFO */
		radeon_emit(cmd_buffer->cs, db_stencil_info);		/* DB_STENCIL_INFO */
		radeon_emit(cmd_buffer->cs, ds->db_z_read_base);	/* DB_Z_READ_BASE */
		radeon_emit(cmd_buffer->cs, S_028044_BASE_HI(ds->db_z_read_base >> 32));	/* DB_Z_READ_BASE_HI */
		radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base);	/* DB_STENCIL_READ_BASE */
		radeon_emit(cmd_buffer->cs, S_02804C_BASE_HI(ds->db_stencil_read_base >> 32)); /* DB_STENCIL_READ_BASE_HI */
		radeon_emit(cmd_buffer->cs, ds->db_z_write_base);	/* DB_Z_WRITE_BASE */
		radeon_emit(cmd_buffer->cs, S_028054_BASE_HI(ds->db_z_write_base >> 32));	/* DB_Z_WRITE_BASE_HI */
		radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base);	/* DB_STENCIL_WRITE_BASE */
		radeon_emit(cmd_buffer->cs, S_02805C_BASE_HI(ds->db_stencil_write_base >> 32)); /* DB_STENCIL_WRITE_BASE_HI */

		radeon_set_context_reg_seq(cmd_buffer->cs, R_028068_DB_Z_INFO2, 2);
		radeon_emit(cmd_buffer->cs, ds->db_z_info2);
		radeon_emit(cmd_buffer->cs, ds->db_stencil_info2);
	} else {
		radeon_set_context_reg(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, ds->db_htile_data_base);

		radeon_set_context_reg_seq(cmd_buffer->cs, R_02803C_DB_DEPTH_INFO, 9);
		radeon_emit(cmd_buffer->cs, ds->db_depth_info);	/* R_02803C_DB_DEPTH_INFO */
		radeon_emit(cmd_buffer->cs, db_z_info);			/* R_028040_DB_Z_INFO */
		radeon_emit(cmd_buffer->cs, db_stencil_info);	/* R_028044_DB_STENCIL_INFO */
		radeon_emit(cmd_buffer->cs, ds->db_z_read_base);	/* R_028048_DB_Z_READ_BASE */
		radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base);	/* R_02804C_DB_STENCIL_READ_BASE */
		radeon_emit(cmd_buffer->cs, ds->db_z_write_base);	/* R_028050_DB_Z_WRITE_BASE */
		radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base);	/* R_028054_DB_STENCIL_WRITE_BASE */
		radeon_emit(cmd_buffer->cs, ds->db_depth_size);	/* R_028058_DB_DEPTH_SIZE */
		radeon_emit(cmd_buffer->cs, ds->db_depth_slice);	/* R_02805C_DB_DEPTH_SLICE */

	}

	/* Update the ZRANGE_PRECISION value for the TC-compat bug. */
	radv_update_zrange_precision(cmd_buffer, ds, image, layout, true);

	radeon_set_context_reg(cmd_buffer->cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
			       ds->pa_su_poly_offset_db_fmt_cntl);
}

/**
 * Update the fast clear depth/stencil values if the image is bound as a
 * depth/stencil buffer.
1213b8e80941Smrg */ 1214b8e80941Smrgstatic void 1215b8e80941Smrgradv_update_bound_fast_clear_ds(struct radv_cmd_buffer *cmd_buffer, 1216b8e80941Smrg struct radv_image *image, 1217b8e80941Smrg VkClearDepthStencilValue ds_clear_value, 1218b8e80941Smrg VkImageAspectFlags aspects) 1219b8e80941Smrg{ 1220b8e80941Smrg struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer; 1221b8e80941Smrg const struct radv_subpass *subpass = cmd_buffer->state.subpass; 1222b8e80941Smrg struct radeon_cmdbuf *cs = cmd_buffer->cs; 1223b8e80941Smrg struct radv_attachment_info *att; 1224b8e80941Smrg uint32_t att_idx; 1225b8e80941Smrg 1226b8e80941Smrg if (!framebuffer || !subpass) 1227b8e80941Smrg return; 1228b8e80941Smrg 1229b8e80941Smrg if (!subpass->depth_stencil_attachment) 1230b8e80941Smrg return; 1231b8e80941Smrg 1232b8e80941Smrg att_idx = subpass->depth_stencil_attachment->attachment; 1233b8e80941Smrg att = &framebuffer->attachments[att_idx]; 1234b8e80941Smrg if (att->attachment->image != image) 1235b8e80941Smrg return; 1236b8e80941Smrg 1237b8e80941Smrg radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2); 1238b8e80941Smrg radeon_emit(cs, ds_clear_value.stencil); 1239b8e80941Smrg radeon_emit(cs, fui(ds_clear_value.depth)); 1240b8e80941Smrg 1241b8e80941Smrg /* Update the ZRANGE_PRECISION value for the TC-compat bug. This is 1242b8e80941Smrg * only needed when clearing Z to 0.0. 1243b8e80941Smrg */ 1244b8e80941Smrg if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && 1245b8e80941Smrg ds_clear_value.depth == 0.0) { 1246b8e80941Smrg VkImageLayout layout = subpass->depth_stencil_attachment->layout; 1247b8e80941Smrg 1248b8e80941Smrg radv_update_zrange_precision(cmd_buffer, &att->ds, image, 1249b8e80941Smrg layout, false); 1250b8e80941Smrg } 1251b8e80941Smrg 1252b8e80941Smrg cmd_buffer->state.context_roll_without_scissor_emitted = true; 1253b8e80941Smrg} 1254b8e80941Smrg 1255b8e80941Smrg/** 1256b8e80941Smrg * Set the clear depth/stencil values to the image's metadata. 
1257b8e80941Smrg */ 1258b8e80941Smrgstatic void 1259b8e80941Smrgradv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, 1260b8e80941Smrg struct radv_image *image, 1261b8e80941Smrg VkClearDepthStencilValue ds_clear_value, 1262b8e80941Smrg VkImageAspectFlags aspects) 1263b8e80941Smrg{ 1264b8e80941Smrg struct radeon_cmdbuf *cs = cmd_buffer->cs; 1265b8e80941Smrg uint64_t va = radv_buffer_get_va(image->bo); 1266b8e80941Smrg unsigned reg_offset = 0, reg_count = 0; 1267b8e80941Smrg 1268b8e80941Smrg va += image->offset + image->clear_value_offset; 1269b8e80941Smrg 1270b8e80941Smrg if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { 1271b8e80941Smrg ++reg_count; 1272b8e80941Smrg } else { 1273b8e80941Smrg ++reg_offset; 1274b8e80941Smrg va += 4; 1275b8e80941Smrg } 1276b8e80941Smrg if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) 1277b8e80941Smrg ++reg_count; 1278b8e80941Smrg 1279b8e80941Smrg radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + reg_count, cmd_buffer->state.predicating)); 1280b8e80941Smrg radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | 1281b8e80941Smrg S_370_WR_CONFIRM(1) | 1282b8e80941Smrg S_370_ENGINE_SEL(V_370_PFP)); 1283b8e80941Smrg radeon_emit(cs, va); 1284b8e80941Smrg radeon_emit(cs, va >> 32); 1285b8e80941Smrg if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) 1286b8e80941Smrg radeon_emit(cs, ds_clear_value.stencil); 1287b8e80941Smrg if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) 1288b8e80941Smrg radeon_emit(cs, fui(ds_clear_value.depth)); 1289b8e80941Smrg} 1290b8e80941Smrg 1291b8e80941Smrg/** 1292b8e80941Smrg * Update the TC-compat metadata value for this image. 
1293b8e80941Smrg */ 1294b8e80941Smrgstatic void 1295b8e80941Smrgradv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, 1296b8e80941Smrg struct radv_image *image, 1297b8e80941Smrg uint32_t value) 1298b8e80941Smrg{ 1299b8e80941Smrg struct radeon_cmdbuf *cs = cmd_buffer->cs; 1300b8e80941Smrg uint64_t va = radv_buffer_get_va(image->bo); 1301b8e80941Smrg va += image->offset + image->tc_compat_zrange_offset; 1302b8e80941Smrg 1303b8e80941Smrg radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, cmd_buffer->state.predicating)); 1304b8e80941Smrg radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | 1305b8e80941Smrg S_370_WR_CONFIRM(1) | 1306b8e80941Smrg S_370_ENGINE_SEL(V_370_PFP)); 1307b8e80941Smrg radeon_emit(cs, va); 1308b8e80941Smrg radeon_emit(cs, va >> 32); 1309b8e80941Smrg radeon_emit(cs, value); 1310b8e80941Smrg} 1311b8e80941Smrg 1312b8e80941Smrgstatic void 1313b8e80941Smrgradv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, 1314b8e80941Smrg struct radv_image *image, 1315b8e80941Smrg VkClearDepthStencilValue ds_clear_value) 1316b8e80941Smrg{ 1317b8e80941Smrg uint64_t va = radv_buffer_get_va(image->bo); 1318b8e80941Smrg va += image->offset + image->tc_compat_zrange_offset; 1319b8e80941Smrg uint32_t cond_val; 1320b8e80941Smrg 1321b8e80941Smrg /* Conditionally set DB_Z_INFO.ZRANGE_PRECISION to 0 when the last 1322b8e80941Smrg * depth clear value is 0.0f. 1323b8e80941Smrg */ 1324b8e80941Smrg cond_val = ds_clear_value.depth == 0.0f ? UINT_MAX : 0; 1325b8e80941Smrg 1326b8e80941Smrg radv_set_tc_compat_zrange_metadata(cmd_buffer, image, cond_val); 1327b8e80941Smrg} 1328b8e80941Smrg 1329b8e80941Smrg/** 1330b8e80941Smrg * Update the clear depth/stencil values for this image. 
 */
void
radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
			      struct radv_image *image,
			      VkClearDepthStencilValue ds_clear_value,
			      VkImageAspectFlags aspects)
{
	assert(radv_image_has_htile(image));

	/* Persist the clear values in the image's metadata buffer... */
	radv_set_ds_clear_metadata(cmd_buffer, image, ds_clear_value, aspects);

	/* ...refresh the TC-compat ZRANGE_PRECISION predicate when depth is
	 * cleared on a TC-compatible HTILE image...
	 */
	if (radv_image_is_tc_compat_htile(image) &&
	    (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) {
		radv_update_tc_compat_zrange_metadata(cmd_buffer, image,
						      ds_clear_value);
	}

	/* ...and update the clear registers if the image is currently bound. */
	radv_update_bound_fast_clear_ds(cmd_buffer, image, ds_clear_value,
					aspects);
}

/**
 * Load the clear depth/stencil values from the image's metadata.
 */
static void
radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
			    struct radv_image *image)
{
	struct radeon_cmdbuf *cs = cmd_buffer->cs;
	VkImageAspectFlags aspects = vk_format_aspects(image->vk_format);
	uint64_t va = radv_buffer_get_va(image->bo);
	unsigned reg_offset = 0, reg_count = 0;

	va += image->offset + image->clear_value_offset;

	if (!radv_image_has_htile(image))
		return;

	/* Metadata layout: stencil clear word, then depth clear word. Skip
	 * the stencil slot (both in memory and in the register offset) when
	 * the format has no stencil aspect.
	 */
	if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
		++reg_count;
	} else {
		++reg_offset;
		va += 4;
	}
	if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
		++reg_count;

	uint32_t reg = R_028028_DB_STENCIL_CLEAR + 4 * reg_offset;

	if (cmd_buffer->device->physical_device->has_load_ctx_reg_pkt) {
		/* Load the register(s) directly from memory. */
		radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG, 3, 0));
		radeon_emit(cs, va);
		radeon_emit(cs, va >> 32);
		radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
		radeon_emit(cs, reg_count);
	} else {
		/* Fallback: COPY_DATA mem->reg, then stall the PFP so
		 * subsequent packets see the updated register value.
		 */
		radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
		radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
				COPY_DATA_DST_SEL(COPY_DATA_REG) |
				(reg_count == 2 ? COPY_DATA_COUNT_SEL : 0));
		radeon_emit(cs, va);
		radeon_emit(cs, va >> 32);
		radeon_emit(cs, reg >> 2);
		radeon_emit(cs, 0);

		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
		radeon_emit(cs, 0);
	}
}

/*
 * With DCC some colors don't require CMASK elimination before being
 * used as a texture. This sets a predicate value to determine if the
 * cmask eliminate is required.
1405b8e80941Smrg */ 1406b8e80941Smrgvoid 1407b8e80941Smrgradv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, 1408b8e80941Smrg struct radv_image *image, bool value) 1409b8e80941Smrg{ 1410b8e80941Smrg uint64_t pred_val = value; 1411b8e80941Smrg uint64_t va = radv_buffer_get_va(image->bo); 1412b8e80941Smrg va += image->offset + image->fce_pred_offset; 1413b8e80941Smrg 1414b8e80941Smrg assert(radv_image_has_dcc(image)); 1415b8e80941Smrg 1416b8e80941Smrg radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 4, 0)); 1417b8e80941Smrg radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM) | 1418b8e80941Smrg S_370_WR_CONFIRM(1) | 1419b8e80941Smrg S_370_ENGINE_SEL(V_370_PFP)); 1420b8e80941Smrg radeon_emit(cmd_buffer->cs, va); 1421b8e80941Smrg radeon_emit(cmd_buffer->cs, va >> 32); 1422b8e80941Smrg radeon_emit(cmd_buffer->cs, pred_val); 1423b8e80941Smrg radeon_emit(cmd_buffer->cs, pred_val >> 32); 1424b8e80941Smrg} 1425b8e80941Smrg 1426b8e80941Smrg/** 1427b8e80941Smrg * Update the DCC predicate to reflect the compression state. 
1428b8e80941Smrg */ 1429b8e80941Smrgvoid 1430b8e80941Smrgradv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, 1431b8e80941Smrg struct radv_image *image, bool value) 1432b8e80941Smrg{ 1433b8e80941Smrg uint64_t pred_val = value; 1434b8e80941Smrg uint64_t va = radv_buffer_get_va(image->bo); 1435b8e80941Smrg va += image->offset + image->dcc_pred_offset; 1436b8e80941Smrg 1437b8e80941Smrg assert(radv_image_has_dcc(image)); 1438b8e80941Smrg 1439b8e80941Smrg radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 4, 0)); 1440b8e80941Smrg radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM) | 1441b8e80941Smrg S_370_WR_CONFIRM(1) | 1442b8e80941Smrg S_370_ENGINE_SEL(V_370_PFP)); 1443b8e80941Smrg radeon_emit(cmd_buffer->cs, va); 1444b8e80941Smrg radeon_emit(cmd_buffer->cs, va >> 32); 1445b8e80941Smrg radeon_emit(cmd_buffer->cs, pred_val); 1446b8e80941Smrg radeon_emit(cmd_buffer->cs, pred_val >> 32); 1447b8e80941Smrg} 1448b8e80941Smrg 1449b8e80941Smrg/** 1450b8e80941Smrg * Update the fast clear color values if the image is bound as a color buffer. 
1451b8e80941Smrg */ 1452b8e80941Smrgstatic void 1453b8e80941Smrgradv_update_bound_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, 1454b8e80941Smrg struct radv_image *image, 1455b8e80941Smrg int cb_idx, 1456b8e80941Smrg uint32_t color_values[2]) 1457b8e80941Smrg{ 1458b8e80941Smrg struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer; 1459b8e80941Smrg const struct radv_subpass *subpass = cmd_buffer->state.subpass; 1460b8e80941Smrg struct radeon_cmdbuf *cs = cmd_buffer->cs; 1461b8e80941Smrg struct radv_attachment_info *att; 1462b8e80941Smrg uint32_t att_idx; 1463b8e80941Smrg 1464b8e80941Smrg if (!framebuffer || !subpass) 1465b8e80941Smrg return; 1466b8e80941Smrg 1467b8e80941Smrg att_idx = subpass->color_attachments[cb_idx].attachment; 1468b8e80941Smrg if (att_idx == VK_ATTACHMENT_UNUSED) 1469b8e80941Smrg return; 1470b8e80941Smrg 1471b8e80941Smrg att = &framebuffer->attachments[att_idx]; 1472b8e80941Smrg if (att->attachment->image != image) 1473b8e80941Smrg return; 1474b8e80941Smrg 1475b8e80941Smrg radeon_set_context_reg_seq(cs, R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c, 2); 1476b8e80941Smrg radeon_emit(cs, color_values[0]); 1477b8e80941Smrg radeon_emit(cs, color_values[1]); 1478b8e80941Smrg 1479b8e80941Smrg cmd_buffer->state.context_roll_without_scissor_emitted = true; 1480b8e80941Smrg} 1481b8e80941Smrg 1482b8e80941Smrg/** 1483b8e80941Smrg * Set the clear color values to the image's metadata. 
1484b8e80941Smrg */ 1485b8e80941Smrgstatic void 1486b8e80941Smrgradv_set_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, 1487b8e80941Smrg struct radv_image *image, 1488b8e80941Smrg uint32_t color_values[2]) 1489b8e80941Smrg{ 1490b8e80941Smrg struct radeon_cmdbuf *cs = cmd_buffer->cs; 1491b8e80941Smrg uint64_t va = radv_buffer_get_va(image->bo); 1492b8e80941Smrg 1493b8e80941Smrg va += image->offset + image->clear_value_offset; 1494b8e80941Smrg 1495b8e80941Smrg assert(radv_image_has_cmask(image) || radv_image_has_dcc(image)); 1496b8e80941Smrg 1497b8e80941Smrg radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, cmd_buffer->state.predicating)); 1498b8e80941Smrg radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | 1499b8e80941Smrg S_370_WR_CONFIRM(1) | 1500b8e80941Smrg S_370_ENGINE_SEL(V_370_PFP)); 1501b8e80941Smrg radeon_emit(cs, va); 1502b8e80941Smrg radeon_emit(cs, va >> 32); 1503b8e80941Smrg radeon_emit(cs, color_values[0]); 1504b8e80941Smrg radeon_emit(cs, color_values[1]); 1505b8e80941Smrg} 1506b8e80941Smrg 1507b8e80941Smrg/** 1508b8e80941Smrg * Update the clear color values for this image. 1509b8e80941Smrg */ 1510b8e80941Smrgvoid 1511b8e80941Smrgradv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, 1512b8e80941Smrg struct radv_image *image, 1513b8e80941Smrg int cb_idx, 1514b8e80941Smrg uint32_t color_values[2]) 1515b8e80941Smrg{ 1516b8e80941Smrg assert(radv_image_has_cmask(image) || radv_image_has_dcc(image)); 1517b8e80941Smrg 1518b8e80941Smrg radv_set_color_clear_metadata(cmd_buffer, image, color_values); 1519b8e80941Smrg 1520b8e80941Smrg radv_update_bound_fast_clear_color(cmd_buffer, image, cb_idx, 1521b8e80941Smrg color_values); 1522b8e80941Smrg} 1523b8e80941Smrg 1524b8e80941Smrg/** 1525b8e80941Smrg * Load the clear color values from the image's metadata. 
 */
static void
radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
			       struct radv_image *image,
			       int cb_idx)
{
	struct radeon_cmdbuf *cs = cmd_buffer->cs;
	uint64_t va = radv_buffer_get_va(image->bo);

	va += image->offset + image->clear_value_offset;

	/* Only images with CMASK or DCC carry fast clear color metadata. */
	if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image))
		return;

	/* CB registers are spaced 0x3c bytes apart per color buffer. */
	uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c;

	if (cmd_buffer->device->physical_device->has_load_ctx_reg_pkt) {
		/* Load both clear words from memory in one packet. */
		radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG, 3, cmd_buffer->state.predicating));
		radeon_emit(cs, va);
		radeon_emit(cs, va >> 32);
		radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
		radeon_emit(cs, 2);
	} else {
		/* Fallback: COPY_DATA (mem -> reg) followed by PFP_SYNC_ME so
		 * the PFP does not run ahead of the register update.
		 */
		radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
		radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
				COPY_DATA_DST_SEL(COPY_DATA_REG) |
				COPY_DATA_COUNT_SEL);
		radeon_emit(cs, va);
		radeon_emit(cs, va >> 32);
		radeon_emit(cs, reg >> 2);
		radeon_emit(cs, 0);

		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
		radeon_emit(cs, 0);
	}
}

/**
 * Emit the full framebuffer state: color attachments, depth/stencil
 * attachment (or invalid Z/stencil formats when absent), scissor extent and
 * DCC overwrite-combiner control.
 */
static void
radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
{
	int i;
	struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
	const struct radv_subpass *subpass = cmd_buffer->state.subpass;
	unsigned num_bpp64_colorbufs = 0;

	/* this may happen for inherited secondary recording */
	if (!framebuffer)
		return;

	for (i = 0; i < 8; ++i) {
		/* Mark unused color buffer slots with an invalid format. */
		if (i >= subpass->color_count || subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) {
			radeon_set_context_reg(cmd_buffer->cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
					       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
			continue;
		}

		int idx = subpass->color_attachments[i].attachment;
		struct radv_attachment_info *att = &framebuffer->attachments[idx];
		struct radv_image *image = att->attachment->image;
		VkImageLayout layout = subpass->color_attachments[i].layout;

		/* Make the winsys track the attachment BO for residency. */
		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, att->attachment->bo);

		assert(att->attachment->aspect_mask & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_PLANE_0_BIT |
						       VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT));
		radv_emit_fb_color_state(cmd_buffer, i, att, image, layout);

		radv_load_color_clear_metadata(cmd_buffer, image, i);

		/* Count 64bpp+ color buffers for the DCC watermark below. */
		if (image->planes[0].surface.bpe >= 8)
			num_bpp64_colorbufs++;
	}

	if (subpass->depth_stencil_attachment) {
		int idx = subpass->depth_stencil_attachment->attachment;
		VkImageLayout layout = subpass->depth_stencil_attachment->layout;
		struct radv_attachment_info *att = &framebuffer->attachments[idx];
		struct radv_image *image = att->attachment->image;
		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, att->attachment->bo);
		MAYBE_UNUSED uint32_t queue_mask = radv_image_queue_family_mask(image,
										cmd_buffer->queue_family_index,
										cmd_buffer->queue_family_index);
		/* We currently don't support writing decompressed HTILE */
		assert(radv_layout_has_htile(image, layout, queue_mask) ==
		       radv_layout_is_htile_compressed(image, layout, queue_mask));

		radv_emit_fb_ds_state(cmd_buffer, &att->ds, image, layout);

		/* A changed polygon-offset scale invalidates the current
		 * depth bias programming.
		 */
		if (att->ds.offset_scale != cmd_buffer->state.offset_scale) {
			cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS;
			cmd_buffer->state.offset_scale = att->ds.offset_scale;
		}
		radv_load_ds_clear_metadata(cmd_buffer, image);
	} else {
		/* No depth/stencil attachment: program invalid formats. The
		 * DB_Z_INFO register offset moved on GFX9.
		 */
		if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
			radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 2);
		else
			radeon_set_context_reg_seq(cmd_buffer->cs, R_028040_DB_Z_INFO, 2);

		radeon_emit(cmd_buffer->cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* DB_Z_INFO */
		radeon_emit(cmd_buffer->cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */
	}
	radeon_set_context_reg(cmd_buffer->cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
			       S_028208_BR_X(framebuffer->width) |
			       S_028208_BR_Y(framebuffer->height));

	if (cmd_buffer->device->physical_device->rad_info.chip_class >= VI) {
		uint8_t watermark = 4; /* Default value for VI. */

		/* For optimal DCC performance.
		 */
		if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
			if (num_bpp64_colorbufs >= 5) {
				watermark = 8;
			} else {
				watermark = 6;
			}
		}

		radeon_set_context_reg(cmd_buffer->cs, R_028424_CB_DCC_CONTROL,
				       S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
				       S_028424_OVERWRITE_COMBINER_WATERMARK(watermark));
	}

	if (cmd_buffer->device->dfsm_allowed) {
		radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
	}

	cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_FRAMEBUFFER;
}

/**
 * Emit the index buffer state (index type, base address and size).
 */
static void
radv_emit_index_buffer(struct radv_cmd_buffer *cmd_buffer)
{
	struct radeon_cmdbuf *cs = cmd_buffer->cs;
	struct radv_cmd_state *state = &cmd_buffer->state;

	/* Only re-emit the index type when it actually changed. */
	if (state->index_type != state->last_index_type) {
		if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
			radeon_set_uconfig_reg_idx(cs, R_03090C_VGT_INDEX_TYPE,
						   2, state->index_type);
		} else {
			radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
			radeon_emit(cs, state->index_type);
		}

		state->last_index_type = state->index_type;
	}

	radeon_emit(cs, PKT3(PKT3_INDEX_BASE, 1, 0));
	radeon_emit(cs, state->index_va);
	radeon_emit(cs, state->index_va >> 32);

	radeon_emit(cs, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0));
	radeon_emit(cs, state->max_index_count);

	cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_INDEX_BUFFER;
}

/**
 * Program DB_COUNT_CONTROL for the current occlusion query state, and toggle
 * out-of-order rasterization around perfect occlusion queries.
 */
void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer)
{
	bool has_perfect_queries = cmd_buffer->state.perfect_occlusion_queries_enabled;
	struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
	uint32_t pa_sc_mode_cntl_1 =
		pipeline ? pipeline->graphics.ms.pa_sc_mode_cntl_1 : 0;
	uint32_t db_count_control;

	if(!cmd_buffer->state.active_occlusion_queries) {
		if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
			if (G_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(pa_sc_mode_cntl_1) &&
			    pipeline->graphics.disable_out_of_order_rast_for_occlusion &&
			    has_perfect_queries) {
				/* Re-enable out-of-order rasterization if the
				 * bound pipeline supports it and if it has
				 * been disabled before starting any perfect
				 * occlusion queries.
				 */
				radeon_set_context_reg(cmd_buffer->cs,
						       R_028A4C_PA_SC_MODE_CNTL_1,
						       pa_sc_mode_cntl_1);
			}
		}
		db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1);
	} else {
		const struct radv_subpass *subpass = cmd_buffer->state.subpass;
		uint32_t sample_rate = subpass ?
			util_logbase2(subpass->max_sample_count) : 0;

		if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
			db_count_control =
				S_028004_PERFECT_ZPASS_COUNTS(has_perfect_queries) |
				S_028004_SAMPLE_RATE(sample_rate) |
				S_028004_ZPASS_ENABLE(1) |
				S_028004_SLICE_EVEN_ENABLE(1) |
				S_028004_SLICE_ODD_ENABLE(1);

			if (G_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(pa_sc_mode_cntl_1) &&
			    pipeline->graphics.disable_out_of_order_rast_for_occlusion &&
			    has_perfect_queries) {
				/* If the bound pipeline has enabled
				 * out-of-order rasterization, we should
				 * disable it before starting any perfect
				 * occlusion queries.
				 */
				pa_sc_mode_cntl_1 &= C_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE;

				radeon_set_context_reg(cmd_buffer->cs,
						       R_028A4C_PA_SC_MODE_CNTL_1,
						       pa_sc_mode_cntl_1);
			}
		} else {
			db_count_control = S_028004_PERFECT_ZPASS_COUNTS(1) |
				S_028004_SAMPLE_RATE(sample_rate);
		}
	}

	radeon_set_context_reg(cmd_buffer->cs, R_028004_DB_COUNT_CONTROL, db_count_control);

	/* Setting context registers rolls the context. */
	cmd_buffer->state.context_roll_without_scissor_emitted = true;
}

/**
 * Emit all dynamic state that is both dirty and needed by the currently
 * emitted pipeline.
 */
static void
radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer)
{
	uint32_t states = cmd_buffer->state.dirty & cmd_buffer->state.emitted_pipeline->graphics.needed_dynamic_state;

	if (states & (RADV_CMD_DIRTY_DYNAMIC_VIEWPORT))
		radv_emit_viewport(cmd_buffer);

	if (states & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT) &&
	    !cmd_buffer->device->physical_device->has_scissor_bug)
		radv_emit_scissor(cmd_buffer);

	if (states & RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)
		radv_emit_line_width(cmd_buffer);

	if (states & RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS)
		radv_emit_blend_constants(cmd_buffer);

	if (states & (RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE |
		      RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |
		      RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK))
		radv_emit_stencil(cmd_buffer);

	if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS)
		radv_emit_depth_bounds(cmd_buffer);

	if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)
		radv_emit_depth_bias(cmd_buffer);

	if (states & RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE)
		radv_emit_discard_rectangle(cmd_buffer);

	cmd_buffer->state.dirty &= ~states;
}

/**
 * Upload the CPU-side push descriptor set contents to the upload BO and
 * record its GPU address.
 */
static void
radv_flush_push_descriptors(struct radv_cmd_buffer *cmd_buffer,
			    VkPipelineBindPoint bind_point)
{
	struct radv_descriptor_state *descriptors_state =
		radv_get_descriptors_state(cmd_buffer, bind_point);
	struct radv_descriptor_set *set = &descriptors_state->push_set.set;
	unsigned bo_offset;

	/* Best-effort: a failed upload leaves the previous VA in place. */
	if (!radv_cmd_buffer_upload_data(cmd_buffer, set->size, 32,
					 set->mapped_ptr,
					 &bo_offset))
		return;

	set->va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
	set->va += bo_offset;
}

/**
 * Build and upload the table of descriptor set addresses, then point every
 * active shader stage's AC_UD_INDIRECT_DESCRIPTOR_SETS user SGPR at it.
 */
static void
radv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer,
				    VkPipelineBindPoint bind_point)
{
	struct radv_descriptor_state *descriptors_state =
		radv_get_descriptors_state(cmd_buffer, bind_point);
	uint32_t size = MAX_SETS * 4; /* one 32-bit address slot per set */
	uint32_t offset;
	void *ptr;

	if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size,
					  256, &offset, &ptr))
		return;

	for (unsigned i = 0; i < MAX_SETS; i++) {
		uint32_t *uptr = ((uint32_t *)ptr) + i;
		uint64_t set_va = 0;
		struct radv_descriptor_set *set = descriptors_state->sets[i];
		if (descriptors_state->valid & (1u << i))
			set_va = set->va;
		/* Only the low 32 bits of the set VA are stored. */
		uptr[0] = set_va & 0xffffffff;
	}

	uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
	va += offset;

	if (cmd_buffer->state.pipeline) {
		if (cmd_buffer->state.pipeline->shaders[MESA_SHADER_VERTEX])
			radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_VERTEX,
						   AC_UD_INDIRECT_DESCRIPTOR_SETS, va);

		if (cmd_buffer->state.pipeline->shaders[MESA_SHADER_FRAGMENT])
			radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_FRAGMENT,
						   AC_UD_INDIRECT_DESCRIPTOR_SETS, va);

		if (radv_pipeline_has_gs(cmd_buffer->state.pipeline))
			radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY,
						   AC_UD_INDIRECT_DESCRIPTOR_SETS, va);

		/* NOTE(review): TESS_CTRL and TESS_EVAL are emitted under two
		 * separate (identical) tess checks; presumably intentional so
		 * merged-stage lookups resolve per stage — confirm upstream.
		 */
		if (radv_pipeline_has_tess(cmd_buffer->state.pipeline))
			radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_TESS_CTRL,
						   AC_UD_INDIRECT_DESCRIPTOR_SETS, va);

		if (radv_pipeline_has_tess(cmd_buffer->state.pipeline))
			radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_TESS_EVAL,
						   AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
	}

	if (cmd_buffer->state.compute_pipeline)
		radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.compute_pipeline, MESA_SHADER_COMPUTE,
					   AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
}

/**
 * Flush dirty descriptor state for the bind point implied by \p stages:
 * upload push descriptors, rebuild the indirect set table if the pipeline
 * needs it, and emit descriptor pointers for each active stage.
 */
static void
radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer,
		       VkShaderStageFlags stages)
{
	VkPipelineBindPoint bind_point = stages & VK_SHADER_STAGE_COMPUTE_BIT ?
					 VK_PIPELINE_BIND_POINT_COMPUTE :
					 VK_PIPELINE_BIND_POINT_GRAPHICS;
	struct radv_descriptor_state *descriptors_state =
		radv_get_descriptors_state(cmd_buffer, bind_point);
	struct radv_cmd_state *state = &cmd_buffer->state;
	bool flush_indirect_descriptors;

	if (!descriptors_state->dirty)
		return;

	if (descriptors_state->push_dirty)
		radv_flush_push_descriptors(cmd_buffer, bind_point);

	flush_indirect_descriptors =
		(bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS &&
		 state->pipeline && state->pipeline->need_indirect_descriptor_sets) ||
		(bind_point == VK_PIPELINE_BIND_POINT_COMPUTE &&
		 state->compute_pipeline && state->compute_pipeline->need_indirect_descriptor_sets);

	if (flush_indirect_descriptors)
		radv_flush_indirect_descriptor_sets(cmd_buffer, bind_point);

	/* Reserve worst-case CS space up front; checked by the assert below. */
	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
							   cmd_buffer->cs,
							   MAX_SETS * MESA_SHADER_STAGES * 4);

	if (cmd_buffer->state.pipeline) {
		radv_foreach_stage(stage, stages) {
			if (!cmd_buffer->state.pipeline->shaders[stage])
				continue;

			radv_emit_descriptor_pointers(cmd_buffer,
						      cmd_buffer->state.pipeline,
						      descriptors_state, stage);
		}
	}

	if (cmd_buffer->state.compute_pipeline &&
	    (stages & VK_SHADER_STAGE_COMPUTE_BIT)) {
		radv_emit_descriptor_pointers(cmd_buffer,
					      cmd_buffer->state.compute_pipeline,
					      descriptors_state,
					      MESA_SHADER_COMPUTE);
	}

	descriptors_state->dirty = 0;
	descriptors_state->push_dirty = false;

	assert(cmd_buffer->cs->cdw <= cdw_max);

	if (unlikely(cmd_buffer->device->trace_bo))
		radv_save_descriptors(cmd_buffer, bind_point);
}

/**
 * Flush push constants and dynamic buffer offsets for the given stages:
 * emit inline push constants per stage, and when any stage loads them from
 * memory, upload the constants (+ dynamic buffer descriptors) and emit the
 * AC_UD_PUSH_CONSTANTS address.
 */
static void
radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
		     VkShaderStageFlags stages)
{
	struct radv_pipeline *pipeline = stages & VK_SHADER_STAGE_COMPUTE_BIT
					 ? cmd_buffer->state.compute_pipeline
					 : cmd_buffer->state.pipeline;
	VkPipelineBindPoint bind_point = stages & VK_SHADER_STAGE_COMPUTE_BIT ?
					 VK_PIPELINE_BIND_POINT_COMPUTE :
					 VK_PIPELINE_BIND_POINT_GRAPHICS;
	struct radv_descriptor_state *descriptors_state =
		radv_get_descriptors_state(cmd_buffer, bind_point);
	struct radv_pipeline_layout *layout = pipeline->layout;
	struct radv_shader_variant *shader, *prev_shader;
	bool need_push_constants = false;
	unsigned offset;
	void *ptr;
	uint64_t va;

	/* Only consider stages whose push constants are actually stale. */
	stages &= cmd_buffer->push_constant_stages;
	if (!stages ||
	    (!layout->push_constant_size && !layout->dynamic_offset_count))
		return;

	radv_foreach_stage(stage, stages) {
		if (!pipeline->shaders[stage])
			continue;

		need_push_constants |= pipeline->shaders[stage]->info.info.loads_push_constants;
		need_push_constants |= pipeline->shaders[stage]->info.info.loads_dynamic_offsets;

		/* Constants promoted to user SGPRs are emitted inline. */
		uint8_t base = pipeline->shaders[stage]->info.info.base_inline_push_consts;
		uint8_t count = pipeline->shaders[stage]->info.info.num_inline_push_consts;

		radv_emit_inline_push_consts(cmd_buffer, pipeline, stage,
					     AC_UD_INLINE_PUSH_CONSTANTS,
					     count,
					     (uint32_t *)&cmd_buffer->push_constants[base * 4]);
	}

	if (need_push_constants) {
		/* Upload push constants followed by the dynamic buffer
		 * descriptors (16 bytes each).
		 */
		if (!radv_cmd_buffer_upload_alloc(cmd_buffer, layout->push_constant_size +
						  16 * layout->dynamic_offset_count,
						  256, &offset, &ptr))
			return;

		memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size);
		memcpy((char*)ptr + layout->push_constant_size,
		       descriptors_state->dynamic_buffers,
		       16 * layout->dynamic_offset_count);

		va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
		va += offset;

		MAYBE_UNUSED unsigned cdw_max =
			radeon_check_space(cmd_buffer->device->ws,
					   cmd_buffer->cs, MESA_SHADER_STAGES * 4);

		prev_shader = NULL;
		radv_foreach_stage(stage, stages) {
			shader = radv_get_shader(pipeline, stage);

			/* Avoid redundantly emitting the address for merged stages.
			 */
			if (shader && shader != prev_shader) {
				radv_emit_userdata_address(cmd_buffer, pipeline, stage,
							   AC_UD_PUSH_CONSTANTS, va);

				prev_shader = shader;
			}
		}
		assert(cmd_buffer->cs->cdw <= cdw_max);
	}

	cmd_buffer->push_constant_stages &= ~stages;
}

/**
 * Build and upload vertex buffer descriptors when the vertex buffers or the
 * pipeline changed, then emit the AC_UD_VS_VERTEX_BUFFERS address.
 */
static void
radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer,
			      bool pipeline_is_dirty)
{
	if ((pipeline_is_dirty ||
	     (cmd_buffer->state.dirty & RADV_CMD_DIRTY_VERTEX_BUFFER)) &&
	    cmd_buffer->state.pipeline->num_vertex_bindings &&
	    radv_get_shader(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX)->info.info.vs.has_vertex_buffers) {
		struct radv_vertex_elements_info *velems = &cmd_buffer->state.pipeline->vertex_elements;
		unsigned vb_offset;
		void *vb_ptr;
		uint32_t i = 0;
		uint32_t count = cmd_buffer->state.pipeline->num_vertex_bindings;
		uint64_t va;

		/* allocate some descriptor state for vertex buffers */
		if (!radv_cmd_buffer_upload_alloc(cmd_buffer, count * 16, 256,
						  &vb_offset, &vb_ptr))
			return;

		for (i = 0; i < count; i++) {
			uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
			uint32_t offset;
			struct radv_buffer *buffer = cmd_buffer->vertex_bindings[i].buffer;
			uint32_t stride = cmd_buffer->state.pipeline->binding_stride[i];

			/* Unbound slot: leave the descriptor untouched. */
			if (!buffer)
				continue;

			va = radv_buffer_get_va(buffer->bo);

			offset = cmd_buffer->vertex_bindings[i].offset;
			va += offset + buffer->offset;
			desc[0] = va;
			desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);
			/* Pre-GFX9 with a non-zero stride: num_records is in
			 * units of stride; otherwise it is the byte size.
			 */
			if (cmd_buffer->device->physical_device->rad_info.chip_class <= CIK && stride)
				desc[2] = (buffer->size - offset - velems->format_size[i]) / stride + 1;
			else
				desc[2] = buffer->size - offset;
			desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
				  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
				  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
				  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
				  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) |
				  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
		}

		va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
		va += vb_offset;

		radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_VERTEX,
					   AC_UD_VS_VERTEX_BUFFERS, va);

		/* Remember the table for L2 prefetching before the draw. */
		cmd_buffer->state.vb_va = va;
		cmd_buffer->state.vb_size = count * 16;
		cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_VBO_DESCRIPTORS;
	}
	cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_VERTEX_BUFFER;
}

/**
 * Point every stage that uses the AC_UD_STREAMOUT_BUFFERS user SGPR (plus
 * the GS copy shader, if any) at the streamout descriptor table at \p va.
 */
static void
radv_emit_streamout_buffers(struct radv_cmd_buffer *cmd_buffer, uint64_t va)
{
	struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
	struct radv_userdata_info *loc;
	uint32_t base_reg;

	for (unsigned stage = 0; stage < MESA_SHADER_STAGES; ++stage) {
		if (!radv_get_shader(pipeline, stage))
			continue;

		loc = radv_lookup_user_sgpr(pipeline, stage,
					    AC_UD_STREAMOUT_BUFFERS);
		if (loc->sgpr_idx == -1)
			continue;

		base_reg = pipeline->user_data_0[stage];

		radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
					 base_reg + loc->sgpr_idx * 4, va, false);
	}

	/* The GS copy shader runs as a VS and has its own SGPR mapping. */
	if (pipeline->gs_copy_shader) {
		loc = &pipeline->gs_copy_shader->info.user_sgprs_locs.shader_data[AC_UD_STREAMOUT_BUFFERS];
		if (loc->sgpr_idx != -1) {
			base_reg = R_00B130_SPI_SHADER_USER_DATA_VS_0;

			radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
						 base_reg + loc->sgpr_idx * 4, va, false);
		}
	}
}

/**
 * Build and upload buffer descriptors for the enabled streamout bindings,
 * then emit the table address to the shaders.
 */
static void
radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer)
{
	if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_STREAMOUT_BUFFER) {
		struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
		struct radv_streamout_state *so = &cmd_buffer->state.streamout;
		unsigned so_offset;
		void *so_ptr;
		uint64_t va;

		/* Allocate some descriptor state for streamout buffers.
		 */
		if (!radv_cmd_buffer_upload_alloc(cmd_buffer,
						  MAX_SO_BUFFERS * 16, 256,
						  &so_offset, &so_ptr))
			return;

		for (uint32_t i = 0; i < MAX_SO_BUFFERS; i++) {
			struct radv_buffer *buffer = sb[i].buffer;
			uint32_t *desc = &((uint32_t *)so_ptr)[i * 4];

			if (!(so->enabled_mask & (1 << i)))
				continue;

			va = radv_buffer_get_va(buffer->bo) + buffer->offset;

			va += sb[i].offset;

			/* Set the descriptor.
			 *
			 * On VI, the format must be non-INVALID, otherwise
			 * the buffer will be considered not bound and store
			 * instructions will be no-ops.
			 */
			desc[0] = va;
			desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
			desc[2] = 0xffffffff;
			desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
				  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
				  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
				  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
				  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
		}

		va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
		va += so_offset;

		radv_emit_streamout_buffers(cmd_buffer, va);
	}

	cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_STREAMOUT_BUFFER;
}

/**
 * Upload/flush all shader-visible descriptor state needed before a draw:
 * vertex buffers, streamout buffers, descriptor sets and push constants.
 */
static void
radv_upload_graphics_shader_descriptors(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty)
{
	radv_flush_vertex_descriptors(cmd_buffer, pipeline_is_dirty);
	radv_flush_streamout_descriptors(cmd_buffer);
	radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS);
	radv_flush_constants(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS);
}

struct radv_draw_info {
	/**
	 * Number of vertices.
	 */
	uint32_t count;

	/**
	 * Index of the first vertex.
	 */
	int32_t vertex_offset;

	/**
	 * First instance id.
	 */
	uint32_t first_instance;

	/**
	 * Number of instances.
	 */
	uint32_t instance_count;

	/**
	 * First index (indexed draws only).
	 */
	uint32_t first_index;

	/**
	 * Whether it's an indexed draw.
	 */
	bool indexed;

	/**
	 * Indirect draw parameters resource.
	 */
	struct radv_buffer *indirect;
	uint64_t indirect_offset;
	uint32_t stride;

	/**
	 * Draw count parameters resource.
	 */
	struct radv_buffer *count_buffer;
	uint64_t count_buffer_offset;

	/**
	 * Stream output parameters resource.
2187b8e80941Smrg */ 2188b8e80941Smrg struct radv_buffer *strmout_buffer; 2189b8e80941Smrg uint64_t strmout_buffer_offset; 2190b8e80941Smrg}; 2191b8e80941Smrg 2192b8e80941Smrgstatic void 2193b8e80941Smrgradv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, 2194b8e80941Smrg const struct radv_draw_info *draw_info) 2195b8e80941Smrg{ 2196b8e80941Smrg struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info; 2197b8e80941Smrg struct radv_cmd_state *state = &cmd_buffer->state; 2198b8e80941Smrg struct radeon_cmdbuf *cs = cmd_buffer->cs; 2199b8e80941Smrg uint32_t ia_multi_vgt_param; 2200b8e80941Smrg int32_t primitive_reset_en; 2201b8e80941Smrg 2202b8e80941Smrg /* Draw state. */ 2203b8e80941Smrg ia_multi_vgt_param = 2204b8e80941Smrg si_get_ia_multi_vgt_param(cmd_buffer, draw_info->instance_count > 1, 2205b8e80941Smrg draw_info->indirect, 2206b8e80941Smrg !!draw_info->strmout_buffer, 2207b8e80941Smrg draw_info->indirect ? 0 : draw_info->count); 2208b8e80941Smrg 2209b8e80941Smrg if (state->last_ia_multi_vgt_param != ia_multi_vgt_param) { 2210b8e80941Smrg if (info->chip_class >= GFX9) { 2211b8e80941Smrg radeon_set_uconfig_reg_idx(cs, 2212b8e80941Smrg R_030960_IA_MULTI_VGT_PARAM, 2213b8e80941Smrg 4, ia_multi_vgt_param); 2214b8e80941Smrg } else if (info->chip_class >= CIK) { 2215b8e80941Smrg radeon_set_context_reg_idx(cs, 2216b8e80941Smrg R_028AA8_IA_MULTI_VGT_PARAM, 2217b8e80941Smrg 1, ia_multi_vgt_param); 2218b8e80941Smrg } else { 2219b8e80941Smrg radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, 2220b8e80941Smrg ia_multi_vgt_param); 2221b8e80941Smrg } 2222b8e80941Smrg state->last_ia_multi_vgt_param = ia_multi_vgt_param; 2223b8e80941Smrg } 2224b8e80941Smrg 2225b8e80941Smrg /* Primitive restart. 
*/ 2226b8e80941Smrg primitive_reset_en = 2227b8e80941Smrg draw_info->indexed && state->pipeline->graphics.prim_restart_enable; 2228b8e80941Smrg 2229b8e80941Smrg if (primitive_reset_en != state->last_primitive_reset_en) { 2230b8e80941Smrg state->last_primitive_reset_en = primitive_reset_en; 2231b8e80941Smrg if (info->chip_class >= GFX9) { 2232b8e80941Smrg radeon_set_uconfig_reg(cs, 2233b8e80941Smrg R_03092C_VGT_MULTI_PRIM_IB_RESET_EN, 2234b8e80941Smrg primitive_reset_en); 2235b8e80941Smrg } else { 2236b8e80941Smrg radeon_set_context_reg(cs, 2237b8e80941Smrg R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 2238b8e80941Smrg primitive_reset_en); 2239b8e80941Smrg } 2240b8e80941Smrg } 2241b8e80941Smrg 2242b8e80941Smrg if (primitive_reset_en) { 2243b8e80941Smrg uint32_t primitive_reset_index = 2244b8e80941Smrg state->index_type ? 0xffffffffu : 0xffffu; 2245b8e80941Smrg 2246b8e80941Smrg if (primitive_reset_index != state->last_primitive_reset_index) { 2247b8e80941Smrg radeon_set_context_reg(cs, 2248b8e80941Smrg R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 2249b8e80941Smrg primitive_reset_index); 2250b8e80941Smrg state->last_primitive_reset_index = primitive_reset_index; 2251b8e80941Smrg } 2252b8e80941Smrg } 2253b8e80941Smrg 2254b8e80941Smrg if (draw_info->strmout_buffer) { 2255b8e80941Smrg uint64_t va = radv_buffer_get_va(draw_info->strmout_buffer->bo); 2256b8e80941Smrg 2257b8e80941Smrg va += draw_info->strmout_buffer->offset + 2258b8e80941Smrg draw_info->strmout_buffer_offset; 2259b8e80941Smrg 2260b8e80941Smrg radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, 2261b8e80941Smrg draw_info->stride); 2262b8e80941Smrg 2263b8e80941Smrg radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); 2264b8e80941Smrg radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | 2265b8e80941Smrg COPY_DATA_DST_SEL(COPY_DATA_REG) | 2266b8e80941Smrg COPY_DATA_WR_CONFIRM); 2267b8e80941Smrg radeon_emit(cs, va); 2268b8e80941Smrg radeon_emit(cs, va >> 32); 2269b8e80941Smrg radeon_emit(cs, 
R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2); 2270b8e80941Smrg radeon_emit(cs, 0); /* unused */ 2271b8e80941Smrg 2272b8e80941Smrg radv_cs_add_buffer(cmd_buffer->device->ws, cs, draw_info->strmout_buffer->bo); 2273b8e80941Smrg } 2274b8e80941Smrg} 2275b8e80941Smrg 2276b8e80941Smrgstatic void radv_stage_flush(struct radv_cmd_buffer *cmd_buffer, 2277b8e80941Smrg VkPipelineStageFlags src_stage_mask) 2278b8e80941Smrg{ 2279b8e80941Smrg if (src_stage_mask & (VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | 2280b8e80941Smrg VK_PIPELINE_STAGE_TRANSFER_BIT | 2281b8e80941Smrg VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT | 2282b8e80941Smrg VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) { 2283b8e80941Smrg cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH; 2284b8e80941Smrg } 2285b8e80941Smrg 2286b8e80941Smrg if (src_stage_mask & (VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | 2287b8e80941Smrg VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | 2288b8e80941Smrg VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | 2289b8e80941Smrg VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | 2290b8e80941Smrg VK_PIPELINE_STAGE_TRANSFER_BIT | 2291b8e80941Smrg VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT | 2292b8e80941Smrg VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT | 2293b8e80941Smrg VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) { 2294b8e80941Smrg cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH; 2295b8e80941Smrg } else if (src_stage_mask & (VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | 2296b8e80941Smrg VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | 2297b8e80941Smrg VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | 2298b8e80941Smrg VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | 2299b8e80941Smrg VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | 2300b8e80941Smrg VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | 2301b8e80941Smrg VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT)) { 2302b8e80941Smrg cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH; 2303b8e80941Smrg } 2304b8e80941Smrg} 2305b8e80941Smrg 2306b8e80941Smrgstatic enum radv_cmd_flush_bits 
2307b8e80941Smrgradv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, 2308b8e80941Smrg VkAccessFlags src_flags, 2309b8e80941Smrg struct radv_image *image) 2310b8e80941Smrg{ 2311b8e80941Smrg bool flush_CB_meta = true, flush_DB_meta = true; 2312b8e80941Smrg enum radv_cmd_flush_bits flush_bits = 0; 2313b8e80941Smrg uint32_t b; 2314b8e80941Smrg 2315b8e80941Smrg if (image) { 2316b8e80941Smrg if (!radv_image_has_CB_metadata(image)) 2317b8e80941Smrg flush_CB_meta = false; 2318b8e80941Smrg if (!radv_image_has_htile(image)) 2319b8e80941Smrg flush_DB_meta = false; 2320b8e80941Smrg } 2321b8e80941Smrg 2322b8e80941Smrg for_each_bit(b, src_flags) { 2323b8e80941Smrg switch ((VkAccessFlagBits)(1 << b)) { 2324b8e80941Smrg case VK_ACCESS_SHADER_WRITE_BIT: 2325b8e80941Smrg case VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT: 2326b8e80941Smrg case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT: 2327b8e80941Smrg flush_bits |= RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2; 2328b8e80941Smrg break; 2329b8e80941Smrg case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT: 2330b8e80941Smrg flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; 2331b8e80941Smrg if (flush_CB_meta) 2332b8e80941Smrg flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; 2333b8e80941Smrg break; 2334b8e80941Smrg case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: 2335b8e80941Smrg flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB; 2336b8e80941Smrg if (flush_DB_meta) 2337b8e80941Smrg flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; 2338b8e80941Smrg break; 2339b8e80941Smrg case VK_ACCESS_TRANSFER_WRITE_BIT: 2340b8e80941Smrg flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | 2341b8e80941Smrg RADV_CMD_FLAG_FLUSH_AND_INV_DB | 2342b8e80941Smrg RADV_CMD_FLAG_INV_GLOBAL_L2; 2343b8e80941Smrg 2344b8e80941Smrg if (flush_CB_meta) 2345b8e80941Smrg flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; 2346b8e80941Smrg if (flush_DB_meta) 2347b8e80941Smrg flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; 2348b8e80941Smrg break; 2349b8e80941Smrg default: 2350b8e80941Smrg 
break; 2351b8e80941Smrg } 2352b8e80941Smrg } 2353b8e80941Smrg return flush_bits; 2354b8e80941Smrg} 2355b8e80941Smrg 2356b8e80941Smrgstatic enum radv_cmd_flush_bits 2357b8e80941Smrgradv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, 2358b8e80941Smrg VkAccessFlags dst_flags, 2359b8e80941Smrg struct radv_image *image) 2360b8e80941Smrg{ 2361b8e80941Smrg bool flush_CB_meta = true, flush_DB_meta = true; 2362b8e80941Smrg enum radv_cmd_flush_bits flush_bits = 0; 2363b8e80941Smrg bool flush_CB = true, flush_DB = true; 2364b8e80941Smrg bool image_is_coherent = false; 2365b8e80941Smrg uint32_t b; 2366b8e80941Smrg 2367b8e80941Smrg if (image) { 2368b8e80941Smrg if (!(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) { 2369b8e80941Smrg flush_CB = false; 2370b8e80941Smrg flush_DB = false; 2371b8e80941Smrg } 2372b8e80941Smrg 2373b8e80941Smrg if (!radv_image_has_CB_metadata(image)) 2374b8e80941Smrg flush_CB_meta = false; 2375b8e80941Smrg if (!radv_image_has_htile(image)) 2376b8e80941Smrg flush_DB_meta = false; 2377b8e80941Smrg 2378b8e80941Smrg if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) { 2379b8e80941Smrg if (image->info.samples == 1 && 2380b8e80941Smrg (image->usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | 2381b8e80941Smrg VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) && 2382b8e80941Smrg !vk_format_is_stencil(image->vk_format)) { 2383b8e80941Smrg /* Single-sample color and single-sample depth 2384b8e80941Smrg * (not stencil) are coherent with shaders on 2385b8e80941Smrg * GFX9. 
2386b8e80941Smrg */ 2387b8e80941Smrg image_is_coherent = true; 2388b8e80941Smrg } 2389b8e80941Smrg } 2390b8e80941Smrg } 2391b8e80941Smrg 2392b8e80941Smrg for_each_bit(b, dst_flags) { 2393b8e80941Smrg switch ((VkAccessFlagBits)(1 << b)) { 2394b8e80941Smrg case VK_ACCESS_INDIRECT_COMMAND_READ_BIT: 2395b8e80941Smrg case VK_ACCESS_INDEX_READ_BIT: 2396b8e80941Smrg case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT: 2397b8e80941Smrg break; 2398b8e80941Smrg case VK_ACCESS_UNIFORM_READ_BIT: 2399b8e80941Smrg flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1; 2400b8e80941Smrg break; 2401b8e80941Smrg case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT: 2402b8e80941Smrg case VK_ACCESS_TRANSFER_READ_BIT: 2403b8e80941Smrg case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT: 2404b8e80941Smrg flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | 2405b8e80941Smrg RADV_CMD_FLAG_INV_GLOBAL_L2; 2406b8e80941Smrg break; 2407b8e80941Smrg case VK_ACCESS_SHADER_READ_BIT: 2408b8e80941Smrg flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1; 2409b8e80941Smrg 2410b8e80941Smrg if (!image_is_coherent) 2411b8e80941Smrg flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2; 2412b8e80941Smrg break; 2413b8e80941Smrg case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT: 2414b8e80941Smrg if (flush_CB) 2415b8e80941Smrg flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; 2416b8e80941Smrg if (flush_CB_meta) 2417b8e80941Smrg flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; 2418b8e80941Smrg break; 2419b8e80941Smrg case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT: 2420b8e80941Smrg if (flush_DB) 2421b8e80941Smrg flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB; 2422b8e80941Smrg if (flush_DB_meta) 2423b8e80941Smrg flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; 2424b8e80941Smrg break; 2425b8e80941Smrg default: 2426b8e80941Smrg break; 2427b8e80941Smrg } 2428b8e80941Smrg } 2429b8e80941Smrg return flush_bits; 2430b8e80941Smrg} 2431b8e80941Smrg 2432b8e80941Smrgvoid radv_subpass_barrier(struct radv_cmd_buffer *cmd_buffer, 2433b8e80941Smrg const struct 
radv_subpass_barrier *barrier) 2434b8e80941Smrg{ 2435b8e80941Smrg cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, barrier->src_access_mask, 2436b8e80941Smrg NULL); 2437b8e80941Smrg radv_stage_flush(cmd_buffer, barrier->src_stage_mask); 2438b8e80941Smrg cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask, 2439b8e80941Smrg NULL); 2440b8e80941Smrg} 2441b8e80941Smrg 2442b8e80941Smrgstatic void radv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buffer, 2443b8e80941Smrg struct radv_subpass_attachment att) 2444b8e80941Smrg{ 2445b8e80941Smrg unsigned idx = att.attachment; 2446b8e80941Smrg struct radv_image_view *view = cmd_buffer->state.framebuffer->attachments[idx].attachment; 2447b8e80941Smrg VkImageSubresourceRange range; 2448b8e80941Smrg range.aspectMask = 0; 2449b8e80941Smrg range.baseMipLevel = view->base_mip; 2450b8e80941Smrg range.levelCount = 1; 2451b8e80941Smrg range.baseArrayLayer = view->base_layer; 2452b8e80941Smrg range.layerCount = cmd_buffer->state.framebuffer->layers; 2453b8e80941Smrg 2454b8e80941Smrg if (cmd_buffer->state.subpass && cmd_buffer->state.subpass->view_mask) { 2455b8e80941Smrg /* If the current subpass uses multiview, the driver might have 2456b8e80941Smrg * performed a fast color/depth clear to the whole image 2457b8e80941Smrg * (including all layers). To make sure the driver will 2458b8e80941Smrg * decompress the image correctly (if needed), we have to 2459b8e80941Smrg * account for the "real" number of layers. If the view mask is 2460b8e80941Smrg * sparse, this will decompress more layers than needed. 
2461b8e80941Smrg */ 2462b8e80941Smrg range.layerCount = util_last_bit(cmd_buffer->state.subpass->view_mask); 2463b8e80941Smrg } 2464b8e80941Smrg 2465b8e80941Smrg radv_handle_image_transition(cmd_buffer, 2466b8e80941Smrg view->image, 2467b8e80941Smrg cmd_buffer->state.attachments[idx].current_layout, 2468b8e80941Smrg att.layout, 0, 0, &range); 2469b8e80941Smrg 2470b8e80941Smrg cmd_buffer->state.attachments[idx].current_layout = att.layout; 2471b8e80941Smrg 2472b8e80941Smrg 2473b8e80941Smrg} 2474b8e80941Smrg 2475b8e80941Smrgvoid 2476b8e80941Smrgradv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer, 2477b8e80941Smrg const struct radv_subpass *subpass) 2478b8e80941Smrg{ 2479b8e80941Smrg cmd_buffer->state.subpass = subpass; 2480b8e80941Smrg 2481b8e80941Smrg cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER; 2482b8e80941Smrg} 2483b8e80941Smrg 2484b8e80941Smrgstatic VkResult 2485b8e80941Smrgradv_cmd_state_setup_attachments(struct radv_cmd_buffer *cmd_buffer, 2486b8e80941Smrg struct radv_render_pass *pass, 2487b8e80941Smrg const VkRenderPassBeginInfo *info) 2488b8e80941Smrg{ 2489b8e80941Smrg struct radv_cmd_state *state = &cmd_buffer->state; 2490b8e80941Smrg 2491b8e80941Smrg if (pass->attachment_count == 0) { 2492b8e80941Smrg state->attachments = NULL; 2493b8e80941Smrg return VK_SUCCESS; 2494b8e80941Smrg } 2495b8e80941Smrg 2496b8e80941Smrg state->attachments = vk_alloc(&cmd_buffer->pool->alloc, 2497b8e80941Smrg pass->attachment_count * 2498b8e80941Smrg sizeof(state->attachments[0]), 2499b8e80941Smrg 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 2500b8e80941Smrg if (state->attachments == NULL) { 2501b8e80941Smrg cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY; 2502b8e80941Smrg return cmd_buffer->record_result; 2503b8e80941Smrg } 2504b8e80941Smrg 2505b8e80941Smrg for (uint32_t i = 0; i < pass->attachment_count; ++i) { 2506b8e80941Smrg struct radv_render_pass_attachment *att = &pass->attachments[i]; 2507b8e80941Smrg VkImageAspectFlags att_aspects = 
vk_format_aspects(att->format); 2508b8e80941Smrg VkImageAspectFlags clear_aspects = 0; 2509b8e80941Smrg 2510b8e80941Smrg if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) { 2511b8e80941Smrg /* color attachment */ 2512b8e80941Smrg if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { 2513b8e80941Smrg clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT; 2514b8e80941Smrg } 2515b8e80941Smrg } else { 2516b8e80941Smrg /* depthstencil attachment */ 2517b8e80941Smrg if ((att_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && 2518b8e80941Smrg att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { 2519b8e80941Smrg clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; 2520b8e80941Smrg if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && 2521b8e80941Smrg att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE) 2522b8e80941Smrg clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; 2523b8e80941Smrg } 2524b8e80941Smrg if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && 2525b8e80941Smrg att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { 2526b8e80941Smrg clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; 2527b8e80941Smrg } 2528b8e80941Smrg } 2529b8e80941Smrg 2530b8e80941Smrg state->attachments[i].pending_clear_aspects = clear_aspects; 2531b8e80941Smrg state->attachments[i].cleared_views = 0; 2532b8e80941Smrg if (clear_aspects && info) { 2533b8e80941Smrg assert(info->clearValueCount > i); 2534b8e80941Smrg state->attachments[i].clear_value = info->pClearValues[i]; 2535b8e80941Smrg } 2536b8e80941Smrg 2537b8e80941Smrg state->attachments[i].current_layout = att->initial_layout; 2538b8e80941Smrg } 2539b8e80941Smrg 2540b8e80941Smrg return VK_SUCCESS; 2541b8e80941Smrg} 2542b8e80941Smrg 2543b8e80941SmrgVkResult radv_AllocateCommandBuffers( 2544b8e80941Smrg VkDevice _device, 2545b8e80941Smrg const VkCommandBufferAllocateInfo *pAllocateInfo, 2546b8e80941Smrg VkCommandBuffer *pCommandBuffers) 2547b8e80941Smrg{ 2548b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 2549b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_pool, pool, 
pAllocateInfo->commandPool); 2550b8e80941Smrg 2551b8e80941Smrg VkResult result = VK_SUCCESS; 2552b8e80941Smrg uint32_t i; 2553b8e80941Smrg 2554b8e80941Smrg for (i = 0; i < pAllocateInfo->commandBufferCount; i++) { 2555b8e80941Smrg 2556b8e80941Smrg if (!list_empty(&pool->free_cmd_buffers)) { 2557b8e80941Smrg struct radv_cmd_buffer *cmd_buffer = list_first_entry(&pool->free_cmd_buffers, struct radv_cmd_buffer, pool_link); 2558b8e80941Smrg 2559b8e80941Smrg list_del(&cmd_buffer->pool_link); 2560b8e80941Smrg list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); 2561b8e80941Smrg 2562b8e80941Smrg result = radv_reset_cmd_buffer(cmd_buffer); 2563b8e80941Smrg cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC; 2564b8e80941Smrg cmd_buffer->level = pAllocateInfo->level; 2565b8e80941Smrg 2566b8e80941Smrg pCommandBuffers[i] = radv_cmd_buffer_to_handle(cmd_buffer); 2567b8e80941Smrg } else { 2568b8e80941Smrg result = radv_create_cmd_buffer(device, pool, pAllocateInfo->level, 2569b8e80941Smrg &pCommandBuffers[i]); 2570b8e80941Smrg } 2571b8e80941Smrg if (result != VK_SUCCESS) 2572b8e80941Smrg break; 2573b8e80941Smrg } 2574b8e80941Smrg 2575b8e80941Smrg if (result != VK_SUCCESS) { 2576b8e80941Smrg radv_FreeCommandBuffers(_device, pAllocateInfo->commandPool, 2577b8e80941Smrg i, pCommandBuffers); 2578b8e80941Smrg 2579b8e80941Smrg /* From the Vulkan 1.0.66 spec: 2580b8e80941Smrg * 2581b8e80941Smrg * "vkAllocateCommandBuffers can be used to create multiple 2582b8e80941Smrg * command buffers. If the creation of any of those command 2583b8e80941Smrg * buffers fails, the implementation must destroy all 2584b8e80941Smrg * successfully created command buffer objects from this 2585b8e80941Smrg * command, set all entries of the pCommandBuffers array to 2586b8e80941Smrg * NULL and return the error." 
2587b8e80941Smrg */ 2588b8e80941Smrg memset(pCommandBuffers, 0, 2589b8e80941Smrg sizeof(*pCommandBuffers) * pAllocateInfo->commandBufferCount); 2590b8e80941Smrg } 2591b8e80941Smrg 2592b8e80941Smrg return result; 2593b8e80941Smrg} 2594b8e80941Smrg 2595b8e80941Smrgvoid radv_FreeCommandBuffers( 2596b8e80941Smrg VkDevice device, 2597b8e80941Smrg VkCommandPool commandPool, 2598b8e80941Smrg uint32_t commandBufferCount, 2599b8e80941Smrg const VkCommandBuffer *pCommandBuffers) 2600b8e80941Smrg{ 2601b8e80941Smrg for (uint32_t i = 0; i < commandBufferCount; i++) { 2602b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pCommandBuffers[i]); 2603b8e80941Smrg 2604b8e80941Smrg if (cmd_buffer) { 2605b8e80941Smrg if (cmd_buffer->pool) { 2606b8e80941Smrg list_del(&cmd_buffer->pool_link); 2607b8e80941Smrg list_addtail(&cmd_buffer->pool_link, &cmd_buffer->pool->free_cmd_buffers); 2608b8e80941Smrg } else 2609b8e80941Smrg radv_cmd_buffer_destroy(cmd_buffer); 2610b8e80941Smrg 2611b8e80941Smrg } 2612b8e80941Smrg } 2613b8e80941Smrg} 2614b8e80941Smrg 2615b8e80941SmrgVkResult radv_ResetCommandBuffer( 2616b8e80941Smrg VkCommandBuffer commandBuffer, 2617b8e80941Smrg VkCommandBufferResetFlags flags) 2618b8e80941Smrg{ 2619b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2620b8e80941Smrg return radv_reset_cmd_buffer(cmd_buffer); 2621b8e80941Smrg} 2622b8e80941Smrg 2623b8e80941SmrgVkResult radv_BeginCommandBuffer( 2624b8e80941Smrg VkCommandBuffer commandBuffer, 2625b8e80941Smrg const VkCommandBufferBeginInfo *pBeginInfo) 2626b8e80941Smrg{ 2627b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2628b8e80941Smrg VkResult result = VK_SUCCESS; 2629b8e80941Smrg 2630b8e80941Smrg if (cmd_buffer->status != RADV_CMD_BUFFER_STATUS_INITIAL) { 2631b8e80941Smrg /* If the command buffer has already been resetted with 2632b8e80941Smrg * vkResetCommandBuffer, no need to do it again. 
2633b8e80941Smrg */ 2634b8e80941Smrg result = radv_reset_cmd_buffer(cmd_buffer); 2635b8e80941Smrg if (result != VK_SUCCESS) 2636b8e80941Smrg return result; 2637b8e80941Smrg } 2638b8e80941Smrg 2639b8e80941Smrg memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state)); 2640b8e80941Smrg cmd_buffer->state.last_primitive_reset_en = -1; 2641b8e80941Smrg cmd_buffer->state.last_index_type = -1; 2642b8e80941Smrg cmd_buffer->state.last_num_instances = -1; 2643b8e80941Smrg cmd_buffer->state.last_vertex_offset = -1; 2644b8e80941Smrg cmd_buffer->state.last_first_instance = -1; 2645b8e80941Smrg cmd_buffer->state.predication_type = -1; 2646b8e80941Smrg cmd_buffer->usage_flags = pBeginInfo->flags; 2647b8e80941Smrg 2648b8e80941Smrg if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY && 2649b8e80941Smrg (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)) { 2650b8e80941Smrg assert(pBeginInfo->pInheritanceInfo); 2651b8e80941Smrg cmd_buffer->state.framebuffer = radv_framebuffer_from_handle(pBeginInfo->pInheritanceInfo->framebuffer); 2652b8e80941Smrg cmd_buffer->state.pass = radv_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass); 2653b8e80941Smrg 2654b8e80941Smrg struct radv_subpass *subpass = 2655b8e80941Smrg &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass]; 2656b8e80941Smrg 2657b8e80941Smrg result = radv_cmd_state_setup_attachments(cmd_buffer, cmd_buffer->state.pass, NULL); 2658b8e80941Smrg if (result != VK_SUCCESS) 2659b8e80941Smrg return result; 2660b8e80941Smrg 2661b8e80941Smrg radv_cmd_buffer_set_subpass(cmd_buffer, subpass); 2662b8e80941Smrg } 2663b8e80941Smrg 2664b8e80941Smrg if (unlikely(cmd_buffer->device->trace_bo)) { 2665b8e80941Smrg struct radv_device *device = cmd_buffer->device; 2666b8e80941Smrg 2667b8e80941Smrg radv_cs_add_buffer(device->ws, cmd_buffer->cs, 2668b8e80941Smrg device->trace_bo); 2669b8e80941Smrg 2670b8e80941Smrg radv_cmd_buffer_trace_emit(cmd_buffer); 2671b8e80941Smrg } 2672b8e80941Smrg 
2673b8e80941Smrg cmd_buffer->status = RADV_CMD_BUFFER_STATUS_RECORDING; 2674b8e80941Smrg 2675b8e80941Smrg return result; 2676b8e80941Smrg} 2677b8e80941Smrg 2678b8e80941Smrgvoid radv_CmdBindVertexBuffers( 2679b8e80941Smrg VkCommandBuffer commandBuffer, 2680b8e80941Smrg uint32_t firstBinding, 2681b8e80941Smrg uint32_t bindingCount, 2682b8e80941Smrg const VkBuffer* pBuffers, 2683b8e80941Smrg const VkDeviceSize* pOffsets) 2684b8e80941Smrg{ 2685b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2686b8e80941Smrg struct radv_vertex_binding *vb = cmd_buffer->vertex_bindings; 2687b8e80941Smrg bool changed = false; 2688b8e80941Smrg 2689b8e80941Smrg /* We have to defer setting up vertex buffer since we need the buffer 2690b8e80941Smrg * stride from the pipeline. */ 2691b8e80941Smrg 2692b8e80941Smrg assert(firstBinding + bindingCount <= MAX_VBS); 2693b8e80941Smrg for (uint32_t i = 0; i < bindingCount; i++) { 2694b8e80941Smrg uint32_t idx = firstBinding + i; 2695b8e80941Smrg 2696b8e80941Smrg if (!changed && 2697b8e80941Smrg (vb[idx].buffer != radv_buffer_from_handle(pBuffers[i]) || 2698b8e80941Smrg vb[idx].offset != pOffsets[i])) { 2699b8e80941Smrg changed = true; 2700b8e80941Smrg } 2701b8e80941Smrg 2702b8e80941Smrg vb[idx].buffer = radv_buffer_from_handle(pBuffers[i]); 2703b8e80941Smrg vb[idx].offset = pOffsets[i]; 2704b8e80941Smrg 2705b8e80941Smrg radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, 2706b8e80941Smrg vb[idx].buffer->bo); 2707b8e80941Smrg } 2708b8e80941Smrg 2709b8e80941Smrg if (!changed) { 2710b8e80941Smrg /* No state changes. 
*/ 2711b8e80941Smrg return; 2712b8e80941Smrg } 2713b8e80941Smrg 2714b8e80941Smrg cmd_buffer->state.dirty |= RADV_CMD_DIRTY_VERTEX_BUFFER; 2715b8e80941Smrg} 2716b8e80941Smrg 2717b8e80941Smrgvoid radv_CmdBindIndexBuffer( 2718b8e80941Smrg VkCommandBuffer commandBuffer, 2719b8e80941Smrg VkBuffer buffer, 2720b8e80941Smrg VkDeviceSize offset, 2721b8e80941Smrg VkIndexType indexType) 2722b8e80941Smrg{ 2723b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2724b8e80941Smrg RADV_FROM_HANDLE(radv_buffer, index_buffer, buffer); 2725b8e80941Smrg 2726b8e80941Smrg if (cmd_buffer->state.index_buffer == index_buffer && 2727b8e80941Smrg cmd_buffer->state.index_offset == offset && 2728b8e80941Smrg cmd_buffer->state.index_type == indexType) { 2729b8e80941Smrg /* No state changes. */ 2730b8e80941Smrg return; 2731b8e80941Smrg } 2732b8e80941Smrg 2733b8e80941Smrg cmd_buffer->state.index_buffer = index_buffer; 2734b8e80941Smrg cmd_buffer->state.index_offset = offset; 2735b8e80941Smrg cmd_buffer->state.index_type = indexType; /* vk matches hw */ 2736b8e80941Smrg cmd_buffer->state.index_va = radv_buffer_get_va(index_buffer->bo); 2737b8e80941Smrg cmd_buffer->state.index_va += index_buffer->offset + offset; 2738b8e80941Smrg 2739b8e80941Smrg int index_size_shift = cmd_buffer->state.index_type ? 
2 : 1; 2740b8e80941Smrg cmd_buffer->state.max_index_count = (index_buffer->size - offset) >> index_size_shift; 2741b8e80941Smrg cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER; 2742b8e80941Smrg radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, index_buffer->bo); 2743b8e80941Smrg} 2744b8e80941Smrg 2745b8e80941Smrg 2746b8e80941Smrgstatic void 2747b8e80941Smrgradv_bind_descriptor_set(struct radv_cmd_buffer *cmd_buffer, 2748b8e80941Smrg VkPipelineBindPoint bind_point, 2749b8e80941Smrg struct radv_descriptor_set *set, unsigned idx) 2750b8e80941Smrg{ 2751b8e80941Smrg struct radeon_winsys *ws = cmd_buffer->device->ws; 2752b8e80941Smrg 2753b8e80941Smrg radv_set_descriptor_set(cmd_buffer, bind_point, set, idx); 2754b8e80941Smrg 2755b8e80941Smrg assert(set); 2756b8e80941Smrg assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)); 2757b8e80941Smrg 2758b8e80941Smrg if (!cmd_buffer->device->use_global_bo_list) { 2759b8e80941Smrg for (unsigned j = 0; j < set->layout->buffer_count; ++j) 2760b8e80941Smrg if (set->descriptors[j]) 2761b8e80941Smrg radv_cs_add_buffer(ws, cmd_buffer->cs, set->descriptors[j]); 2762b8e80941Smrg } 2763b8e80941Smrg 2764b8e80941Smrg if(set->bo) 2765b8e80941Smrg radv_cs_add_buffer(ws, cmd_buffer->cs, set->bo); 2766b8e80941Smrg} 2767b8e80941Smrg 2768b8e80941Smrgvoid radv_CmdBindDescriptorSets( 2769b8e80941Smrg VkCommandBuffer commandBuffer, 2770b8e80941Smrg VkPipelineBindPoint pipelineBindPoint, 2771b8e80941Smrg VkPipelineLayout _layout, 2772b8e80941Smrg uint32_t firstSet, 2773b8e80941Smrg uint32_t descriptorSetCount, 2774b8e80941Smrg const VkDescriptorSet* pDescriptorSets, 2775b8e80941Smrg uint32_t dynamicOffsetCount, 2776b8e80941Smrg const uint32_t* pDynamicOffsets) 2777b8e80941Smrg{ 2778b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2779b8e80941Smrg RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout); 2780b8e80941Smrg unsigned dyn_idx = 0; 2781b8e80941Smrg 2782b8e80941Smrg 
const bool no_dynamic_bounds = cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_DYNAMIC_BOUNDS; 2783b8e80941Smrg struct radv_descriptor_state *descriptors_state = 2784b8e80941Smrg radv_get_descriptors_state(cmd_buffer, pipelineBindPoint); 2785b8e80941Smrg 2786b8e80941Smrg for (unsigned i = 0; i < descriptorSetCount; ++i) { 2787b8e80941Smrg unsigned idx = i + firstSet; 2788b8e80941Smrg RADV_FROM_HANDLE(radv_descriptor_set, set, pDescriptorSets[i]); 2789b8e80941Smrg radv_bind_descriptor_set(cmd_buffer, pipelineBindPoint, set, idx); 2790b8e80941Smrg 2791b8e80941Smrg for(unsigned j = 0; j < set->layout->dynamic_offset_count; ++j, ++dyn_idx) { 2792b8e80941Smrg unsigned idx = j + layout->set[i + firstSet].dynamic_offset_start; 2793b8e80941Smrg uint32_t *dst = descriptors_state->dynamic_buffers + idx * 4; 2794b8e80941Smrg assert(dyn_idx < dynamicOffsetCount); 2795b8e80941Smrg 2796b8e80941Smrg struct radv_descriptor_range *range = set->dynamic_descriptors + j; 2797b8e80941Smrg uint64_t va = range->va + pDynamicOffsets[dyn_idx]; 2798b8e80941Smrg dst[0] = va; 2799b8e80941Smrg dst[1] = S_008F04_BASE_ADDRESS_HI(va >> 32); 2800b8e80941Smrg dst[2] = no_dynamic_bounds ? 
0xffffffffu : range->size; 2801b8e80941Smrg dst[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | 2802b8e80941Smrg S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 2803b8e80941Smrg S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | 2804b8e80941Smrg S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 2805b8e80941Smrg S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 2806b8e80941Smrg S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); 2807b8e80941Smrg cmd_buffer->push_constant_stages |= 2808b8e80941Smrg set->layout->dynamic_shader_stages; 2809b8e80941Smrg } 2810b8e80941Smrg } 2811b8e80941Smrg} 2812b8e80941Smrg 2813b8e80941Smrgstatic bool radv_init_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, 2814b8e80941Smrg struct radv_descriptor_set *set, 2815b8e80941Smrg struct radv_descriptor_set_layout *layout, 2816b8e80941Smrg VkPipelineBindPoint bind_point) 2817b8e80941Smrg{ 2818b8e80941Smrg struct radv_descriptor_state *descriptors_state = 2819b8e80941Smrg radv_get_descriptors_state(cmd_buffer, bind_point); 2820b8e80941Smrg set->size = layout->size; 2821b8e80941Smrg set->layout = layout; 2822b8e80941Smrg 2823b8e80941Smrg if (descriptors_state->push_set.capacity < set->size) { 2824b8e80941Smrg size_t new_size = MAX2(set->size, 1024); 2825b8e80941Smrg new_size = MAX2(new_size, 2 * descriptors_state->push_set.capacity); 2826b8e80941Smrg new_size = MIN2(new_size, 96 * MAX_PUSH_DESCRIPTORS); 2827b8e80941Smrg 2828b8e80941Smrg free(set->mapped_ptr); 2829b8e80941Smrg set->mapped_ptr = malloc(new_size); 2830b8e80941Smrg 2831b8e80941Smrg if (!set->mapped_ptr) { 2832b8e80941Smrg descriptors_state->push_set.capacity = 0; 2833b8e80941Smrg cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY; 2834b8e80941Smrg return false; 2835b8e80941Smrg } 2836b8e80941Smrg 2837b8e80941Smrg descriptors_state->push_set.capacity = new_size; 2838b8e80941Smrg } 2839b8e80941Smrg 2840b8e80941Smrg return true; 2841b8e80941Smrg} 2842b8e80941Smrg 2843b8e80941Smrgvoid radv_meta_push_descriptor_set( 2844b8e80941Smrg struct 
radv_cmd_buffer* cmd_buffer, 2845b8e80941Smrg VkPipelineBindPoint pipelineBindPoint, 2846b8e80941Smrg VkPipelineLayout _layout, 2847b8e80941Smrg uint32_t set, 2848b8e80941Smrg uint32_t descriptorWriteCount, 2849b8e80941Smrg const VkWriteDescriptorSet* pDescriptorWrites) 2850b8e80941Smrg{ 2851b8e80941Smrg RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout); 2852b8e80941Smrg struct radv_descriptor_set *push_set = &cmd_buffer->meta_push_descriptors; 2853b8e80941Smrg unsigned bo_offset; 2854b8e80941Smrg 2855b8e80941Smrg assert(set == 0); 2856b8e80941Smrg assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR); 2857b8e80941Smrg 2858b8e80941Smrg push_set->size = layout->set[set].layout->size; 2859b8e80941Smrg push_set->layout = layout->set[set].layout; 2860b8e80941Smrg 2861b8e80941Smrg if (!radv_cmd_buffer_upload_alloc(cmd_buffer, push_set->size, 32, 2862b8e80941Smrg &bo_offset, 2863b8e80941Smrg (void**) &push_set->mapped_ptr)) 2864b8e80941Smrg return; 2865b8e80941Smrg 2866b8e80941Smrg push_set->va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); 2867b8e80941Smrg push_set->va += bo_offset; 2868b8e80941Smrg 2869b8e80941Smrg radv_update_descriptor_sets(cmd_buffer->device, cmd_buffer, 2870b8e80941Smrg radv_descriptor_set_to_handle(push_set), 2871b8e80941Smrg descriptorWriteCount, pDescriptorWrites, 0, NULL); 2872b8e80941Smrg 2873b8e80941Smrg radv_set_descriptor_set(cmd_buffer, pipelineBindPoint, push_set, set); 2874b8e80941Smrg} 2875b8e80941Smrg 2876b8e80941Smrgvoid radv_CmdPushDescriptorSetKHR( 2877b8e80941Smrg VkCommandBuffer commandBuffer, 2878b8e80941Smrg VkPipelineBindPoint pipelineBindPoint, 2879b8e80941Smrg VkPipelineLayout _layout, 2880b8e80941Smrg uint32_t set, 2881b8e80941Smrg uint32_t descriptorWriteCount, 2882b8e80941Smrg const VkWriteDescriptorSet* pDescriptorWrites) 2883b8e80941Smrg{ 2884b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2885b8e80941Smrg 
RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout); 2886b8e80941Smrg struct radv_descriptor_state *descriptors_state = 2887b8e80941Smrg radv_get_descriptors_state(cmd_buffer, pipelineBindPoint); 2888b8e80941Smrg struct radv_descriptor_set *push_set = &descriptors_state->push_set.set; 2889b8e80941Smrg 2890b8e80941Smrg assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR); 2891b8e80941Smrg 2892b8e80941Smrg if (!radv_init_push_descriptor_set(cmd_buffer, push_set, 2893b8e80941Smrg layout->set[set].layout, 2894b8e80941Smrg pipelineBindPoint)) 2895b8e80941Smrg return; 2896b8e80941Smrg 2897b8e80941Smrg radv_update_descriptor_sets(cmd_buffer->device, cmd_buffer, 2898b8e80941Smrg radv_descriptor_set_to_handle(push_set), 2899b8e80941Smrg descriptorWriteCount, pDescriptorWrites, 0, NULL); 2900b8e80941Smrg 2901b8e80941Smrg radv_set_descriptor_set(cmd_buffer, pipelineBindPoint, push_set, set); 2902b8e80941Smrg descriptors_state->push_dirty = true; 2903b8e80941Smrg} 2904b8e80941Smrg 2905b8e80941Smrgvoid radv_CmdPushDescriptorSetWithTemplateKHR( 2906b8e80941Smrg VkCommandBuffer commandBuffer, 2907b8e80941Smrg VkDescriptorUpdateTemplate descriptorUpdateTemplate, 2908b8e80941Smrg VkPipelineLayout _layout, 2909b8e80941Smrg uint32_t set, 2910b8e80941Smrg const void* pData) 2911b8e80941Smrg{ 2912b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2913b8e80941Smrg RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout); 2914b8e80941Smrg RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate); 2915b8e80941Smrg struct radv_descriptor_state *descriptors_state = 2916b8e80941Smrg radv_get_descriptors_state(cmd_buffer, templ->bind_point); 2917b8e80941Smrg struct radv_descriptor_set *push_set = &descriptors_state->push_set.set; 2918b8e80941Smrg 2919b8e80941Smrg assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR); 2920b8e80941Smrg 2921b8e80941Smrg if 
(!radv_init_push_descriptor_set(cmd_buffer, push_set, 2922b8e80941Smrg layout->set[set].layout, 2923b8e80941Smrg templ->bind_point)) 2924b8e80941Smrg return; 2925b8e80941Smrg 2926b8e80941Smrg radv_update_descriptor_set_with_template(cmd_buffer->device, cmd_buffer, push_set, 2927b8e80941Smrg descriptorUpdateTemplate, pData); 2928b8e80941Smrg 2929b8e80941Smrg radv_set_descriptor_set(cmd_buffer, templ->bind_point, push_set, set); 2930b8e80941Smrg descriptors_state->push_dirty = true; 2931b8e80941Smrg} 2932b8e80941Smrg 2933b8e80941Smrgvoid radv_CmdPushConstants(VkCommandBuffer commandBuffer, 2934b8e80941Smrg VkPipelineLayout layout, 2935b8e80941Smrg VkShaderStageFlags stageFlags, 2936b8e80941Smrg uint32_t offset, 2937b8e80941Smrg uint32_t size, 2938b8e80941Smrg const void* pValues) 2939b8e80941Smrg{ 2940b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2941b8e80941Smrg memcpy(cmd_buffer->push_constants + offset, pValues, size); 2942b8e80941Smrg cmd_buffer->push_constant_stages |= stageFlags; 2943b8e80941Smrg} 2944b8e80941Smrg 2945b8e80941SmrgVkResult radv_EndCommandBuffer( 2946b8e80941Smrg VkCommandBuffer commandBuffer) 2947b8e80941Smrg{ 2948b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2949b8e80941Smrg 2950b8e80941Smrg if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER) { 2951b8e80941Smrg if (cmd_buffer->device->physical_device->rad_info.chip_class == SI) 2952b8e80941Smrg cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2; 2953b8e80941Smrg si_emit_cache_flush(cmd_buffer); 2954b8e80941Smrg } 2955b8e80941Smrg 2956b8e80941Smrg /* Make sure CP DMA is idle at the end of IBs because the kernel 2957b8e80941Smrg * doesn't wait for it. 
2958b8e80941Smrg */ 2959b8e80941Smrg si_cp_dma_wait_for_idle(cmd_buffer); 2960b8e80941Smrg 2961b8e80941Smrg vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments); 2962b8e80941Smrg 2963b8e80941Smrg if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs)) 2964b8e80941Smrg return vk_error(cmd_buffer->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); 2965b8e80941Smrg 2966b8e80941Smrg cmd_buffer->status = RADV_CMD_BUFFER_STATUS_EXECUTABLE; 2967b8e80941Smrg 2968b8e80941Smrg return cmd_buffer->record_result; 2969b8e80941Smrg} 2970b8e80941Smrg 2971b8e80941Smrgstatic void 2972b8e80941Smrgradv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer) 2973b8e80941Smrg{ 2974b8e80941Smrg struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; 2975b8e80941Smrg 2976b8e80941Smrg if (!pipeline || pipeline == cmd_buffer->state.emitted_compute_pipeline) 2977b8e80941Smrg return; 2978b8e80941Smrg 2979b8e80941Smrg assert(!pipeline->ctx_cs.cdw); 2980b8e80941Smrg 2981b8e80941Smrg cmd_buffer->state.emitted_compute_pipeline = pipeline; 2982b8e80941Smrg 2983b8e80941Smrg radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->cs.cdw); 2984b8e80941Smrg radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw); 2985b8e80941Smrg 2986b8e80941Smrg cmd_buffer->compute_scratch_size_needed = 2987b8e80941Smrg MAX2(cmd_buffer->compute_scratch_size_needed, 2988b8e80941Smrg pipeline->max_waves * pipeline->scratch_bytes_per_wave); 2989b8e80941Smrg 2990b8e80941Smrg radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, 2991b8e80941Smrg pipeline->shaders[MESA_SHADER_COMPUTE]->bo); 2992b8e80941Smrg 2993b8e80941Smrg if (unlikely(cmd_buffer->device->trace_bo)) 2994b8e80941Smrg radv_save_pipeline(cmd_buffer, pipeline, RING_COMPUTE); 2995b8e80941Smrg} 2996b8e80941Smrg 2997b8e80941Smrgstatic void radv_mark_descriptor_sets_dirty(struct radv_cmd_buffer *cmd_buffer, 2998b8e80941Smrg VkPipelineBindPoint bind_point) 2999b8e80941Smrg{ 3000b8e80941Smrg struct 
radv_descriptor_state *descriptors_state = 3001b8e80941Smrg radv_get_descriptors_state(cmd_buffer, bind_point); 3002b8e80941Smrg 3003b8e80941Smrg descriptors_state->dirty |= descriptors_state->valid; 3004b8e80941Smrg} 3005b8e80941Smrg 3006b8e80941Smrgvoid radv_CmdBindPipeline( 3007b8e80941Smrg VkCommandBuffer commandBuffer, 3008b8e80941Smrg VkPipelineBindPoint pipelineBindPoint, 3009b8e80941Smrg VkPipeline _pipeline) 3010b8e80941Smrg{ 3011b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 3012b8e80941Smrg RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline); 3013b8e80941Smrg 3014b8e80941Smrg switch (pipelineBindPoint) { 3015b8e80941Smrg case VK_PIPELINE_BIND_POINT_COMPUTE: 3016b8e80941Smrg if (cmd_buffer->state.compute_pipeline == pipeline) 3017b8e80941Smrg return; 3018b8e80941Smrg radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint); 3019b8e80941Smrg 3020b8e80941Smrg cmd_buffer->state.compute_pipeline = pipeline; 3021b8e80941Smrg cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_COMPUTE_BIT; 3022b8e80941Smrg break; 3023b8e80941Smrg case VK_PIPELINE_BIND_POINT_GRAPHICS: 3024b8e80941Smrg if (cmd_buffer->state.pipeline == pipeline) 3025b8e80941Smrg return; 3026b8e80941Smrg radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint); 3027b8e80941Smrg 3028b8e80941Smrg cmd_buffer->state.pipeline = pipeline; 3029b8e80941Smrg if (!pipeline) 3030b8e80941Smrg break; 3031b8e80941Smrg 3032b8e80941Smrg cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE; 3033b8e80941Smrg cmd_buffer->push_constant_stages |= pipeline->active_stages; 3034b8e80941Smrg 3035b8e80941Smrg /* the new vertex shader might not have the same user regs */ 3036b8e80941Smrg cmd_buffer->state.last_first_instance = -1; 3037b8e80941Smrg cmd_buffer->state.last_vertex_offset = -1; 3038b8e80941Smrg 3039b8e80941Smrg /* Prefetch all pipeline shaders at first draw time. 
*/ 3040b8e80941Smrg cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_SHADERS; 3041b8e80941Smrg 3042b8e80941Smrg radv_bind_dynamic_state(cmd_buffer, &pipeline->dynamic_state); 3043b8e80941Smrg radv_bind_streamout_state(cmd_buffer, pipeline); 3044b8e80941Smrg 3045b8e80941Smrg if (pipeline->graphics.esgs_ring_size > cmd_buffer->esgs_ring_size_needed) 3046b8e80941Smrg cmd_buffer->esgs_ring_size_needed = pipeline->graphics.esgs_ring_size; 3047b8e80941Smrg if (pipeline->graphics.gsvs_ring_size > cmd_buffer->gsvs_ring_size_needed) 3048b8e80941Smrg cmd_buffer->gsvs_ring_size_needed = pipeline->graphics.gsvs_ring_size; 3049b8e80941Smrg 3050b8e80941Smrg if (radv_pipeline_has_tess(pipeline)) 3051b8e80941Smrg cmd_buffer->tess_rings_needed = true; 3052b8e80941Smrg break; 3053b8e80941Smrg default: 3054b8e80941Smrg assert(!"invalid bind point"); 3055b8e80941Smrg break; 3056b8e80941Smrg } 3057b8e80941Smrg} 3058b8e80941Smrg 3059b8e80941Smrgvoid radv_CmdSetViewport( 3060b8e80941Smrg VkCommandBuffer commandBuffer, 3061b8e80941Smrg uint32_t firstViewport, 3062b8e80941Smrg uint32_t viewportCount, 3063b8e80941Smrg const VkViewport* pViewports) 3064b8e80941Smrg{ 3065b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 3066b8e80941Smrg struct radv_cmd_state *state = &cmd_buffer->state; 3067b8e80941Smrg MAYBE_UNUSED const uint32_t total_count = firstViewport + viewportCount; 3068b8e80941Smrg 3069b8e80941Smrg assert(firstViewport < MAX_VIEWPORTS); 3070b8e80941Smrg assert(total_count >= 1 && total_count <= MAX_VIEWPORTS); 3071b8e80941Smrg 3072b8e80941Smrg if (!memcmp(state->dynamic.viewport.viewports + firstViewport, 3073b8e80941Smrg pViewports, viewportCount * sizeof(*pViewports))) { 3074b8e80941Smrg return; 3075b8e80941Smrg } 3076b8e80941Smrg 3077b8e80941Smrg memcpy(state->dynamic.viewport.viewports + firstViewport, pViewports, 3078b8e80941Smrg viewportCount * sizeof(*pViewports)); 3079b8e80941Smrg 3080b8e80941Smrg state->dirty |= RADV_CMD_DIRTY_DYNAMIC_VIEWPORT; 
3081b8e80941Smrg} 3082b8e80941Smrg 3083b8e80941Smrgvoid radv_CmdSetScissor( 3084b8e80941Smrg VkCommandBuffer commandBuffer, 3085b8e80941Smrg uint32_t firstScissor, 3086b8e80941Smrg uint32_t scissorCount, 3087b8e80941Smrg const VkRect2D* pScissors) 3088b8e80941Smrg{ 3089b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 3090b8e80941Smrg struct radv_cmd_state *state = &cmd_buffer->state; 3091b8e80941Smrg MAYBE_UNUSED const uint32_t total_count = firstScissor + scissorCount; 3092b8e80941Smrg 3093b8e80941Smrg assert(firstScissor < MAX_SCISSORS); 3094b8e80941Smrg assert(total_count >= 1 && total_count <= MAX_SCISSORS); 3095b8e80941Smrg 3096b8e80941Smrg if (!memcmp(state->dynamic.scissor.scissors + firstScissor, pScissors, 3097b8e80941Smrg scissorCount * sizeof(*pScissors))) { 3098b8e80941Smrg return; 3099b8e80941Smrg } 3100b8e80941Smrg 3101b8e80941Smrg memcpy(state->dynamic.scissor.scissors + firstScissor, pScissors, 3102b8e80941Smrg scissorCount * sizeof(*pScissors)); 3103b8e80941Smrg 3104b8e80941Smrg state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR; 3105b8e80941Smrg} 3106b8e80941Smrg 3107b8e80941Smrgvoid radv_CmdSetLineWidth( 3108b8e80941Smrg VkCommandBuffer commandBuffer, 3109b8e80941Smrg float lineWidth) 3110b8e80941Smrg{ 3111b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 3112b8e80941Smrg 3113b8e80941Smrg if (cmd_buffer->state.dynamic.line_width == lineWidth) 3114b8e80941Smrg return; 3115b8e80941Smrg 3116b8e80941Smrg cmd_buffer->state.dynamic.line_width = lineWidth; 3117b8e80941Smrg cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH; 3118b8e80941Smrg} 3119b8e80941Smrg 3120b8e80941Smrgvoid radv_CmdSetDepthBias( 3121b8e80941Smrg VkCommandBuffer commandBuffer, 3122b8e80941Smrg float depthBiasConstantFactor, 3123b8e80941Smrg float depthBiasClamp, 3124b8e80941Smrg float depthBiasSlopeFactor) 3125b8e80941Smrg{ 3126b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 3127b8e80941Smrg struct 
radv_cmd_state *state = &cmd_buffer->state; 3128b8e80941Smrg 3129b8e80941Smrg if (state->dynamic.depth_bias.bias == depthBiasConstantFactor && 3130b8e80941Smrg state->dynamic.depth_bias.clamp == depthBiasClamp && 3131b8e80941Smrg state->dynamic.depth_bias.slope == depthBiasSlopeFactor) { 3132b8e80941Smrg return; 3133b8e80941Smrg } 3134b8e80941Smrg 3135b8e80941Smrg state->dynamic.depth_bias.bias = depthBiasConstantFactor; 3136b8e80941Smrg state->dynamic.depth_bias.clamp = depthBiasClamp; 3137b8e80941Smrg state->dynamic.depth_bias.slope = depthBiasSlopeFactor; 3138b8e80941Smrg 3139b8e80941Smrg state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS; 3140b8e80941Smrg} 3141b8e80941Smrg 3142b8e80941Smrgvoid radv_CmdSetBlendConstants( 3143b8e80941Smrg VkCommandBuffer commandBuffer, 3144b8e80941Smrg const float blendConstants[4]) 3145b8e80941Smrg{ 3146b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 3147b8e80941Smrg struct radv_cmd_state *state = &cmd_buffer->state; 3148b8e80941Smrg 3149b8e80941Smrg if (!memcmp(state->dynamic.blend_constants, blendConstants, sizeof(float) * 4)) 3150b8e80941Smrg return; 3151b8e80941Smrg 3152b8e80941Smrg memcpy(state->dynamic.blend_constants, blendConstants, sizeof(float) * 4); 3153b8e80941Smrg 3154b8e80941Smrg state->dirty |= RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS; 3155b8e80941Smrg} 3156b8e80941Smrg 3157b8e80941Smrgvoid radv_CmdSetDepthBounds( 3158b8e80941Smrg VkCommandBuffer commandBuffer, 3159b8e80941Smrg float minDepthBounds, 3160b8e80941Smrg float maxDepthBounds) 3161b8e80941Smrg{ 3162b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 3163b8e80941Smrg struct radv_cmd_state *state = &cmd_buffer->state; 3164b8e80941Smrg 3165b8e80941Smrg if (state->dynamic.depth_bounds.min == minDepthBounds && 3166b8e80941Smrg state->dynamic.depth_bounds.max == maxDepthBounds) { 3167b8e80941Smrg return; 3168b8e80941Smrg } 3169b8e80941Smrg 3170b8e80941Smrg state->dynamic.depth_bounds.min = minDepthBounds; 
3171b8e80941Smrg state->dynamic.depth_bounds.max = maxDepthBounds; 3172b8e80941Smrg 3173b8e80941Smrg state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS; 3174b8e80941Smrg} 3175b8e80941Smrg 3176b8e80941Smrgvoid radv_CmdSetStencilCompareMask( 3177b8e80941Smrg VkCommandBuffer commandBuffer, 3178b8e80941Smrg VkStencilFaceFlags faceMask, 3179b8e80941Smrg uint32_t compareMask) 3180b8e80941Smrg{ 3181b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 3182b8e80941Smrg struct radv_cmd_state *state = &cmd_buffer->state; 3183b8e80941Smrg bool front_same = state->dynamic.stencil_compare_mask.front == compareMask; 3184b8e80941Smrg bool back_same = state->dynamic.stencil_compare_mask.back == compareMask; 3185b8e80941Smrg 3186b8e80941Smrg if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) && 3187b8e80941Smrg (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) { 3188b8e80941Smrg return; 3189b8e80941Smrg } 3190b8e80941Smrg 3191b8e80941Smrg if (faceMask & VK_STENCIL_FACE_FRONT_BIT) 3192b8e80941Smrg state->dynamic.stencil_compare_mask.front = compareMask; 3193b8e80941Smrg if (faceMask & VK_STENCIL_FACE_BACK_BIT) 3194b8e80941Smrg state->dynamic.stencil_compare_mask.back = compareMask; 3195b8e80941Smrg 3196b8e80941Smrg state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK; 3197b8e80941Smrg} 3198b8e80941Smrg 3199b8e80941Smrgvoid radv_CmdSetStencilWriteMask( 3200b8e80941Smrg VkCommandBuffer commandBuffer, 3201b8e80941Smrg VkStencilFaceFlags faceMask, 3202b8e80941Smrg uint32_t writeMask) 3203b8e80941Smrg{ 3204b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 3205b8e80941Smrg struct radv_cmd_state *state = &cmd_buffer->state; 3206b8e80941Smrg bool front_same = state->dynamic.stencil_write_mask.front == writeMask; 3207b8e80941Smrg bool back_same = state->dynamic.stencil_write_mask.back == writeMask; 3208b8e80941Smrg 3209b8e80941Smrg if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) && 3210b8e80941Smrg (!(faceMask & 
VK_STENCIL_FACE_BACK_BIT) || back_same)) { 3211b8e80941Smrg return; 3212b8e80941Smrg } 3213b8e80941Smrg 3214b8e80941Smrg if (faceMask & VK_STENCIL_FACE_FRONT_BIT) 3215b8e80941Smrg state->dynamic.stencil_write_mask.front = writeMask; 3216b8e80941Smrg if (faceMask & VK_STENCIL_FACE_BACK_BIT) 3217b8e80941Smrg state->dynamic.stencil_write_mask.back = writeMask; 3218b8e80941Smrg 3219b8e80941Smrg state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK; 3220b8e80941Smrg} 3221b8e80941Smrg 3222b8e80941Smrgvoid radv_CmdSetStencilReference( 3223b8e80941Smrg VkCommandBuffer commandBuffer, 3224b8e80941Smrg VkStencilFaceFlags faceMask, 3225b8e80941Smrg uint32_t reference) 3226b8e80941Smrg{ 3227b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 3228b8e80941Smrg struct radv_cmd_state *state = &cmd_buffer->state; 3229b8e80941Smrg bool front_same = state->dynamic.stencil_reference.front == reference; 3230b8e80941Smrg bool back_same = state->dynamic.stencil_reference.back == reference; 3231b8e80941Smrg 3232b8e80941Smrg if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) && 3233b8e80941Smrg (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) { 3234b8e80941Smrg return; 3235b8e80941Smrg } 3236b8e80941Smrg 3237b8e80941Smrg if (faceMask & VK_STENCIL_FACE_FRONT_BIT) 3238b8e80941Smrg cmd_buffer->state.dynamic.stencil_reference.front = reference; 3239b8e80941Smrg if (faceMask & VK_STENCIL_FACE_BACK_BIT) 3240b8e80941Smrg cmd_buffer->state.dynamic.stencil_reference.back = reference; 3241b8e80941Smrg 3242b8e80941Smrg cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE; 3243b8e80941Smrg} 3244b8e80941Smrg 3245b8e80941Smrgvoid radv_CmdSetDiscardRectangleEXT( 3246b8e80941Smrg VkCommandBuffer commandBuffer, 3247b8e80941Smrg uint32_t firstDiscardRectangle, 3248b8e80941Smrg uint32_t discardRectangleCount, 3249b8e80941Smrg const VkRect2D* pDiscardRectangles) 3250b8e80941Smrg{ 3251b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, 
commandBuffer); 3252b8e80941Smrg struct radv_cmd_state *state = &cmd_buffer->state; 3253b8e80941Smrg MAYBE_UNUSED const uint32_t total_count = firstDiscardRectangle + discardRectangleCount; 3254b8e80941Smrg 3255b8e80941Smrg assert(firstDiscardRectangle < MAX_DISCARD_RECTANGLES); 3256b8e80941Smrg assert(total_count >= 1 && total_count <= MAX_DISCARD_RECTANGLES); 3257b8e80941Smrg 3258b8e80941Smrg if (!memcmp(state->dynamic.discard_rectangle.rectangles + firstDiscardRectangle, 3259b8e80941Smrg pDiscardRectangles, discardRectangleCount * sizeof(*pDiscardRectangles))) { 3260b8e80941Smrg return; 3261b8e80941Smrg } 3262b8e80941Smrg 3263b8e80941Smrg typed_memcpy(&state->dynamic.discard_rectangle.rectangles[firstDiscardRectangle], 3264b8e80941Smrg pDiscardRectangles, discardRectangleCount); 3265b8e80941Smrg 3266b8e80941Smrg state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE; 3267b8e80941Smrg} 3268b8e80941Smrg 3269b8e80941Smrgvoid radv_CmdExecuteCommands( 3270b8e80941Smrg VkCommandBuffer commandBuffer, 3271b8e80941Smrg uint32_t commandBufferCount, 3272b8e80941Smrg const VkCommandBuffer* pCmdBuffers) 3273b8e80941Smrg{ 3274b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, primary, commandBuffer); 3275b8e80941Smrg 3276b8e80941Smrg assert(commandBufferCount > 0); 3277b8e80941Smrg 3278b8e80941Smrg /* Emit pending flushes on primary prior to executing secondary */ 3279b8e80941Smrg si_emit_cache_flush(primary); 3280b8e80941Smrg 3281b8e80941Smrg for (uint32_t i = 0; i < commandBufferCount; i++) { 3282b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, secondary, pCmdBuffers[i]); 3283b8e80941Smrg 3284b8e80941Smrg primary->scratch_size_needed = MAX2(primary->scratch_size_needed, 3285b8e80941Smrg secondary->scratch_size_needed); 3286b8e80941Smrg primary->compute_scratch_size_needed = MAX2(primary->compute_scratch_size_needed, 3287b8e80941Smrg secondary->compute_scratch_size_needed); 3288b8e80941Smrg 3289b8e80941Smrg if (secondary->esgs_ring_size_needed > primary->esgs_ring_size_needed) 
3290b8e80941Smrg primary->esgs_ring_size_needed = secondary->esgs_ring_size_needed; 3291b8e80941Smrg if (secondary->gsvs_ring_size_needed > primary->gsvs_ring_size_needed) 3292b8e80941Smrg primary->gsvs_ring_size_needed = secondary->gsvs_ring_size_needed; 3293b8e80941Smrg if (secondary->tess_rings_needed) 3294b8e80941Smrg primary->tess_rings_needed = true; 3295b8e80941Smrg if (secondary->sample_positions_needed) 3296b8e80941Smrg primary->sample_positions_needed = true; 3297b8e80941Smrg 3298b8e80941Smrg primary->device->ws->cs_execute_secondary(primary->cs, secondary->cs); 3299b8e80941Smrg 3300b8e80941Smrg 3301b8e80941Smrg /* When the secondary command buffer is compute only we don't 3302b8e80941Smrg * need to re-emit the current graphics pipeline. 3303b8e80941Smrg */ 3304b8e80941Smrg if (secondary->state.emitted_pipeline) { 3305b8e80941Smrg primary->state.emitted_pipeline = 3306b8e80941Smrg secondary->state.emitted_pipeline; 3307b8e80941Smrg } 3308b8e80941Smrg 3309b8e80941Smrg /* When the secondary command buffer is graphics only we don't 3310b8e80941Smrg * need to re-emit the current compute pipeline. 3311b8e80941Smrg */ 3312b8e80941Smrg if (secondary->state.emitted_compute_pipeline) { 3313b8e80941Smrg primary->state.emitted_compute_pipeline = 3314b8e80941Smrg secondary->state.emitted_compute_pipeline; 3315b8e80941Smrg } 3316b8e80941Smrg 3317b8e80941Smrg /* Only re-emit the draw packets when needed. 
*/ 3318b8e80941Smrg if (secondary->state.last_primitive_reset_en != -1) { 3319b8e80941Smrg primary->state.last_primitive_reset_en = 3320b8e80941Smrg secondary->state.last_primitive_reset_en; 3321b8e80941Smrg } 3322b8e80941Smrg 3323b8e80941Smrg if (secondary->state.last_primitive_reset_index) { 3324b8e80941Smrg primary->state.last_primitive_reset_index = 3325b8e80941Smrg secondary->state.last_primitive_reset_index; 3326b8e80941Smrg } 3327b8e80941Smrg 3328b8e80941Smrg if (secondary->state.last_ia_multi_vgt_param) { 3329b8e80941Smrg primary->state.last_ia_multi_vgt_param = 3330b8e80941Smrg secondary->state.last_ia_multi_vgt_param; 3331b8e80941Smrg } 3332b8e80941Smrg 3333b8e80941Smrg primary->state.last_first_instance = secondary->state.last_first_instance; 3334b8e80941Smrg primary->state.last_num_instances = secondary->state.last_num_instances; 3335b8e80941Smrg primary->state.last_vertex_offset = secondary->state.last_vertex_offset; 3336b8e80941Smrg 3337b8e80941Smrg if (secondary->state.last_index_type != -1) { 3338b8e80941Smrg primary->state.last_index_type = 3339b8e80941Smrg secondary->state.last_index_type; 3340b8e80941Smrg } 3341b8e80941Smrg } 3342b8e80941Smrg 3343b8e80941Smrg /* After executing commands from secondary buffers we have to dirty 3344b8e80941Smrg * some states. 
3345b8e80941Smrg */ 3346b8e80941Smrg primary->state.dirty |= RADV_CMD_DIRTY_PIPELINE | 3347b8e80941Smrg RADV_CMD_DIRTY_INDEX_BUFFER | 3348b8e80941Smrg RADV_CMD_DIRTY_DYNAMIC_ALL; 3349b8e80941Smrg radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_GRAPHICS); 3350b8e80941Smrg radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_COMPUTE); 3351b8e80941Smrg} 3352b8e80941Smrg 3353b8e80941SmrgVkResult radv_CreateCommandPool( 3354b8e80941Smrg VkDevice _device, 3355b8e80941Smrg const VkCommandPoolCreateInfo* pCreateInfo, 3356b8e80941Smrg const VkAllocationCallbacks* pAllocator, 3357b8e80941Smrg VkCommandPool* pCmdPool) 3358b8e80941Smrg{ 3359b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 3360b8e80941Smrg struct radv_cmd_pool *pool; 3361b8e80941Smrg 3362b8e80941Smrg pool = vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, 3363b8e80941Smrg VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 3364b8e80941Smrg if (pool == NULL) 3365b8e80941Smrg return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); 3366b8e80941Smrg 3367b8e80941Smrg if (pAllocator) 3368b8e80941Smrg pool->alloc = *pAllocator; 3369b8e80941Smrg else 3370b8e80941Smrg pool->alloc = device->alloc; 3371b8e80941Smrg 3372b8e80941Smrg list_inithead(&pool->cmd_buffers); 3373b8e80941Smrg list_inithead(&pool->free_cmd_buffers); 3374b8e80941Smrg 3375b8e80941Smrg pool->queue_family_index = pCreateInfo->queueFamilyIndex; 3376b8e80941Smrg 3377b8e80941Smrg *pCmdPool = radv_cmd_pool_to_handle(pool); 3378b8e80941Smrg 3379b8e80941Smrg return VK_SUCCESS; 3380b8e80941Smrg 3381b8e80941Smrg} 3382b8e80941Smrg 3383b8e80941Smrgvoid radv_DestroyCommandPool( 3384b8e80941Smrg VkDevice _device, 3385b8e80941Smrg VkCommandPool commandPool, 3386b8e80941Smrg const VkAllocationCallbacks* pAllocator) 3387b8e80941Smrg{ 3388b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 3389b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool); 3390b8e80941Smrg 3391b8e80941Smrg if (!pool) 3392b8e80941Smrg 
return; 3393b8e80941Smrg 3394b8e80941Smrg list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer, 3395b8e80941Smrg &pool->cmd_buffers, pool_link) { 3396b8e80941Smrg radv_cmd_buffer_destroy(cmd_buffer); 3397b8e80941Smrg } 3398b8e80941Smrg 3399b8e80941Smrg list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer, 3400b8e80941Smrg &pool->free_cmd_buffers, pool_link) { 3401b8e80941Smrg radv_cmd_buffer_destroy(cmd_buffer); 3402b8e80941Smrg } 3403b8e80941Smrg 3404b8e80941Smrg vk_free2(&device->alloc, pAllocator, pool); 3405b8e80941Smrg} 3406b8e80941Smrg 3407b8e80941SmrgVkResult radv_ResetCommandPool( 3408b8e80941Smrg VkDevice device, 3409b8e80941Smrg VkCommandPool commandPool, 3410b8e80941Smrg VkCommandPoolResetFlags flags) 3411b8e80941Smrg{ 3412b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool); 3413b8e80941Smrg VkResult result; 3414b8e80941Smrg 3415b8e80941Smrg list_for_each_entry(struct radv_cmd_buffer, cmd_buffer, 3416b8e80941Smrg &pool->cmd_buffers, pool_link) { 3417b8e80941Smrg result = radv_reset_cmd_buffer(cmd_buffer); 3418b8e80941Smrg if (result != VK_SUCCESS) 3419b8e80941Smrg return result; 3420b8e80941Smrg } 3421b8e80941Smrg 3422b8e80941Smrg return VK_SUCCESS; 3423b8e80941Smrg} 3424b8e80941Smrg 3425b8e80941Smrgvoid radv_TrimCommandPool( 3426b8e80941Smrg VkDevice device, 3427b8e80941Smrg VkCommandPool commandPool, 3428b8e80941Smrg VkCommandPoolTrimFlags flags) 3429b8e80941Smrg{ 3430b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool); 3431b8e80941Smrg 3432b8e80941Smrg if (!pool) 3433b8e80941Smrg return; 3434b8e80941Smrg 3435b8e80941Smrg list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer, 3436b8e80941Smrg &pool->free_cmd_buffers, pool_link) { 3437b8e80941Smrg radv_cmd_buffer_destroy(cmd_buffer); 3438b8e80941Smrg } 3439b8e80941Smrg} 3440b8e80941Smrg 3441b8e80941Smrgstatic uint32_t 3442b8e80941Smrgradv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer) 3443b8e80941Smrg{ 3444b8e80941Smrg struct radv_cmd_state *state = 
&cmd_buffer->state; 3445b8e80941Smrg uint32_t subpass_id = state->subpass - state->pass->subpasses; 3446b8e80941Smrg 3447b8e80941Smrg /* The id of this subpass shouldn't exceed the number of subpasses in 3448b8e80941Smrg * this render pass minus 1. 3449b8e80941Smrg */ 3450b8e80941Smrg assert(subpass_id < state->pass->subpass_count); 3451b8e80941Smrg return subpass_id; 3452b8e80941Smrg} 3453b8e80941Smrg 3454b8e80941Smrgstatic void 3455b8e80941Smrgradv_cmd_buffer_begin_subpass(struct radv_cmd_buffer *cmd_buffer, 3456b8e80941Smrg uint32_t subpass_id) 3457b8e80941Smrg{ 3458b8e80941Smrg struct radv_cmd_state *state = &cmd_buffer->state; 3459b8e80941Smrg struct radv_subpass *subpass = &state->pass->subpasses[subpass_id]; 3460b8e80941Smrg 3461b8e80941Smrg MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, 3462b8e80941Smrg cmd_buffer->cs, 4096); 3463b8e80941Smrg 3464b8e80941Smrg radv_subpass_barrier(cmd_buffer, &subpass->start_barrier); 3465b8e80941Smrg 3466b8e80941Smrg for (uint32_t i = 0; i < subpass->attachment_count; ++i) { 3467b8e80941Smrg const uint32_t a = subpass->attachments[i].attachment; 3468b8e80941Smrg if (a == VK_ATTACHMENT_UNUSED) 3469b8e80941Smrg continue; 3470b8e80941Smrg 3471b8e80941Smrg radv_handle_subpass_image_transition(cmd_buffer, 3472b8e80941Smrg subpass->attachments[i]); 3473b8e80941Smrg } 3474b8e80941Smrg 3475b8e80941Smrg radv_cmd_buffer_set_subpass(cmd_buffer, subpass); 3476b8e80941Smrg radv_cmd_buffer_clear_subpass(cmd_buffer); 3477b8e80941Smrg 3478b8e80941Smrg assert(cmd_buffer->cs->cdw <= cdw_max); 3479b8e80941Smrg} 3480b8e80941Smrg 3481b8e80941Smrgstatic void 3482b8e80941Smrgradv_cmd_buffer_end_subpass(struct radv_cmd_buffer *cmd_buffer) 3483b8e80941Smrg{ 3484b8e80941Smrg struct radv_cmd_state *state = &cmd_buffer->state; 3485b8e80941Smrg const struct radv_subpass *subpass = state->subpass; 3486b8e80941Smrg uint32_t subpass_id = radv_get_subpass_id(cmd_buffer); 3487b8e80941Smrg 3488b8e80941Smrg 
radv_cmd_buffer_resolve_subpass(cmd_buffer); 3489b8e80941Smrg 3490b8e80941Smrg for (uint32_t i = 0; i < subpass->attachment_count; ++i) { 3491b8e80941Smrg const uint32_t a = subpass->attachments[i].attachment; 3492b8e80941Smrg if (a == VK_ATTACHMENT_UNUSED) 3493b8e80941Smrg continue; 3494b8e80941Smrg 3495b8e80941Smrg if (state->pass->attachments[a].last_subpass_idx != subpass_id) 3496b8e80941Smrg continue; 3497b8e80941Smrg 3498b8e80941Smrg VkImageLayout layout = state->pass->attachments[a].final_layout; 3499b8e80941Smrg radv_handle_subpass_image_transition(cmd_buffer, 3500b8e80941Smrg (struct radv_subpass_attachment){a, layout}); 3501b8e80941Smrg } 3502b8e80941Smrg} 3503b8e80941Smrg 3504b8e80941Smrgvoid radv_CmdBeginRenderPass( 3505b8e80941Smrg VkCommandBuffer commandBuffer, 3506b8e80941Smrg const VkRenderPassBeginInfo* pRenderPassBegin, 3507b8e80941Smrg VkSubpassContents contents) 3508b8e80941Smrg{ 3509b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 3510b8e80941Smrg RADV_FROM_HANDLE(radv_render_pass, pass, pRenderPassBegin->renderPass); 3511b8e80941Smrg RADV_FROM_HANDLE(radv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); 3512b8e80941Smrg VkResult result; 3513b8e80941Smrg 3514b8e80941Smrg cmd_buffer->state.framebuffer = framebuffer; 3515b8e80941Smrg cmd_buffer->state.pass = pass; 3516b8e80941Smrg cmd_buffer->state.render_area = pRenderPassBegin->renderArea; 3517b8e80941Smrg 3518b8e80941Smrg result = radv_cmd_state_setup_attachments(cmd_buffer, pass, pRenderPassBegin); 3519b8e80941Smrg if (result != VK_SUCCESS) 3520b8e80941Smrg return; 3521b8e80941Smrg 3522b8e80941Smrg radv_cmd_buffer_begin_subpass(cmd_buffer, 0); 3523b8e80941Smrg} 3524b8e80941Smrg 3525b8e80941Smrgvoid radv_CmdBeginRenderPass2KHR( 3526b8e80941Smrg VkCommandBuffer commandBuffer, 3527b8e80941Smrg const VkRenderPassBeginInfo* pRenderPassBeginInfo, 3528b8e80941Smrg const VkSubpassBeginInfoKHR* pSubpassBeginInfo) 3529b8e80941Smrg{ 3530b8e80941Smrg 
radv_CmdBeginRenderPass(commandBuffer, pRenderPassBeginInfo, 3531b8e80941Smrg pSubpassBeginInfo->contents); 3532b8e80941Smrg} 3533b8e80941Smrg 3534b8e80941Smrgvoid radv_CmdNextSubpass( 3535b8e80941Smrg VkCommandBuffer commandBuffer, 3536b8e80941Smrg VkSubpassContents contents) 3537b8e80941Smrg{ 3538b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 3539b8e80941Smrg 3540b8e80941Smrg uint32_t prev_subpass = radv_get_subpass_id(cmd_buffer); 3541b8e80941Smrg radv_cmd_buffer_end_subpass(cmd_buffer); 3542b8e80941Smrg radv_cmd_buffer_begin_subpass(cmd_buffer, prev_subpass + 1); 3543b8e80941Smrg} 3544b8e80941Smrg 3545b8e80941Smrgvoid radv_CmdNextSubpass2KHR( 3546b8e80941Smrg VkCommandBuffer commandBuffer, 3547b8e80941Smrg const VkSubpassBeginInfoKHR* pSubpassBeginInfo, 3548b8e80941Smrg const VkSubpassEndInfoKHR* pSubpassEndInfo) 3549b8e80941Smrg{ 3550b8e80941Smrg radv_CmdNextSubpass(commandBuffer, pSubpassBeginInfo->contents); 3551b8e80941Smrg} 3552b8e80941Smrg 3553b8e80941Smrgstatic void radv_emit_view_index(struct radv_cmd_buffer *cmd_buffer, unsigned index) 3554b8e80941Smrg{ 3555b8e80941Smrg struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; 3556b8e80941Smrg for (unsigned stage = 0; stage < MESA_SHADER_STAGES; ++stage) { 3557b8e80941Smrg if (!radv_get_shader(pipeline, stage)) 3558b8e80941Smrg continue; 3559b8e80941Smrg 3560b8e80941Smrg struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, AC_UD_VIEW_INDEX); 3561b8e80941Smrg if (loc->sgpr_idx == -1) 3562b8e80941Smrg continue; 3563b8e80941Smrg uint32_t base_reg = pipeline->user_data_0[stage]; 3564b8e80941Smrg radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, index); 3565b8e80941Smrg 3566b8e80941Smrg } 3567b8e80941Smrg if (pipeline->gs_copy_shader) { 3568b8e80941Smrg struct radv_userdata_info *loc = &pipeline->gs_copy_shader->info.user_sgprs_locs.shader_data[AC_UD_VIEW_INDEX]; 3569b8e80941Smrg if (loc->sgpr_idx != -1) { 3570b8e80941Smrg uint32_t base_reg = 
R_00B130_SPI_SHADER_USER_DATA_VS_0; 3571b8e80941Smrg radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, index); 3572b8e80941Smrg } 3573b8e80941Smrg } 3574b8e80941Smrg} 3575b8e80941Smrg 3576b8e80941Smrgstatic void 3577b8e80941Smrgradv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer, 3578b8e80941Smrg uint32_t vertex_count, 3579b8e80941Smrg bool use_opaque) 3580b8e80941Smrg{ 3581b8e80941Smrg radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, cmd_buffer->state.predicating)); 3582b8e80941Smrg radeon_emit(cmd_buffer->cs, vertex_count); 3583b8e80941Smrg radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX | 3584b8e80941Smrg S_0287F0_USE_OPAQUE(use_opaque)); 3585b8e80941Smrg} 3586b8e80941Smrg 3587b8e80941Smrgstatic void 3588b8e80941Smrgradv_cs_emit_draw_indexed_packet(struct radv_cmd_buffer *cmd_buffer, 3589b8e80941Smrg uint64_t index_va, 3590b8e80941Smrg uint32_t index_count) 3591b8e80941Smrg{ 3592b8e80941Smrg radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_2, 4, cmd_buffer->state.predicating)); 3593b8e80941Smrg radeon_emit(cmd_buffer->cs, cmd_buffer->state.max_index_count); 3594b8e80941Smrg radeon_emit(cmd_buffer->cs, index_va); 3595b8e80941Smrg radeon_emit(cmd_buffer->cs, index_va >> 32); 3596b8e80941Smrg radeon_emit(cmd_buffer->cs, index_count); 3597b8e80941Smrg radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_DMA); 3598b8e80941Smrg} 3599b8e80941Smrg 3600b8e80941Smrgstatic void 3601b8e80941Smrgradv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, 3602b8e80941Smrg bool indexed, 3603b8e80941Smrg uint32_t draw_count, 3604b8e80941Smrg uint64_t count_va, 3605b8e80941Smrg uint32_t stride) 3606b8e80941Smrg{ 3607b8e80941Smrg struct radeon_cmdbuf *cs = cmd_buffer->cs; 3608b8e80941Smrg unsigned di_src_sel = indexed ? 
V_0287F0_DI_SRC_SEL_DMA 3609b8e80941Smrg : V_0287F0_DI_SRC_SEL_AUTO_INDEX; 3610b8e80941Smrg bool draw_id_enable = radv_get_shader(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX)->info.info.vs.needs_draw_id; 3611b8e80941Smrg uint32_t base_reg = cmd_buffer->state.pipeline->graphics.vtx_base_sgpr; 3612b8e80941Smrg bool predicating = cmd_buffer->state.predicating; 3613b8e80941Smrg assert(base_reg); 3614b8e80941Smrg 3615b8e80941Smrg /* just reset draw state for vertex data */ 3616b8e80941Smrg cmd_buffer->state.last_first_instance = -1; 3617b8e80941Smrg cmd_buffer->state.last_num_instances = -1; 3618b8e80941Smrg cmd_buffer->state.last_vertex_offset = -1; 3619b8e80941Smrg 3620b8e80941Smrg if (draw_count == 1 && !count_va && !draw_id_enable) { 3621b8e80941Smrg radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT : 3622b8e80941Smrg PKT3_DRAW_INDIRECT, 3, predicating)); 3623b8e80941Smrg radeon_emit(cs, 0); 3624b8e80941Smrg radeon_emit(cs, (base_reg - SI_SH_REG_OFFSET) >> 2); 3625b8e80941Smrg radeon_emit(cs, ((base_reg + 4) - SI_SH_REG_OFFSET) >> 2); 3626b8e80941Smrg radeon_emit(cs, di_src_sel); 3627b8e80941Smrg } else { 3628b8e80941Smrg radeon_emit(cs, PKT3(indexed ? 
PKT3_DRAW_INDEX_INDIRECT_MULTI : 3629b8e80941Smrg PKT3_DRAW_INDIRECT_MULTI, 3630b8e80941Smrg 8, predicating)); 3631b8e80941Smrg radeon_emit(cs, 0); 3632b8e80941Smrg radeon_emit(cs, (base_reg - SI_SH_REG_OFFSET) >> 2); 3633b8e80941Smrg radeon_emit(cs, ((base_reg + 4) - SI_SH_REG_OFFSET) >> 2); 3634b8e80941Smrg radeon_emit(cs, (((base_reg + 8) - SI_SH_REG_OFFSET) >> 2) | 3635b8e80941Smrg S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) | 3636b8e80941Smrg S_2C3_COUNT_INDIRECT_ENABLE(!!count_va)); 3637b8e80941Smrg radeon_emit(cs, draw_count); /* count */ 3638b8e80941Smrg radeon_emit(cs, count_va); /* count_addr */ 3639b8e80941Smrg radeon_emit(cs, count_va >> 32); 3640b8e80941Smrg radeon_emit(cs, stride); /* stride */ 3641b8e80941Smrg radeon_emit(cs, di_src_sel); 3642b8e80941Smrg } 3643b8e80941Smrg} 3644b8e80941Smrg 3645b8e80941Smrgstatic void 3646b8e80941Smrgradv_emit_draw_packets(struct radv_cmd_buffer *cmd_buffer, 3647b8e80941Smrg const struct radv_draw_info *info) 3648b8e80941Smrg{ 3649b8e80941Smrg struct radv_cmd_state *state = &cmd_buffer->state; 3650b8e80941Smrg struct radeon_winsys *ws = cmd_buffer->device->ws; 3651b8e80941Smrg struct radeon_cmdbuf *cs = cmd_buffer->cs; 3652b8e80941Smrg 3653b8e80941Smrg if (info->indirect) { 3654b8e80941Smrg uint64_t va = radv_buffer_get_va(info->indirect->bo); 3655b8e80941Smrg uint64_t count_va = 0; 3656b8e80941Smrg 3657b8e80941Smrg va += info->indirect->offset + info->indirect_offset; 3658b8e80941Smrg 3659b8e80941Smrg radv_cs_add_buffer(ws, cs, info->indirect->bo); 3660b8e80941Smrg 3661b8e80941Smrg radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0)); 3662b8e80941Smrg radeon_emit(cs, 1); 3663b8e80941Smrg radeon_emit(cs, va); 3664b8e80941Smrg radeon_emit(cs, va >> 32); 3665b8e80941Smrg 3666b8e80941Smrg if (info->count_buffer) { 3667b8e80941Smrg count_va = radv_buffer_get_va(info->count_buffer->bo); 3668b8e80941Smrg count_va += info->count_buffer->offset + 3669b8e80941Smrg info->count_buffer_offset; 3670b8e80941Smrg 3671b8e80941Smrg 
radv_cs_add_buffer(ws, cs, info->count_buffer->bo); 3672b8e80941Smrg } 3673b8e80941Smrg 3674b8e80941Smrg if (!state->subpass->view_mask) { 3675b8e80941Smrg radv_cs_emit_indirect_draw_packet(cmd_buffer, 3676b8e80941Smrg info->indexed, 3677b8e80941Smrg info->count, 3678b8e80941Smrg count_va, 3679b8e80941Smrg info->stride); 3680b8e80941Smrg } else { 3681b8e80941Smrg unsigned i; 3682b8e80941Smrg for_each_bit(i, state->subpass->view_mask) { 3683b8e80941Smrg radv_emit_view_index(cmd_buffer, i); 3684b8e80941Smrg 3685b8e80941Smrg radv_cs_emit_indirect_draw_packet(cmd_buffer, 3686b8e80941Smrg info->indexed, 3687b8e80941Smrg info->count, 3688b8e80941Smrg count_va, 3689b8e80941Smrg info->stride); 3690b8e80941Smrg } 3691b8e80941Smrg } 3692b8e80941Smrg } else { 3693b8e80941Smrg assert(state->pipeline->graphics.vtx_base_sgpr); 3694b8e80941Smrg 3695b8e80941Smrg if (info->vertex_offset != state->last_vertex_offset || 3696b8e80941Smrg info->first_instance != state->last_first_instance) { 3697b8e80941Smrg radeon_set_sh_reg_seq(cs, state->pipeline->graphics.vtx_base_sgpr, 3698b8e80941Smrg state->pipeline->graphics.vtx_emit_num); 3699b8e80941Smrg 3700b8e80941Smrg radeon_emit(cs, info->vertex_offset); 3701b8e80941Smrg radeon_emit(cs, info->first_instance); 3702b8e80941Smrg if (state->pipeline->graphics.vtx_emit_num == 3) 3703b8e80941Smrg radeon_emit(cs, 0); 3704b8e80941Smrg state->last_first_instance = info->first_instance; 3705b8e80941Smrg state->last_vertex_offset = info->vertex_offset; 3706b8e80941Smrg } 3707b8e80941Smrg 3708b8e80941Smrg if (state->last_num_instances != info->instance_count) { 3709b8e80941Smrg radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, false)); 3710b8e80941Smrg radeon_emit(cs, info->instance_count); 3711b8e80941Smrg state->last_num_instances = info->instance_count; 3712b8e80941Smrg } 3713b8e80941Smrg 3714b8e80941Smrg if (info->indexed) { 3715b8e80941Smrg int index_size = state->index_type ? 
4 : 2; 3716b8e80941Smrg uint64_t index_va; 3717b8e80941Smrg 3718b8e80941Smrg index_va = state->index_va; 3719b8e80941Smrg index_va += info->first_index * index_size; 3720b8e80941Smrg 3721b8e80941Smrg if (!state->subpass->view_mask) { 3722b8e80941Smrg radv_cs_emit_draw_indexed_packet(cmd_buffer, 3723b8e80941Smrg index_va, 3724b8e80941Smrg info->count); 3725b8e80941Smrg } else { 3726b8e80941Smrg unsigned i; 3727b8e80941Smrg for_each_bit(i, state->subpass->view_mask) { 3728b8e80941Smrg radv_emit_view_index(cmd_buffer, i); 3729b8e80941Smrg 3730b8e80941Smrg radv_cs_emit_draw_indexed_packet(cmd_buffer, 3731b8e80941Smrg index_va, 3732b8e80941Smrg info->count); 3733b8e80941Smrg } 3734b8e80941Smrg } 3735b8e80941Smrg } else { 3736b8e80941Smrg if (!state->subpass->view_mask) { 3737b8e80941Smrg radv_cs_emit_draw_packet(cmd_buffer, 3738b8e80941Smrg info->count, 3739b8e80941Smrg !!info->strmout_buffer); 3740b8e80941Smrg } else { 3741b8e80941Smrg unsigned i; 3742b8e80941Smrg for_each_bit(i, state->subpass->view_mask) { 3743b8e80941Smrg radv_emit_view_index(cmd_buffer, i); 3744b8e80941Smrg 3745b8e80941Smrg radv_cs_emit_draw_packet(cmd_buffer, 3746b8e80941Smrg info->count, 3747b8e80941Smrg !!info->strmout_buffer); 3748b8e80941Smrg } 3749b8e80941Smrg } 3750b8e80941Smrg } 3751b8e80941Smrg } 3752b8e80941Smrg} 3753b8e80941Smrg 3754b8e80941Smrg/* 3755b8e80941Smrg * Vega and raven have a bug which triggers if there are multiple context 3756b8e80941Smrg * register contexts active at the same time with different scissor values. 3757b8e80941Smrg * 3758b8e80941Smrg * There are two possible workarounds: 3759b8e80941Smrg * 1) Wait for PS_PARTIAL_FLUSH every time the scissor is changed. That way 3760b8e80941Smrg * there is only ever 1 active set of scissor values at the same time. 3761b8e80941Smrg * 3762b8e80941Smrg * 2) Whenever the hardware switches contexts we have to set the scissor 3763b8e80941Smrg * registers again even if it is a noop. 
That way the new context gets 3764b8e80941Smrg * the correct scissor values. 3765b8e80941Smrg * 3766b8e80941Smrg * This implements option 2. radv_need_late_scissor_emission needs to 3767b8e80941Smrg * return true on affected HW if radv_emit_all_graphics_states sets 3768b8e80941Smrg * any context registers. 3769b8e80941Smrg */ 3770b8e80941Smrgstatic bool radv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer, 3771b8e80941Smrg const struct radv_draw_info *info) 3772b8e80941Smrg{ 3773b8e80941Smrg struct radv_cmd_state *state = &cmd_buffer->state; 3774b8e80941Smrg 3775b8e80941Smrg if (!cmd_buffer->device->physical_device->has_scissor_bug) 3776b8e80941Smrg return false; 3777b8e80941Smrg 3778b8e80941Smrg if (cmd_buffer->state.context_roll_without_scissor_emitted || info->strmout_buffer) 3779b8e80941Smrg return true; 3780b8e80941Smrg 3781b8e80941Smrg uint32_t used_states = cmd_buffer->state.pipeline->graphics.needed_dynamic_state | ~RADV_CMD_DIRTY_DYNAMIC_ALL; 3782b8e80941Smrg 3783b8e80941Smrg /* Index, vertex and streamout buffers don't change context regs, and 3784b8e80941Smrg * pipeline is already handled. 3785b8e80941Smrg */ 3786b8e80941Smrg used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER | 3787b8e80941Smrg RADV_CMD_DIRTY_VERTEX_BUFFER | 3788b8e80941Smrg RADV_CMD_DIRTY_STREAMOUT_BUFFER | 3789b8e80941Smrg RADV_CMD_DIRTY_PIPELINE); 3790b8e80941Smrg 3791b8e80941Smrg if (cmd_buffer->state.dirty & used_states) 3792b8e80941Smrg return true; 3793b8e80941Smrg 3794b8e80941Smrg if (info->indexed && state->pipeline->graphics.prim_restart_enable && 3795b8e80941Smrg (state->index_type ? 
0xffffffffu : 0xffffu) != state->last_primitive_reset_index) 3796b8e80941Smrg return true; 3797b8e80941Smrg 3798b8e80941Smrg return false; 3799b8e80941Smrg} 3800b8e80941Smrg 3801b8e80941Smrgstatic void 3802b8e80941Smrgradv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, 3803b8e80941Smrg const struct radv_draw_info *info) 3804b8e80941Smrg{ 3805b8e80941Smrg bool late_scissor_emission; 3806b8e80941Smrg 3807b8e80941Smrg if ((cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) || 3808b8e80941Smrg cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline) 3809b8e80941Smrg radv_emit_rbplus_state(cmd_buffer); 3810b8e80941Smrg 3811b8e80941Smrg if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) 3812b8e80941Smrg radv_emit_graphics_pipeline(cmd_buffer); 3813b8e80941Smrg 3814b8e80941Smrg /* This should be before the cmd_buffer->state.dirty is cleared 3815b8e80941Smrg * (excluding RADV_CMD_DIRTY_PIPELINE) and after 3816b8e80941Smrg * cmd_buffer->state.context_roll_without_scissor_emitted is set. */ 3817b8e80941Smrg late_scissor_emission = 3818b8e80941Smrg radv_need_late_scissor_emission(cmd_buffer, info); 3819b8e80941Smrg 3820b8e80941Smrg if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) 3821b8e80941Smrg radv_emit_framebuffer_state(cmd_buffer); 3822b8e80941Smrg 3823b8e80941Smrg if (info->indexed) { 3824b8e80941Smrg if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_INDEX_BUFFER) 3825b8e80941Smrg radv_emit_index_buffer(cmd_buffer); 3826b8e80941Smrg } else { 3827b8e80941Smrg /* On CI and later, non-indexed draws overwrite VGT_INDEX_TYPE, 3828b8e80941Smrg * so the state must be re-emitted before the next indexed 3829b8e80941Smrg * draw. 
3830b8e80941Smrg */ 3831b8e80941Smrg if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) { 3832b8e80941Smrg cmd_buffer->state.last_index_type = -1; 3833b8e80941Smrg cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER; 3834b8e80941Smrg } 3835b8e80941Smrg } 3836b8e80941Smrg 3837b8e80941Smrg radv_cmd_buffer_flush_dynamic_state(cmd_buffer); 3838b8e80941Smrg 3839b8e80941Smrg radv_emit_draw_registers(cmd_buffer, info); 3840b8e80941Smrg 3841b8e80941Smrg if (late_scissor_emission) 3842b8e80941Smrg radv_emit_scissor(cmd_buffer); 3843b8e80941Smrg} 3844b8e80941Smrg 3845b8e80941Smrgstatic void 3846b8e80941Smrgradv_draw(struct radv_cmd_buffer *cmd_buffer, 3847b8e80941Smrg const struct radv_draw_info *info) 3848b8e80941Smrg{ 3849b8e80941Smrg struct radeon_info *rad_info = 3850b8e80941Smrg &cmd_buffer->device->physical_device->rad_info; 3851b8e80941Smrg bool has_prefetch = 3852b8e80941Smrg cmd_buffer->device->physical_device->rad_info.chip_class >= CIK; 3853b8e80941Smrg bool pipeline_is_dirty = 3854b8e80941Smrg (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) && 3855b8e80941Smrg cmd_buffer->state.pipeline != cmd_buffer->state.emitted_pipeline; 3856b8e80941Smrg 3857b8e80941Smrg MAYBE_UNUSED unsigned cdw_max = 3858b8e80941Smrg radeon_check_space(cmd_buffer->device->ws, 3859b8e80941Smrg cmd_buffer->cs, 4096); 3860b8e80941Smrg 3861b8e80941Smrg if (likely(!info->indirect)) { 3862b8e80941Smrg /* SI-CI treat instance_count==0 as instance_count==1. There is 3863b8e80941Smrg * no workaround for indirect draws, but we can at least skip 3864b8e80941Smrg * direct draws. 3865b8e80941Smrg */ 3866b8e80941Smrg if (unlikely(!info->instance_count)) 3867b8e80941Smrg return; 3868b8e80941Smrg 3869b8e80941Smrg /* Handle count == 0. */ 3870b8e80941Smrg if (unlikely(!info->count && !info->strmout_buffer)) 3871b8e80941Smrg return; 3872b8e80941Smrg } 3873b8e80941Smrg 3874b8e80941Smrg /* Use optimal packet order based on whether we need to sync the 3875b8e80941Smrg * pipeline. 
3876b8e80941Smrg */ 3877b8e80941Smrg if (cmd_buffer->state.flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | 3878b8e80941Smrg RADV_CMD_FLAG_FLUSH_AND_INV_DB | 3879b8e80941Smrg RADV_CMD_FLAG_PS_PARTIAL_FLUSH | 3880b8e80941Smrg RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) { 3881b8e80941Smrg /* If we have to wait for idle, set all states first, so that 3882b8e80941Smrg * all SET packets are processed in parallel with previous draw 3883b8e80941Smrg * calls. Then upload descriptors, set shader pointers, and 3884b8e80941Smrg * draw, and prefetch at the end. This ensures that the time 3885b8e80941Smrg * the CUs are idle is very short. (there are only SET_SH 3886b8e80941Smrg * packets between the wait and the draw) 3887b8e80941Smrg */ 3888b8e80941Smrg radv_emit_all_graphics_states(cmd_buffer, info); 3889b8e80941Smrg si_emit_cache_flush(cmd_buffer); 3890b8e80941Smrg /* <-- CUs are idle here --> */ 3891b8e80941Smrg 3892b8e80941Smrg radv_upload_graphics_shader_descriptors(cmd_buffer, pipeline_is_dirty); 3893b8e80941Smrg 3894b8e80941Smrg radv_emit_draw_packets(cmd_buffer, info); 3895b8e80941Smrg /* <-- CUs are busy here --> */ 3896b8e80941Smrg 3897b8e80941Smrg /* Start prefetches after the draw has been started. Both will 3898b8e80941Smrg * run in parallel, but starting the draw first is more 3899b8e80941Smrg * important. 3900b8e80941Smrg */ 3901b8e80941Smrg if (has_prefetch && cmd_buffer->state.prefetch_L2_mask) { 3902b8e80941Smrg radv_emit_prefetch_L2(cmd_buffer, 3903b8e80941Smrg cmd_buffer->state.pipeline, false); 3904b8e80941Smrg } 3905b8e80941Smrg } else { 3906b8e80941Smrg /* If we don't wait for idle, start prefetches first, then set 3907b8e80941Smrg * states, and draw at the end. 3908b8e80941Smrg */ 3909b8e80941Smrg si_emit_cache_flush(cmd_buffer); 3910b8e80941Smrg 3911b8e80941Smrg if (has_prefetch && cmd_buffer->state.prefetch_L2_mask) { 3912b8e80941Smrg /* Only prefetch the vertex shader and VBO descriptors 3913b8e80941Smrg * in order to start the draw as soon as possible. 
3914b8e80941Smrg */ 3915b8e80941Smrg radv_emit_prefetch_L2(cmd_buffer, 3916b8e80941Smrg cmd_buffer->state.pipeline, true); 3917b8e80941Smrg } 3918b8e80941Smrg 3919b8e80941Smrg radv_upload_graphics_shader_descriptors(cmd_buffer, pipeline_is_dirty); 3920b8e80941Smrg 3921b8e80941Smrg radv_emit_all_graphics_states(cmd_buffer, info); 3922b8e80941Smrg radv_emit_draw_packets(cmd_buffer, info); 3923b8e80941Smrg 3924b8e80941Smrg /* Prefetch the remaining shaders after the draw has been 3925b8e80941Smrg * started. 3926b8e80941Smrg */ 3927b8e80941Smrg if (has_prefetch && cmd_buffer->state.prefetch_L2_mask) { 3928b8e80941Smrg radv_emit_prefetch_L2(cmd_buffer, 3929b8e80941Smrg cmd_buffer->state.pipeline, false); 3930b8e80941Smrg } 3931b8e80941Smrg } 3932b8e80941Smrg 3933b8e80941Smrg /* Workaround for a VGT hang when streamout is enabled. 3934b8e80941Smrg * It must be done after drawing. 3935b8e80941Smrg */ 3936b8e80941Smrg if (cmd_buffer->state.streamout.streamout_enabled && 3937b8e80941Smrg (rad_info->family == CHIP_HAWAII || 3938b8e80941Smrg rad_info->family == CHIP_TONGA || 3939b8e80941Smrg rad_info->family == CHIP_FIJI)) { 3940b8e80941Smrg cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_STREAMOUT_SYNC; 3941b8e80941Smrg } 3942b8e80941Smrg 3943b8e80941Smrg assert(cmd_buffer->cs->cdw <= cdw_max); 3944b8e80941Smrg radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_PS_PARTIAL_FLUSH); 3945b8e80941Smrg} 3946b8e80941Smrg 3947b8e80941Smrgvoid radv_CmdDraw( 3948b8e80941Smrg VkCommandBuffer commandBuffer, 3949b8e80941Smrg uint32_t vertexCount, 3950b8e80941Smrg uint32_t instanceCount, 3951b8e80941Smrg uint32_t firstVertex, 3952b8e80941Smrg uint32_t firstInstance) 3953b8e80941Smrg{ 3954b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 3955b8e80941Smrg struct radv_draw_info info = {}; 3956b8e80941Smrg 3957b8e80941Smrg info.count = vertexCount; 3958b8e80941Smrg info.instance_count = instanceCount; 3959b8e80941Smrg info.first_instance = firstInstance; 
3960b8e80941Smrg info.vertex_offset = firstVertex; 3961b8e80941Smrg 3962b8e80941Smrg radv_draw(cmd_buffer, &info); 3963b8e80941Smrg} 3964b8e80941Smrg 3965b8e80941Smrgvoid radv_CmdDrawIndexed( 3966b8e80941Smrg VkCommandBuffer commandBuffer, 3967b8e80941Smrg uint32_t indexCount, 3968b8e80941Smrg uint32_t instanceCount, 3969b8e80941Smrg uint32_t firstIndex, 3970b8e80941Smrg int32_t vertexOffset, 3971b8e80941Smrg uint32_t firstInstance) 3972b8e80941Smrg{ 3973b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 3974b8e80941Smrg struct radv_draw_info info = {}; 3975b8e80941Smrg 3976b8e80941Smrg info.indexed = true; 3977b8e80941Smrg info.count = indexCount; 3978b8e80941Smrg info.instance_count = instanceCount; 3979b8e80941Smrg info.first_index = firstIndex; 3980b8e80941Smrg info.vertex_offset = vertexOffset; 3981b8e80941Smrg info.first_instance = firstInstance; 3982b8e80941Smrg 3983b8e80941Smrg radv_draw(cmd_buffer, &info); 3984b8e80941Smrg} 3985b8e80941Smrg 3986b8e80941Smrgvoid radv_CmdDrawIndirect( 3987b8e80941Smrg VkCommandBuffer commandBuffer, 3988b8e80941Smrg VkBuffer _buffer, 3989b8e80941Smrg VkDeviceSize offset, 3990b8e80941Smrg uint32_t drawCount, 3991b8e80941Smrg uint32_t stride) 3992b8e80941Smrg{ 3993b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 3994b8e80941Smrg RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); 3995b8e80941Smrg struct radv_draw_info info = {}; 3996b8e80941Smrg 3997b8e80941Smrg info.count = drawCount; 3998b8e80941Smrg info.indirect = buffer; 3999b8e80941Smrg info.indirect_offset = offset; 4000b8e80941Smrg info.stride = stride; 4001b8e80941Smrg 4002b8e80941Smrg radv_draw(cmd_buffer, &info); 4003b8e80941Smrg} 4004b8e80941Smrg 4005b8e80941Smrgvoid radv_CmdDrawIndexedIndirect( 4006b8e80941Smrg VkCommandBuffer commandBuffer, 4007b8e80941Smrg VkBuffer _buffer, 4008b8e80941Smrg VkDeviceSize offset, 4009b8e80941Smrg uint32_t drawCount, 4010b8e80941Smrg uint32_t stride) 4011b8e80941Smrg{ 4012b8e80941Smrg 
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 4013b8e80941Smrg RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); 4014b8e80941Smrg struct radv_draw_info info = {}; 4015b8e80941Smrg 4016b8e80941Smrg info.indexed = true; 4017b8e80941Smrg info.count = drawCount; 4018b8e80941Smrg info.indirect = buffer; 4019b8e80941Smrg info.indirect_offset = offset; 4020b8e80941Smrg info.stride = stride; 4021b8e80941Smrg 4022b8e80941Smrg radv_draw(cmd_buffer, &info); 4023b8e80941Smrg} 4024b8e80941Smrg 4025b8e80941Smrgvoid radv_CmdDrawIndirectCountAMD( 4026b8e80941Smrg VkCommandBuffer commandBuffer, 4027b8e80941Smrg VkBuffer _buffer, 4028b8e80941Smrg VkDeviceSize offset, 4029b8e80941Smrg VkBuffer _countBuffer, 4030b8e80941Smrg VkDeviceSize countBufferOffset, 4031b8e80941Smrg uint32_t maxDrawCount, 4032b8e80941Smrg uint32_t stride) 4033b8e80941Smrg{ 4034b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 4035b8e80941Smrg RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); 4036b8e80941Smrg RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer); 4037b8e80941Smrg struct radv_draw_info info = {}; 4038b8e80941Smrg 4039b8e80941Smrg info.count = maxDrawCount; 4040b8e80941Smrg info.indirect = buffer; 4041b8e80941Smrg info.indirect_offset = offset; 4042b8e80941Smrg info.count_buffer = count_buffer; 4043b8e80941Smrg info.count_buffer_offset = countBufferOffset; 4044b8e80941Smrg info.stride = stride; 4045b8e80941Smrg 4046b8e80941Smrg radv_draw(cmd_buffer, &info); 4047b8e80941Smrg} 4048b8e80941Smrg 4049b8e80941Smrgvoid radv_CmdDrawIndexedIndirectCountAMD( 4050b8e80941Smrg VkCommandBuffer commandBuffer, 4051b8e80941Smrg VkBuffer _buffer, 4052b8e80941Smrg VkDeviceSize offset, 4053b8e80941Smrg VkBuffer _countBuffer, 4054b8e80941Smrg VkDeviceSize countBufferOffset, 4055b8e80941Smrg uint32_t maxDrawCount, 4056b8e80941Smrg uint32_t stride) 4057b8e80941Smrg{ 4058b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 4059b8e80941Smrg 
RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); 4060b8e80941Smrg RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer); 4061b8e80941Smrg struct radv_draw_info info = {}; 4062b8e80941Smrg 4063b8e80941Smrg info.indexed = true; 4064b8e80941Smrg info.count = maxDrawCount; 4065b8e80941Smrg info.indirect = buffer; 4066b8e80941Smrg info.indirect_offset = offset; 4067b8e80941Smrg info.count_buffer = count_buffer; 4068b8e80941Smrg info.count_buffer_offset = countBufferOffset; 4069b8e80941Smrg info.stride = stride; 4070b8e80941Smrg 4071b8e80941Smrg radv_draw(cmd_buffer, &info); 4072b8e80941Smrg} 4073b8e80941Smrg 4074b8e80941Smrgvoid radv_CmdDrawIndirectCountKHR( 4075b8e80941Smrg VkCommandBuffer commandBuffer, 4076b8e80941Smrg VkBuffer _buffer, 4077b8e80941Smrg VkDeviceSize offset, 4078b8e80941Smrg VkBuffer _countBuffer, 4079b8e80941Smrg VkDeviceSize countBufferOffset, 4080b8e80941Smrg uint32_t maxDrawCount, 4081b8e80941Smrg uint32_t stride) 4082b8e80941Smrg{ 4083b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 4084b8e80941Smrg RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); 4085b8e80941Smrg RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer); 4086b8e80941Smrg struct radv_draw_info info = {}; 4087b8e80941Smrg 4088b8e80941Smrg info.count = maxDrawCount; 4089b8e80941Smrg info.indirect = buffer; 4090b8e80941Smrg info.indirect_offset = offset; 4091b8e80941Smrg info.count_buffer = count_buffer; 4092b8e80941Smrg info.count_buffer_offset = countBufferOffset; 4093b8e80941Smrg info.stride = stride; 4094b8e80941Smrg 4095b8e80941Smrg radv_draw(cmd_buffer, &info); 4096b8e80941Smrg} 4097b8e80941Smrg 4098b8e80941Smrgvoid radv_CmdDrawIndexedIndirectCountKHR( 4099b8e80941Smrg VkCommandBuffer commandBuffer, 4100b8e80941Smrg VkBuffer _buffer, 4101b8e80941Smrg VkDeviceSize offset, 4102b8e80941Smrg VkBuffer _countBuffer, 4103b8e80941Smrg VkDeviceSize countBufferOffset, 4104b8e80941Smrg uint32_t maxDrawCount, 4105b8e80941Smrg uint32_t stride) 4106b8e80941Smrg{ 
4107b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 4108b8e80941Smrg RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); 4109b8e80941Smrg RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer); 4110b8e80941Smrg struct radv_draw_info info = {}; 4111b8e80941Smrg 4112b8e80941Smrg info.indexed = true; 4113b8e80941Smrg info.count = maxDrawCount; 4114b8e80941Smrg info.indirect = buffer; 4115b8e80941Smrg info.indirect_offset = offset; 4116b8e80941Smrg info.count_buffer = count_buffer; 4117b8e80941Smrg info.count_buffer_offset = countBufferOffset; 4118b8e80941Smrg info.stride = stride; 4119b8e80941Smrg 4120b8e80941Smrg radv_draw(cmd_buffer, &info); 4121b8e80941Smrg} 4122b8e80941Smrg 4123b8e80941Smrgstruct radv_dispatch_info { 4124b8e80941Smrg /** 4125b8e80941Smrg * Determine the layout of the grid (in block units) to be used. 4126b8e80941Smrg */ 4127b8e80941Smrg uint32_t blocks[3]; 4128b8e80941Smrg 4129b8e80941Smrg /** 4130b8e80941Smrg * A starting offset for the grid. If unaligned is set, the offset 4131b8e80941Smrg * must still be aligned. 4132b8e80941Smrg */ 4133b8e80941Smrg uint32_t offsets[3]; 4134b8e80941Smrg /** 4135b8e80941Smrg * Whether it's an unaligned compute dispatch. 4136b8e80941Smrg */ 4137b8e80941Smrg bool unaligned; 4138b8e80941Smrg 4139b8e80941Smrg /** 4140b8e80941Smrg * Indirect compute parameters resource. 
4141b8e80941Smrg */ 4142b8e80941Smrg struct radv_buffer *indirect; 4143b8e80941Smrg uint64_t indirect_offset; 4144b8e80941Smrg}; 4145b8e80941Smrg 4146b8e80941Smrgstatic void 4147b8e80941Smrgradv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, 4148b8e80941Smrg const struct radv_dispatch_info *info) 4149b8e80941Smrg{ 4150b8e80941Smrg struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; 4151b8e80941Smrg struct radv_shader_variant *compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE]; 4152b8e80941Smrg unsigned dispatch_initiator = cmd_buffer->device->dispatch_initiator; 4153b8e80941Smrg struct radeon_winsys *ws = cmd_buffer->device->ws; 4154b8e80941Smrg bool predicating = cmd_buffer->state.predicating; 4155b8e80941Smrg struct radeon_cmdbuf *cs = cmd_buffer->cs; 4156b8e80941Smrg struct radv_userdata_info *loc; 4157b8e80941Smrg 4158b8e80941Smrg loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_COMPUTE, 4159b8e80941Smrg AC_UD_CS_GRID_SIZE); 4160b8e80941Smrg 4161b8e80941Smrg MAYBE_UNUSED unsigned cdw_max = radeon_check_space(ws, cs, 25); 4162b8e80941Smrg 4163b8e80941Smrg if (info->indirect) { 4164b8e80941Smrg uint64_t va = radv_buffer_get_va(info->indirect->bo); 4165b8e80941Smrg 4166b8e80941Smrg va += info->indirect->offset + info->indirect_offset; 4167b8e80941Smrg 4168b8e80941Smrg radv_cs_add_buffer(ws, cs, info->indirect->bo); 4169b8e80941Smrg 4170b8e80941Smrg if (loc->sgpr_idx != -1) { 4171b8e80941Smrg for (unsigned i = 0; i < 3; ++i) { 4172b8e80941Smrg radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); 4173b8e80941Smrg radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | 4174b8e80941Smrg COPY_DATA_DST_SEL(COPY_DATA_REG)); 4175b8e80941Smrg radeon_emit(cs, (va + 4 * i)); 4176b8e80941Smrg radeon_emit(cs, (va + 4 * i) >> 32); 4177b8e80941Smrg radeon_emit(cs, ((R_00B900_COMPUTE_USER_DATA_0 4178b8e80941Smrg + loc->sgpr_idx * 4) >> 2) + i); 4179b8e80941Smrg radeon_emit(cs, 0); 4180b8e80941Smrg } 4181b8e80941Smrg } 4182b8e80941Smrg 4183b8e80941Smrg if 
(radv_cmd_buffer_uses_mec(cmd_buffer)) { 4184b8e80941Smrg radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, predicating) | 4185b8e80941Smrg PKT3_SHADER_TYPE_S(1)); 4186b8e80941Smrg radeon_emit(cs, va); 4187b8e80941Smrg radeon_emit(cs, va >> 32); 4188b8e80941Smrg radeon_emit(cs, dispatch_initiator); 4189b8e80941Smrg } else { 4190b8e80941Smrg radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0) | 4191b8e80941Smrg PKT3_SHADER_TYPE_S(1)); 4192b8e80941Smrg radeon_emit(cs, 1); 4193b8e80941Smrg radeon_emit(cs, va); 4194b8e80941Smrg radeon_emit(cs, va >> 32); 4195b8e80941Smrg 4196b8e80941Smrg radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 1, predicating) | 4197b8e80941Smrg PKT3_SHADER_TYPE_S(1)); 4198b8e80941Smrg radeon_emit(cs, 0); 4199b8e80941Smrg radeon_emit(cs, dispatch_initiator); 4200b8e80941Smrg } 4201b8e80941Smrg } else { 4202b8e80941Smrg unsigned blocks[3] = { info->blocks[0], info->blocks[1], info->blocks[2] }; 4203b8e80941Smrg unsigned offsets[3] = { info->offsets[0], info->offsets[1], info->offsets[2] }; 4204b8e80941Smrg 4205b8e80941Smrg if (info->unaligned) { 4206b8e80941Smrg unsigned *cs_block_size = compute_shader->info.cs.block_size; 4207b8e80941Smrg unsigned remainder[3]; 4208b8e80941Smrg 4209b8e80941Smrg /* If aligned, these should be an entire block size, 4210b8e80941Smrg * not 0. 
4211b8e80941Smrg */ 4212b8e80941Smrg remainder[0] = blocks[0] + cs_block_size[0] - 4213b8e80941Smrg align_u32_npot(blocks[0], cs_block_size[0]); 4214b8e80941Smrg remainder[1] = blocks[1] + cs_block_size[1] - 4215b8e80941Smrg align_u32_npot(blocks[1], cs_block_size[1]); 4216b8e80941Smrg remainder[2] = blocks[2] + cs_block_size[2] - 4217b8e80941Smrg align_u32_npot(blocks[2], cs_block_size[2]); 4218b8e80941Smrg 4219b8e80941Smrg blocks[0] = round_up_u32(blocks[0], cs_block_size[0]); 4220b8e80941Smrg blocks[1] = round_up_u32(blocks[1], cs_block_size[1]); 4221b8e80941Smrg blocks[2] = round_up_u32(blocks[2], cs_block_size[2]); 4222b8e80941Smrg 4223b8e80941Smrg for(unsigned i = 0; i < 3; ++i) { 4224b8e80941Smrg assert(offsets[i] % cs_block_size[i] == 0); 4225b8e80941Smrg offsets[i] /= cs_block_size[i]; 4226b8e80941Smrg } 4227b8e80941Smrg 4228b8e80941Smrg radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); 4229b8e80941Smrg radeon_emit(cs, 4230b8e80941Smrg S_00B81C_NUM_THREAD_FULL(cs_block_size[0]) | 4231b8e80941Smrg S_00B81C_NUM_THREAD_PARTIAL(remainder[0])); 4232b8e80941Smrg radeon_emit(cs, 4233b8e80941Smrg S_00B81C_NUM_THREAD_FULL(cs_block_size[1]) | 4234b8e80941Smrg S_00B81C_NUM_THREAD_PARTIAL(remainder[1])); 4235b8e80941Smrg radeon_emit(cs, 4236b8e80941Smrg S_00B81C_NUM_THREAD_FULL(cs_block_size[2]) | 4237b8e80941Smrg S_00B81C_NUM_THREAD_PARTIAL(remainder[2])); 4238b8e80941Smrg 4239b8e80941Smrg dispatch_initiator |= S_00B800_PARTIAL_TG_EN(1); 4240b8e80941Smrg } 4241b8e80941Smrg 4242b8e80941Smrg if (loc->sgpr_idx != -1) { 4243b8e80941Smrg assert(loc->num_sgprs == 3); 4244b8e80941Smrg 4245b8e80941Smrg radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 + 4246b8e80941Smrg loc->sgpr_idx * 4, 3); 4247b8e80941Smrg radeon_emit(cs, blocks[0]); 4248b8e80941Smrg radeon_emit(cs, blocks[1]); 4249b8e80941Smrg radeon_emit(cs, blocks[2]); 4250b8e80941Smrg } 4251b8e80941Smrg 4252b8e80941Smrg if (offsets[0] || offsets[1] || offsets[2]) { 4253b8e80941Smrg 
radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3); 4254b8e80941Smrg radeon_emit(cs, offsets[0]); 4255b8e80941Smrg radeon_emit(cs, offsets[1]); 4256b8e80941Smrg radeon_emit(cs, offsets[2]); 4257b8e80941Smrg 4258b8e80941Smrg /* The blocks in the packet are not counts but end values. */ 4259b8e80941Smrg for (unsigned i = 0; i < 3; ++i) 4260b8e80941Smrg blocks[i] += offsets[i]; 4261b8e80941Smrg } else { 4262b8e80941Smrg dispatch_initiator |= S_00B800_FORCE_START_AT_000(1); 4263b8e80941Smrg } 4264b8e80941Smrg 4265b8e80941Smrg radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, predicating) | 4266b8e80941Smrg PKT3_SHADER_TYPE_S(1)); 4267b8e80941Smrg radeon_emit(cs, blocks[0]); 4268b8e80941Smrg radeon_emit(cs, blocks[1]); 4269b8e80941Smrg radeon_emit(cs, blocks[2]); 4270b8e80941Smrg radeon_emit(cs, dispatch_initiator); 4271b8e80941Smrg } 4272b8e80941Smrg 4273b8e80941Smrg assert(cmd_buffer->cs->cdw <= cdw_max); 4274b8e80941Smrg} 4275b8e80941Smrg 4276b8e80941Smrgstatic void 4277b8e80941Smrgradv_upload_compute_shader_descriptors(struct radv_cmd_buffer *cmd_buffer) 4278b8e80941Smrg{ 4279b8e80941Smrg radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT); 4280b8e80941Smrg radv_flush_constants(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT); 4281b8e80941Smrg} 4282b8e80941Smrg 4283b8e80941Smrgstatic void 4284b8e80941Smrgradv_dispatch(struct radv_cmd_buffer *cmd_buffer, 4285b8e80941Smrg const struct radv_dispatch_info *info) 4286b8e80941Smrg{ 4287b8e80941Smrg struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; 4288b8e80941Smrg bool has_prefetch = 4289b8e80941Smrg cmd_buffer->device->physical_device->rad_info.chip_class >= CIK; 4290b8e80941Smrg bool pipeline_is_dirty = pipeline && 4291b8e80941Smrg pipeline != cmd_buffer->state.emitted_compute_pipeline; 4292b8e80941Smrg 4293b8e80941Smrg if (cmd_buffer->state.flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | 4294b8e80941Smrg RADV_CMD_FLAG_FLUSH_AND_INV_DB | 4295b8e80941Smrg RADV_CMD_FLAG_PS_PARTIAL_FLUSH | 
4296b8e80941Smrg RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) { 4297b8e80941Smrg /* If we have to wait for idle, set all states first, so that 4298b8e80941Smrg * all SET packets are processed in parallel with previous draw 4299b8e80941Smrg * calls. Then upload descriptors, set shader pointers, and 4300b8e80941Smrg * dispatch, and prefetch at the end. This ensures that the 4301b8e80941Smrg * time the CUs are idle is very short. (there are only SET_SH 4302b8e80941Smrg * packets between the wait and the draw) 4303b8e80941Smrg */ 4304b8e80941Smrg radv_emit_compute_pipeline(cmd_buffer); 4305b8e80941Smrg si_emit_cache_flush(cmd_buffer); 4306b8e80941Smrg /* <-- CUs are idle here --> */ 4307b8e80941Smrg 4308b8e80941Smrg radv_upload_compute_shader_descriptors(cmd_buffer); 4309b8e80941Smrg 4310b8e80941Smrg radv_emit_dispatch_packets(cmd_buffer, info); 4311b8e80941Smrg /* <-- CUs are busy here --> */ 4312b8e80941Smrg 4313b8e80941Smrg /* Start prefetches after the dispatch has been started. Both 4314b8e80941Smrg * will run in parallel, but starting the dispatch first is 4315b8e80941Smrg * more important. 4316b8e80941Smrg */ 4317b8e80941Smrg if (has_prefetch && pipeline_is_dirty) { 4318b8e80941Smrg radv_emit_shader_prefetch(cmd_buffer, 4319b8e80941Smrg pipeline->shaders[MESA_SHADER_COMPUTE]); 4320b8e80941Smrg } 4321b8e80941Smrg } else { 4322b8e80941Smrg /* If we don't wait for idle, start prefetches first, then set 4323b8e80941Smrg * states, and dispatch at the end. 
4324b8e80941Smrg */ 4325b8e80941Smrg si_emit_cache_flush(cmd_buffer); 4326b8e80941Smrg 4327b8e80941Smrg if (has_prefetch && pipeline_is_dirty) { 4328b8e80941Smrg radv_emit_shader_prefetch(cmd_buffer, 4329b8e80941Smrg pipeline->shaders[MESA_SHADER_COMPUTE]); 4330b8e80941Smrg } 4331b8e80941Smrg 4332b8e80941Smrg radv_upload_compute_shader_descriptors(cmd_buffer); 4333b8e80941Smrg 4334b8e80941Smrg radv_emit_compute_pipeline(cmd_buffer); 4335b8e80941Smrg radv_emit_dispatch_packets(cmd_buffer, info); 4336b8e80941Smrg } 4337b8e80941Smrg 4338b8e80941Smrg radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_CS_PARTIAL_FLUSH); 4339b8e80941Smrg} 4340b8e80941Smrg 4341b8e80941Smrgvoid radv_CmdDispatchBase( 4342b8e80941Smrg VkCommandBuffer commandBuffer, 4343b8e80941Smrg uint32_t base_x, 4344b8e80941Smrg uint32_t base_y, 4345b8e80941Smrg uint32_t base_z, 4346b8e80941Smrg uint32_t x, 4347b8e80941Smrg uint32_t y, 4348b8e80941Smrg uint32_t z) 4349b8e80941Smrg{ 4350b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 4351b8e80941Smrg struct radv_dispatch_info info = {}; 4352b8e80941Smrg 4353b8e80941Smrg info.blocks[0] = x; 4354b8e80941Smrg info.blocks[1] = y; 4355b8e80941Smrg info.blocks[2] = z; 4356b8e80941Smrg 4357b8e80941Smrg info.offsets[0] = base_x; 4358b8e80941Smrg info.offsets[1] = base_y; 4359b8e80941Smrg info.offsets[2] = base_z; 4360b8e80941Smrg radv_dispatch(cmd_buffer, &info); 4361b8e80941Smrg} 4362b8e80941Smrg 4363b8e80941Smrgvoid radv_CmdDispatch( 4364b8e80941Smrg VkCommandBuffer commandBuffer, 4365b8e80941Smrg uint32_t x, 4366b8e80941Smrg uint32_t y, 4367b8e80941Smrg uint32_t z) 4368b8e80941Smrg{ 4369b8e80941Smrg radv_CmdDispatchBase(commandBuffer, 0, 0, 0, x, y, z); 4370b8e80941Smrg} 4371b8e80941Smrg 4372b8e80941Smrgvoid radv_CmdDispatchIndirect( 4373b8e80941Smrg VkCommandBuffer commandBuffer, 4374b8e80941Smrg VkBuffer _buffer, 4375b8e80941Smrg VkDeviceSize offset) 4376b8e80941Smrg{ 4377b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, 
commandBuffer); 4378b8e80941Smrg RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); 4379b8e80941Smrg struct radv_dispatch_info info = {}; 4380b8e80941Smrg 4381b8e80941Smrg info.indirect = buffer; 4382b8e80941Smrg info.indirect_offset = offset; 4383b8e80941Smrg 4384b8e80941Smrg radv_dispatch(cmd_buffer, &info); 4385b8e80941Smrg} 4386b8e80941Smrg 4387b8e80941Smrgvoid radv_unaligned_dispatch( 4388b8e80941Smrg struct radv_cmd_buffer *cmd_buffer, 4389b8e80941Smrg uint32_t x, 4390b8e80941Smrg uint32_t y, 4391b8e80941Smrg uint32_t z) 4392b8e80941Smrg{ 4393b8e80941Smrg struct radv_dispatch_info info = {}; 4394b8e80941Smrg 4395b8e80941Smrg info.blocks[0] = x; 4396b8e80941Smrg info.blocks[1] = y; 4397b8e80941Smrg info.blocks[2] = z; 4398b8e80941Smrg info.unaligned = 1; 4399b8e80941Smrg 4400b8e80941Smrg radv_dispatch(cmd_buffer, &info); 4401b8e80941Smrg} 4402b8e80941Smrg 4403b8e80941Smrgvoid radv_CmdEndRenderPass( 4404b8e80941Smrg VkCommandBuffer commandBuffer) 4405b8e80941Smrg{ 4406b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 4407b8e80941Smrg 4408b8e80941Smrg radv_subpass_barrier(cmd_buffer, &cmd_buffer->state.pass->end_barrier); 4409b8e80941Smrg 4410b8e80941Smrg radv_cmd_buffer_end_subpass(cmd_buffer); 4411b8e80941Smrg 4412b8e80941Smrg vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments); 4413b8e80941Smrg 4414b8e80941Smrg cmd_buffer->state.pass = NULL; 4415b8e80941Smrg cmd_buffer->state.subpass = NULL; 4416b8e80941Smrg cmd_buffer->state.attachments = NULL; 4417b8e80941Smrg cmd_buffer->state.framebuffer = NULL; 4418b8e80941Smrg} 4419b8e80941Smrg 4420b8e80941Smrgvoid radv_CmdEndRenderPass2KHR( 4421b8e80941Smrg VkCommandBuffer commandBuffer, 4422b8e80941Smrg const VkSubpassEndInfoKHR* pSubpassEndInfo) 4423b8e80941Smrg{ 4424b8e80941Smrg radv_CmdEndRenderPass(commandBuffer); 4425b8e80941Smrg} 4426b8e80941Smrg 4427b8e80941Smrg/* 4428b8e80941Smrg * For HTILE we have the following interesting clear words: 4429b8e80941Smrg * 0xfffff30f: 
Uncompressed, full depth range, for depth+stencil HTILE 4430b8e80941Smrg * 0xfffc000f: Uncompressed, full depth range, for depth only HTILE. 4431b8e80941Smrg * 0xfffffff0: Clear depth to 1.0 4432b8e80941Smrg * 0x00000000: Clear depth to 0.0 4433b8e80941Smrg */ 4434b8e80941Smrgstatic void radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer, 4435b8e80941Smrg struct radv_image *image, 4436b8e80941Smrg const VkImageSubresourceRange *range, 4437b8e80941Smrg uint32_t clear_word) 4438b8e80941Smrg{ 4439b8e80941Smrg assert(range->baseMipLevel == 0); 4440b8e80941Smrg assert(range->levelCount == 1 || range->levelCount == VK_REMAINING_ARRAY_LAYERS); 4441b8e80941Smrg unsigned layer_count = radv_get_layerCount(image, range); 4442b8e80941Smrg uint64_t size = image->planes[0].surface.htile_slice_size * layer_count; 4443b8e80941Smrg VkImageAspectFlags aspects = VK_IMAGE_ASPECT_DEPTH_BIT; 4444b8e80941Smrg uint64_t offset = image->offset + image->htile_offset + 4445b8e80941Smrg image->planes[0].surface.htile_slice_size * range->baseArrayLayer; 4446b8e80941Smrg struct radv_cmd_state *state = &cmd_buffer->state; 4447b8e80941Smrg VkClearDepthStencilValue value = {}; 4448b8e80941Smrg 4449b8e80941Smrg state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB | 4450b8e80941Smrg RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; 4451b8e80941Smrg 4452b8e80941Smrg state->flush_bits |= radv_fill_buffer(cmd_buffer, image->bo, offset, 4453b8e80941Smrg size, clear_word); 4454b8e80941Smrg 4455b8e80941Smrg state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; 4456b8e80941Smrg 4457b8e80941Smrg if (vk_format_is_stencil(image->vk_format)) 4458b8e80941Smrg aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; 4459b8e80941Smrg 4460b8e80941Smrg radv_set_ds_clear_metadata(cmd_buffer, image, value, aspects); 4461b8e80941Smrg 4462b8e80941Smrg if (radv_image_is_tc_compat_htile(image)) { 4463b8e80941Smrg /* Initialize the TC-compat metada value to 0 because by 4464b8e80941Smrg * default DB_Z_INFO.RANGE_PRECISION is set to 1, and we 
only 4465b8e80941Smrg * need have to conditionally update its value when performing 4466b8e80941Smrg * a fast depth clear. 4467b8e80941Smrg */ 4468b8e80941Smrg radv_set_tc_compat_zrange_metadata(cmd_buffer, image, 0); 4469b8e80941Smrg } 4470b8e80941Smrg} 4471b8e80941Smrg 4472b8e80941Smrgstatic void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer, 4473b8e80941Smrg struct radv_image *image, 4474b8e80941Smrg VkImageLayout src_layout, 4475b8e80941Smrg VkImageLayout dst_layout, 4476b8e80941Smrg unsigned src_queue_mask, 4477b8e80941Smrg unsigned dst_queue_mask, 4478b8e80941Smrg const VkImageSubresourceRange *range) 4479b8e80941Smrg{ 4480b8e80941Smrg if (!radv_image_has_htile(image)) 4481b8e80941Smrg return; 4482b8e80941Smrg 4483b8e80941Smrg if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) { 4484b8e80941Smrg uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 0xfffff30f : 0xfffc000f; 4485b8e80941Smrg 4486b8e80941Smrg if (radv_layout_is_htile_compressed(image, dst_layout, 4487b8e80941Smrg dst_queue_mask)) { 4488b8e80941Smrg clear_value = 0; 4489b8e80941Smrg } 4490b8e80941Smrg 4491b8e80941Smrg radv_initialize_htile(cmd_buffer, image, range, clear_value); 4492b8e80941Smrg } else if (!radv_layout_is_htile_compressed(image, src_layout, src_queue_mask) && 4493b8e80941Smrg radv_layout_is_htile_compressed(image, dst_layout, dst_queue_mask)) { 4494b8e80941Smrg uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 
0xfffff30f : 0xfffc000f; 4495b8e80941Smrg radv_initialize_htile(cmd_buffer, image, range, clear_value); 4496b8e80941Smrg } else if (radv_layout_is_htile_compressed(image, src_layout, src_queue_mask) && 4497b8e80941Smrg !radv_layout_is_htile_compressed(image, dst_layout, dst_queue_mask)) { 4498b8e80941Smrg VkImageSubresourceRange local_range = *range; 4499b8e80941Smrg local_range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; 4500b8e80941Smrg local_range.baseMipLevel = 0; 4501b8e80941Smrg local_range.levelCount = 1; 4502b8e80941Smrg 4503b8e80941Smrg cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB | 4504b8e80941Smrg RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; 4505b8e80941Smrg 4506b8e80941Smrg radv_decompress_depth_image_inplace(cmd_buffer, image, &local_range); 4507b8e80941Smrg 4508b8e80941Smrg cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB | 4509b8e80941Smrg RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; 4510b8e80941Smrg } 4511b8e80941Smrg} 4512b8e80941Smrg 4513b8e80941Smrgstatic void radv_initialise_cmask(struct radv_cmd_buffer *cmd_buffer, 4514b8e80941Smrg struct radv_image *image, uint32_t value) 4515b8e80941Smrg{ 4516b8e80941Smrg struct radv_cmd_state *state = &cmd_buffer->state; 4517b8e80941Smrg 4518b8e80941Smrg state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | 4519b8e80941Smrg RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; 4520b8e80941Smrg 4521b8e80941Smrg state->flush_bits |= radv_clear_cmask(cmd_buffer, image, value); 4522b8e80941Smrg 4523b8e80941Smrg state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; 4524b8e80941Smrg} 4525b8e80941Smrg 4526b8e80941Smrgvoid radv_initialize_fmask(struct radv_cmd_buffer *cmd_buffer, 4527b8e80941Smrg struct radv_image *image) 4528b8e80941Smrg{ 4529b8e80941Smrg struct radv_cmd_state *state = &cmd_buffer->state; 4530b8e80941Smrg static const uint32_t fmask_clear_values[4] = { 4531b8e80941Smrg 0x00000000, 4532b8e80941Smrg 0x02020202, 4533b8e80941Smrg 0xE4E4E4E4, 4534b8e80941Smrg 0x76543210 4535b8e80941Smrg }; 
4536b8e80941Smrg uint32_t log2_samples = util_logbase2(image->info.samples); 4537b8e80941Smrg uint32_t value = fmask_clear_values[log2_samples]; 4538b8e80941Smrg 4539b8e80941Smrg state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | 4540b8e80941Smrg RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; 4541b8e80941Smrg 4542b8e80941Smrg state->flush_bits |= radv_clear_fmask(cmd_buffer, image, value); 4543b8e80941Smrg 4544b8e80941Smrg state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; 4545b8e80941Smrg} 4546b8e80941Smrg 4547b8e80941Smrgvoid radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer, 4548b8e80941Smrg struct radv_image *image, uint32_t value) 4549b8e80941Smrg{ 4550b8e80941Smrg struct radv_cmd_state *state = &cmd_buffer->state; 4551b8e80941Smrg 4552b8e80941Smrg state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | 4553b8e80941Smrg RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; 4554b8e80941Smrg 4555b8e80941Smrg state->flush_bits |= radv_clear_dcc(cmd_buffer, image, value); 4556b8e80941Smrg 4557b8e80941Smrg state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | 4558b8e80941Smrg RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; 4559b8e80941Smrg} 4560b8e80941Smrg 4561b8e80941Smrg/** 4562b8e80941Smrg * Initialize DCC/FMASK/CMASK metadata for a color image. 4563b8e80941Smrg */ 4564b8e80941Smrgstatic void radv_init_color_image_metadata(struct radv_cmd_buffer *cmd_buffer, 4565b8e80941Smrg struct radv_image *image, 4566b8e80941Smrg VkImageLayout src_layout, 4567b8e80941Smrg VkImageLayout dst_layout, 4568b8e80941Smrg unsigned src_queue_mask, 4569b8e80941Smrg unsigned dst_queue_mask) 4570b8e80941Smrg{ 4571b8e80941Smrg if (radv_image_has_cmask(image)) { 4572b8e80941Smrg uint32_t value = 0xffffffffu; /* Fully expanded mode. */ 4573b8e80941Smrg 4574b8e80941Smrg /* TODO: clarify this. 
*/ 4575b8e80941Smrg if (radv_image_has_fmask(image)) { 4576b8e80941Smrg value = 0xccccccccu; 4577b8e80941Smrg } 4578b8e80941Smrg 4579b8e80941Smrg radv_initialise_cmask(cmd_buffer, image, value); 4580b8e80941Smrg } 4581b8e80941Smrg 4582b8e80941Smrg if (radv_image_has_fmask(image)) { 4583b8e80941Smrg radv_initialize_fmask(cmd_buffer, image); 4584b8e80941Smrg } 4585b8e80941Smrg 4586b8e80941Smrg if (radv_image_has_dcc(image)) { 4587b8e80941Smrg uint32_t value = 0xffffffffu; /* Fully expanded mode. */ 4588b8e80941Smrg bool need_decompress_pass = false; 4589b8e80941Smrg 4590b8e80941Smrg if (radv_layout_dcc_compressed(image, dst_layout, 4591b8e80941Smrg dst_queue_mask)) { 4592b8e80941Smrg value = 0x20202020u; 4593b8e80941Smrg need_decompress_pass = true; 4594b8e80941Smrg } 4595b8e80941Smrg 4596b8e80941Smrg radv_initialize_dcc(cmd_buffer, image, value); 4597b8e80941Smrg 4598b8e80941Smrg radv_update_fce_metadata(cmd_buffer, image, 4599b8e80941Smrg need_decompress_pass); 4600b8e80941Smrg } 4601b8e80941Smrg 4602b8e80941Smrg if (radv_image_has_cmask(image) || radv_image_has_dcc(image)) { 4603b8e80941Smrg uint32_t color_values[2] = {}; 4604b8e80941Smrg radv_set_color_clear_metadata(cmd_buffer, image, color_values); 4605b8e80941Smrg } 4606b8e80941Smrg} 4607b8e80941Smrg 4608b8e80941Smrg/** 4609b8e80941Smrg * Handle color image transitions for DCC/FMASK/CMASK. 
4610b8e80941Smrg */ 4611b8e80941Smrgstatic void radv_handle_color_image_transition(struct radv_cmd_buffer *cmd_buffer, 4612b8e80941Smrg struct radv_image *image, 4613b8e80941Smrg VkImageLayout src_layout, 4614b8e80941Smrg VkImageLayout dst_layout, 4615b8e80941Smrg unsigned src_queue_mask, 4616b8e80941Smrg unsigned dst_queue_mask, 4617b8e80941Smrg const VkImageSubresourceRange *range) 4618b8e80941Smrg{ 4619b8e80941Smrg if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) { 4620b8e80941Smrg radv_init_color_image_metadata(cmd_buffer, image, 4621b8e80941Smrg src_layout, dst_layout, 4622b8e80941Smrg src_queue_mask, dst_queue_mask); 4623b8e80941Smrg return; 4624b8e80941Smrg } 4625b8e80941Smrg 4626b8e80941Smrg if (radv_image_has_dcc(image)) { 4627b8e80941Smrg if (src_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) { 4628b8e80941Smrg radv_initialize_dcc(cmd_buffer, image, 0xffffffffu); 4629b8e80941Smrg } else if (radv_layout_dcc_compressed(image, src_layout, src_queue_mask) && 4630b8e80941Smrg !radv_layout_dcc_compressed(image, dst_layout, dst_queue_mask)) { 4631b8e80941Smrg radv_decompress_dcc(cmd_buffer, image, range); 4632b8e80941Smrg } else if (radv_layout_can_fast_clear(image, src_layout, src_queue_mask) && 4633b8e80941Smrg !radv_layout_can_fast_clear(image, dst_layout, dst_queue_mask)) { 4634b8e80941Smrg radv_fast_clear_flush_image_inplace(cmd_buffer, image, range); 4635b8e80941Smrg } 4636b8e80941Smrg } else if (radv_image_has_cmask(image) || radv_image_has_fmask(image)) { 4637b8e80941Smrg if (radv_layout_can_fast_clear(image, src_layout, src_queue_mask) && 4638b8e80941Smrg !radv_layout_can_fast_clear(image, dst_layout, dst_queue_mask)) { 4639b8e80941Smrg radv_fast_clear_flush_image_inplace(cmd_buffer, image, range); 4640b8e80941Smrg } 4641b8e80941Smrg 4642b8e80941Smrg if (radv_image_has_fmask(image)) { 4643b8e80941Smrg if (src_layout != VK_IMAGE_LAYOUT_GENERAL && 4644b8e80941Smrg dst_layout == VK_IMAGE_LAYOUT_GENERAL) { 4645b8e80941Smrg radv_expand_fmask_image_inplace(cmd_buffer, 
image, range); 4646b8e80941Smrg } 4647b8e80941Smrg } 4648b8e80941Smrg } 4649b8e80941Smrg} 4650b8e80941Smrg 4651b8e80941Smrgstatic void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, 4652b8e80941Smrg struct radv_image *image, 4653b8e80941Smrg VkImageLayout src_layout, 4654b8e80941Smrg VkImageLayout dst_layout, 4655b8e80941Smrg uint32_t src_family, 4656b8e80941Smrg uint32_t dst_family, 4657b8e80941Smrg const VkImageSubresourceRange *range) 4658b8e80941Smrg{ 4659b8e80941Smrg if (image->exclusive && src_family != dst_family) { 4660b8e80941Smrg /* This is an acquire or a release operation and there will be 4661b8e80941Smrg * a corresponding release/acquire. Do the transition in the 4662b8e80941Smrg * most flexible queue. */ 4663b8e80941Smrg 4664b8e80941Smrg assert(src_family == cmd_buffer->queue_family_index || 4665b8e80941Smrg dst_family == cmd_buffer->queue_family_index); 4666b8e80941Smrg 4667b8e80941Smrg if (src_family == VK_QUEUE_FAMILY_EXTERNAL) 4668b8e80941Smrg return; 4669b8e80941Smrg 4670b8e80941Smrg if (cmd_buffer->queue_family_index == RADV_QUEUE_TRANSFER) 4671b8e80941Smrg return; 4672b8e80941Smrg 4673b8e80941Smrg if (cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE && 4674b8e80941Smrg (src_family == RADV_QUEUE_GENERAL || 4675b8e80941Smrg dst_family == RADV_QUEUE_GENERAL)) 4676b8e80941Smrg return; 4677b8e80941Smrg } 4678b8e80941Smrg 4679b8e80941Smrg if (src_layout == dst_layout) 4680b8e80941Smrg return; 4681b8e80941Smrg 4682b8e80941Smrg unsigned src_queue_mask = 4683b8e80941Smrg radv_image_queue_family_mask(image, src_family, 4684b8e80941Smrg cmd_buffer->queue_family_index); 4685b8e80941Smrg unsigned dst_queue_mask = 4686b8e80941Smrg radv_image_queue_family_mask(image, dst_family, 4687b8e80941Smrg cmd_buffer->queue_family_index); 4688b8e80941Smrg 4689b8e80941Smrg if (vk_format_is_depth(image->vk_format)) { 4690b8e80941Smrg radv_handle_depth_image_transition(cmd_buffer, image, 4691b8e80941Smrg src_layout, dst_layout, 4692b8e80941Smrg 
src_queue_mask, dst_queue_mask, 4693b8e80941Smrg range); 4694b8e80941Smrg } else { 4695b8e80941Smrg radv_handle_color_image_transition(cmd_buffer, image, 4696b8e80941Smrg src_layout, dst_layout, 4697b8e80941Smrg src_queue_mask, dst_queue_mask, 4698b8e80941Smrg range); 4699b8e80941Smrg } 4700b8e80941Smrg} 4701b8e80941Smrg 4702b8e80941Smrgstruct radv_barrier_info { 4703b8e80941Smrg uint32_t eventCount; 4704b8e80941Smrg const VkEvent *pEvents; 4705b8e80941Smrg VkPipelineStageFlags srcStageMask; 4706b8e80941Smrg VkPipelineStageFlags dstStageMask; 4707b8e80941Smrg}; 4708b8e80941Smrg 4709b8e80941Smrgstatic void 4710b8e80941Smrgradv_barrier(struct radv_cmd_buffer *cmd_buffer, 4711b8e80941Smrg uint32_t memoryBarrierCount, 4712b8e80941Smrg const VkMemoryBarrier *pMemoryBarriers, 4713b8e80941Smrg uint32_t bufferMemoryBarrierCount, 4714b8e80941Smrg const VkBufferMemoryBarrier *pBufferMemoryBarriers, 4715b8e80941Smrg uint32_t imageMemoryBarrierCount, 4716b8e80941Smrg const VkImageMemoryBarrier *pImageMemoryBarriers, 4717b8e80941Smrg const struct radv_barrier_info *info) 4718b8e80941Smrg{ 4719b8e80941Smrg struct radeon_cmdbuf *cs = cmd_buffer->cs; 4720b8e80941Smrg enum radv_cmd_flush_bits src_flush_bits = 0; 4721b8e80941Smrg enum radv_cmd_flush_bits dst_flush_bits = 0; 4722b8e80941Smrg 4723b8e80941Smrg for (unsigned i = 0; i < info->eventCount; ++i) { 4724b8e80941Smrg RADV_FROM_HANDLE(radv_event, event, info->pEvents[i]); 4725b8e80941Smrg uint64_t va = radv_buffer_get_va(event->bo); 4726b8e80941Smrg 4727b8e80941Smrg radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo); 4728b8e80941Smrg 4729b8e80941Smrg MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 7); 4730b8e80941Smrg 4731b8e80941Smrg radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, va, 1, 0xffffffff); 4732b8e80941Smrg assert(cmd_buffer->cs->cdw <= cdw_max); 4733b8e80941Smrg } 4734b8e80941Smrg 4735b8e80941Smrg for (uint32_t i = 0; i < memoryBarrierCount; i++) { 4736b8e80941Smrg src_flush_bits |= 
radv_src_access_flush(cmd_buffer, pMemoryBarriers[i].srcAccessMask, 4737b8e80941Smrg NULL); 4738b8e80941Smrg dst_flush_bits |= radv_dst_access_flush(cmd_buffer, pMemoryBarriers[i].dstAccessMask, 4739b8e80941Smrg NULL); 4740b8e80941Smrg } 4741b8e80941Smrg 4742b8e80941Smrg for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) { 4743b8e80941Smrg src_flush_bits |= radv_src_access_flush(cmd_buffer, pBufferMemoryBarriers[i].srcAccessMask, 4744b8e80941Smrg NULL); 4745b8e80941Smrg dst_flush_bits |= radv_dst_access_flush(cmd_buffer, pBufferMemoryBarriers[i].dstAccessMask, 4746b8e80941Smrg NULL); 4747b8e80941Smrg } 4748b8e80941Smrg 4749b8e80941Smrg for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) { 4750b8e80941Smrg RADV_FROM_HANDLE(radv_image, image, pImageMemoryBarriers[i].image); 4751b8e80941Smrg 4752b8e80941Smrg src_flush_bits |= radv_src_access_flush(cmd_buffer, pImageMemoryBarriers[i].srcAccessMask, 4753b8e80941Smrg image); 4754b8e80941Smrg dst_flush_bits |= radv_dst_access_flush(cmd_buffer, pImageMemoryBarriers[i].dstAccessMask, 4755b8e80941Smrg image); 4756b8e80941Smrg } 4757b8e80941Smrg 4758b8e80941Smrg /* The Vulkan spec 1.1.98 says: 4759b8e80941Smrg * 4760b8e80941Smrg * "An execution dependency with only 4761b8e80941Smrg * VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT in the destination stage mask 4762b8e80941Smrg * will only prevent that stage from executing in subsequently 4763b8e80941Smrg * submitted commands. As this stage does not perform any actual 4764b8e80941Smrg * execution, this is not observable - in effect, it does not delay 4765b8e80941Smrg * processing of subsequent commands. Similarly an execution dependency 4766b8e80941Smrg * with only VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT in the source stage mask 4767b8e80941Smrg * will effectively not wait for any prior commands to complete." 
4768b8e80941Smrg */ 4769b8e80941Smrg if (info->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT) 4770b8e80941Smrg radv_stage_flush(cmd_buffer, info->srcStageMask); 4771b8e80941Smrg cmd_buffer->state.flush_bits |= src_flush_bits; 4772b8e80941Smrg 4773b8e80941Smrg for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) { 4774b8e80941Smrg RADV_FROM_HANDLE(radv_image, image, pImageMemoryBarriers[i].image); 4775b8e80941Smrg radv_handle_image_transition(cmd_buffer, image, 4776b8e80941Smrg pImageMemoryBarriers[i].oldLayout, 4777b8e80941Smrg pImageMemoryBarriers[i].newLayout, 4778b8e80941Smrg pImageMemoryBarriers[i].srcQueueFamilyIndex, 4779b8e80941Smrg pImageMemoryBarriers[i].dstQueueFamilyIndex, 4780b8e80941Smrg &pImageMemoryBarriers[i].subresourceRange); 4781b8e80941Smrg } 4782b8e80941Smrg 4783b8e80941Smrg /* Make sure CP DMA is idle because the driver might have performed a 4784b8e80941Smrg * DMA operation for copying or filling buffers/images. 4785b8e80941Smrg */ 4786b8e80941Smrg if (info->srcStageMask & (VK_PIPELINE_STAGE_TRANSFER_BIT | 4787b8e80941Smrg VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)) 4788b8e80941Smrg si_cp_dma_wait_for_idle(cmd_buffer); 4789b8e80941Smrg 4790b8e80941Smrg cmd_buffer->state.flush_bits |= dst_flush_bits; 4791b8e80941Smrg} 4792b8e80941Smrg 4793b8e80941Smrgvoid radv_CmdPipelineBarrier( 4794b8e80941Smrg VkCommandBuffer commandBuffer, 4795b8e80941Smrg VkPipelineStageFlags srcStageMask, 4796b8e80941Smrg VkPipelineStageFlags destStageMask, 4797b8e80941Smrg VkBool32 byRegion, 4798b8e80941Smrg uint32_t memoryBarrierCount, 4799b8e80941Smrg const VkMemoryBarrier* pMemoryBarriers, 4800b8e80941Smrg uint32_t bufferMemoryBarrierCount, 4801b8e80941Smrg const VkBufferMemoryBarrier* pBufferMemoryBarriers, 4802b8e80941Smrg uint32_t imageMemoryBarrierCount, 4803b8e80941Smrg const VkImageMemoryBarrier* pImageMemoryBarriers) 4804b8e80941Smrg{ 4805b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 4806b8e80941Smrg struct radv_barrier_info 
info; 4807b8e80941Smrg 4808b8e80941Smrg info.eventCount = 0; 4809b8e80941Smrg info.pEvents = NULL; 4810b8e80941Smrg info.srcStageMask = srcStageMask; 4811b8e80941Smrg info.dstStageMask = destStageMask; 4812b8e80941Smrg 4813b8e80941Smrg radv_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers, 4814b8e80941Smrg bufferMemoryBarrierCount, pBufferMemoryBarriers, 4815b8e80941Smrg imageMemoryBarrierCount, pImageMemoryBarriers, &info); 4816b8e80941Smrg} 4817b8e80941Smrg 4818b8e80941Smrg 4819b8e80941Smrgstatic void write_event(struct radv_cmd_buffer *cmd_buffer, 4820b8e80941Smrg struct radv_event *event, 4821b8e80941Smrg VkPipelineStageFlags stageMask, 4822b8e80941Smrg unsigned value) 4823b8e80941Smrg{ 4824b8e80941Smrg struct radeon_cmdbuf *cs = cmd_buffer->cs; 4825b8e80941Smrg uint64_t va = radv_buffer_get_va(event->bo); 4826b8e80941Smrg 4827b8e80941Smrg si_emit_cache_flush(cmd_buffer); 4828b8e80941Smrg 4829b8e80941Smrg radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo); 4830b8e80941Smrg 4831b8e80941Smrg MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 21); 4832b8e80941Smrg 4833b8e80941Smrg /* Flags that only require a top-of-pipe event. */ 4834b8e80941Smrg VkPipelineStageFlags top_of_pipe_flags = 4835b8e80941Smrg VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; 4836b8e80941Smrg 4837b8e80941Smrg /* Flags that only require a post-index-fetch event. */ 4838b8e80941Smrg VkPipelineStageFlags post_index_fetch_flags = 4839b8e80941Smrg top_of_pipe_flags | 4840b8e80941Smrg VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | 4841b8e80941Smrg VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; 4842b8e80941Smrg 4843b8e80941Smrg /* Make sure CP DMA is idle because the driver might have performed a 4844b8e80941Smrg * DMA operation for copying or filling buffers/images. 
4845b8e80941Smrg */ 4846b8e80941Smrg if (stageMask & (VK_PIPELINE_STAGE_TRANSFER_BIT | 4847b8e80941Smrg VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)) 4848b8e80941Smrg si_cp_dma_wait_for_idle(cmd_buffer); 4849b8e80941Smrg 4850b8e80941Smrg /* TODO: Emit EOS events for syncing PS/CS stages. */ 4851b8e80941Smrg 4852b8e80941Smrg if (!(stageMask & ~top_of_pipe_flags)) { 4853b8e80941Smrg /* Just need to sync the PFP engine. */ 4854b8e80941Smrg radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); 4855b8e80941Smrg radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | 4856b8e80941Smrg S_370_WR_CONFIRM(1) | 4857b8e80941Smrg S_370_ENGINE_SEL(V_370_PFP)); 4858b8e80941Smrg radeon_emit(cs, va); 4859b8e80941Smrg radeon_emit(cs, va >> 32); 4860b8e80941Smrg radeon_emit(cs, value); 4861b8e80941Smrg } else if (!(stageMask & ~post_index_fetch_flags)) { 4862b8e80941Smrg /* Sync ME because PFP reads index and indirect buffers. */ 4863b8e80941Smrg radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); 4864b8e80941Smrg radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | 4865b8e80941Smrg S_370_WR_CONFIRM(1) | 4866b8e80941Smrg S_370_ENGINE_SEL(V_370_ME)); 4867b8e80941Smrg radeon_emit(cs, va); 4868b8e80941Smrg radeon_emit(cs, va >> 32); 4869b8e80941Smrg radeon_emit(cs, value); 4870b8e80941Smrg } else { 4871b8e80941Smrg /* Otherwise, sync all prior GPU work using an EOP event. 
*/ 4872b8e80941Smrg si_cs_emit_write_event_eop(cs, 4873b8e80941Smrg cmd_buffer->device->physical_device->rad_info.chip_class, 4874b8e80941Smrg radv_cmd_buffer_uses_mec(cmd_buffer), 4875b8e80941Smrg V_028A90_BOTTOM_OF_PIPE_TS, 0, 4876b8e80941Smrg EOP_DATA_SEL_VALUE_32BIT, va, value, 4877b8e80941Smrg cmd_buffer->gfx9_eop_bug_va); 4878b8e80941Smrg } 4879b8e80941Smrg 4880b8e80941Smrg assert(cmd_buffer->cs->cdw <= cdw_max); 4881b8e80941Smrg} 4882b8e80941Smrg 4883b8e80941Smrgvoid radv_CmdSetEvent(VkCommandBuffer commandBuffer, 4884b8e80941Smrg VkEvent _event, 4885b8e80941Smrg VkPipelineStageFlags stageMask) 4886b8e80941Smrg{ 4887b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 4888b8e80941Smrg RADV_FROM_HANDLE(radv_event, event, _event); 4889b8e80941Smrg 4890b8e80941Smrg write_event(cmd_buffer, event, stageMask, 1); 4891b8e80941Smrg} 4892b8e80941Smrg 4893b8e80941Smrgvoid radv_CmdResetEvent(VkCommandBuffer commandBuffer, 4894b8e80941Smrg VkEvent _event, 4895b8e80941Smrg VkPipelineStageFlags stageMask) 4896b8e80941Smrg{ 4897b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 4898b8e80941Smrg RADV_FROM_HANDLE(radv_event, event, _event); 4899b8e80941Smrg 4900b8e80941Smrg write_event(cmd_buffer, event, stageMask, 0); 4901b8e80941Smrg} 4902b8e80941Smrg 4903b8e80941Smrgvoid radv_CmdWaitEvents(VkCommandBuffer commandBuffer, 4904b8e80941Smrg uint32_t eventCount, 4905b8e80941Smrg const VkEvent* pEvents, 4906b8e80941Smrg VkPipelineStageFlags srcStageMask, 4907b8e80941Smrg VkPipelineStageFlags dstStageMask, 4908b8e80941Smrg uint32_t memoryBarrierCount, 4909b8e80941Smrg const VkMemoryBarrier* pMemoryBarriers, 4910b8e80941Smrg uint32_t bufferMemoryBarrierCount, 4911b8e80941Smrg const VkBufferMemoryBarrier* pBufferMemoryBarriers, 4912b8e80941Smrg uint32_t imageMemoryBarrierCount, 4913b8e80941Smrg const VkImageMemoryBarrier* pImageMemoryBarriers) 4914b8e80941Smrg{ 4915b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, 
commandBuffer); 4916b8e80941Smrg struct radv_barrier_info info; 4917b8e80941Smrg 4918b8e80941Smrg info.eventCount = eventCount; 4919b8e80941Smrg info.pEvents = pEvents; 4920b8e80941Smrg info.srcStageMask = 0; 4921b8e80941Smrg 4922b8e80941Smrg radv_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers, 4923b8e80941Smrg bufferMemoryBarrierCount, pBufferMemoryBarriers, 4924b8e80941Smrg imageMemoryBarrierCount, pImageMemoryBarriers, &info); 4925b8e80941Smrg} 4926b8e80941Smrg 4927b8e80941Smrg 4928b8e80941Smrgvoid radv_CmdSetDeviceMask(VkCommandBuffer commandBuffer, 4929b8e80941Smrg uint32_t deviceMask) 4930b8e80941Smrg{ 4931b8e80941Smrg /* No-op */ 4932b8e80941Smrg} 4933b8e80941Smrg 4934b8e80941Smrg/* VK_EXT_conditional_rendering */ 4935b8e80941Smrgvoid radv_CmdBeginConditionalRenderingEXT( 4936b8e80941Smrg VkCommandBuffer commandBuffer, 4937b8e80941Smrg const VkConditionalRenderingBeginInfoEXT* pConditionalRenderingBegin) 4938b8e80941Smrg{ 4939b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 4940b8e80941Smrg RADV_FROM_HANDLE(radv_buffer, buffer, pConditionalRenderingBegin->buffer); 4941b8e80941Smrg struct radeon_cmdbuf *cs = cmd_buffer->cs; 4942b8e80941Smrg bool draw_visible = true; 4943b8e80941Smrg uint64_t pred_value = 0; 4944b8e80941Smrg uint64_t va, new_va; 4945b8e80941Smrg unsigned pred_offset; 4946b8e80941Smrg 4947b8e80941Smrg va = radv_buffer_get_va(buffer->bo) + pConditionalRenderingBegin->offset; 4948b8e80941Smrg 4949b8e80941Smrg /* By default, if the 32-bit value at offset in buffer memory is zero, 4950b8e80941Smrg * then the rendering commands are discarded, otherwise they are 4951b8e80941Smrg * executed as normal. If the inverted flag is set, all commands are 4952b8e80941Smrg * discarded if the value is non zero. 
4953b8e80941Smrg */ 4954b8e80941Smrg if (pConditionalRenderingBegin->flags & 4955b8e80941Smrg VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT) { 4956b8e80941Smrg draw_visible = false; 4957b8e80941Smrg } 4958b8e80941Smrg 4959b8e80941Smrg si_emit_cache_flush(cmd_buffer); 4960b8e80941Smrg 4961b8e80941Smrg /* From the Vulkan spec 1.1.107: 4962b8e80941Smrg * 4963b8e80941Smrg * "If the 32-bit value at offset in buffer memory is zero, then the 4964b8e80941Smrg * rendering commands are discarded, otherwise they are executed as 4965b8e80941Smrg * normal. If the value of the predicate in buffer memory changes while 4966b8e80941Smrg * conditional rendering is active, the rendering commands may be 4967b8e80941Smrg * discarded in an implementation-dependent way. Some implementations 4968b8e80941Smrg * may latch the value of the predicate upon beginning conditional 4969b8e80941Smrg * rendering while others may read it before every rendering command." 4970b8e80941Smrg * 4971b8e80941Smrg * But, the AMD hardware treats the predicate as a 64-bit value which 4972b8e80941Smrg * means we need a workaround in the driver. Luckily, it's not required 4973b8e80941Smrg * to support if the value changes when predication is active. 4974b8e80941Smrg * 4975b8e80941Smrg * The workaround is as follows: 4976b8e80941Smrg * 1) allocate a 64-value in the upload BO and initialize it to 0 4977b8e80941Smrg * 2) copy the 32-bit predicate value to the upload BO 4978b8e80941Smrg * 3) use the new allocated VA address for predication 4979b8e80941Smrg * 4980b8e80941Smrg * Based on the conditionalrender demo, it's faster to do the COPY_DATA 4981b8e80941Smrg * in ME (+ sync PFP) instead of PFP. 
4982b8e80941Smrg */ 4983b8e80941Smrg radv_cmd_buffer_upload_data(cmd_buffer, 8, 16, &pred_value, &pred_offset); 4984b8e80941Smrg 4985b8e80941Smrg new_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset; 4986b8e80941Smrg 4987b8e80941Smrg radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); 4988b8e80941Smrg radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | 4989b8e80941Smrg COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | 4990b8e80941Smrg COPY_DATA_WR_CONFIRM); 4991b8e80941Smrg radeon_emit(cs, va); 4992b8e80941Smrg radeon_emit(cs, va >> 32); 4993b8e80941Smrg radeon_emit(cs, new_va); 4994b8e80941Smrg radeon_emit(cs, new_va >> 32); 4995b8e80941Smrg 4996b8e80941Smrg radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); 4997b8e80941Smrg radeon_emit(cs, 0); 4998b8e80941Smrg 4999b8e80941Smrg /* Enable predication for this command buffer. */ 5000b8e80941Smrg si_emit_set_predication_state(cmd_buffer, draw_visible, new_va); 5001b8e80941Smrg cmd_buffer->state.predicating = true; 5002b8e80941Smrg 5003b8e80941Smrg /* Store conditional rendering user info. */ 5004b8e80941Smrg cmd_buffer->state.predication_type = draw_visible; 5005b8e80941Smrg cmd_buffer->state.predication_va = new_va; 5006b8e80941Smrg} 5007b8e80941Smrg 5008b8e80941Smrgvoid radv_CmdEndConditionalRenderingEXT( 5009b8e80941Smrg VkCommandBuffer commandBuffer) 5010b8e80941Smrg{ 5011b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 5012b8e80941Smrg 5013b8e80941Smrg /* Disable predication for this command buffer. */ 5014b8e80941Smrg si_emit_set_predication_state(cmd_buffer, false, 0); 5015b8e80941Smrg cmd_buffer->state.predicating = false; 5016b8e80941Smrg 5017b8e80941Smrg /* Reset conditional rendering user info. 
*/ 5018b8e80941Smrg cmd_buffer->state.predication_type = -1; 5019b8e80941Smrg cmd_buffer->state.predication_va = 0; 5020b8e80941Smrg} 5021b8e80941Smrg 5022b8e80941Smrg/* VK_EXT_transform_feedback */ 5023b8e80941Smrgvoid radv_CmdBindTransformFeedbackBuffersEXT( 5024b8e80941Smrg VkCommandBuffer commandBuffer, 5025b8e80941Smrg uint32_t firstBinding, 5026b8e80941Smrg uint32_t bindingCount, 5027b8e80941Smrg const VkBuffer* pBuffers, 5028b8e80941Smrg const VkDeviceSize* pOffsets, 5029b8e80941Smrg const VkDeviceSize* pSizes) 5030b8e80941Smrg{ 5031b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 5032b8e80941Smrg struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings; 5033b8e80941Smrg uint8_t enabled_mask = 0; 5034b8e80941Smrg 5035b8e80941Smrg assert(firstBinding + bindingCount <= MAX_SO_BUFFERS); 5036b8e80941Smrg for (uint32_t i = 0; i < bindingCount; i++) { 5037b8e80941Smrg uint32_t idx = firstBinding + i; 5038b8e80941Smrg 5039b8e80941Smrg sb[idx].buffer = radv_buffer_from_handle(pBuffers[i]); 5040b8e80941Smrg sb[idx].offset = pOffsets[i]; 5041b8e80941Smrg sb[idx].size = pSizes[i]; 5042b8e80941Smrg 5043b8e80941Smrg radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, 5044b8e80941Smrg sb[idx].buffer->bo); 5045b8e80941Smrg 5046b8e80941Smrg enabled_mask |= 1 << idx; 5047b8e80941Smrg } 5048b8e80941Smrg 5049b8e80941Smrg cmd_buffer->state.streamout.enabled_mask |= enabled_mask; 5050b8e80941Smrg 5051b8e80941Smrg cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER; 5052b8e80941Smrg} 5053b8e80941Smrg 5054b8e80941Smrgstatic void 5055b8e80941Smrgradv_emit_streamout_enable(struct radv_cmd_buffer *cmd_buffer) 5056b8e80941Smrg{ 5057b8e80941Smrg struct radv_streamout_state *so = &cmd_buffer->state.streamout; 5058b8e80941Smrg struct radeon_cmdbuf *cs = cmd_buffer->cs; 5059b8e80941Smrg 5060b8e80941Smrg radeon_set_context_reg_seq(cs, R_028B94_VGT_STRMOUT_CONFIG, 2); 5061b8e80941Smrg radeon_emit(cs, 5062b8e80941Smrg 
S_028B94_STREAMOUT_0_EN(so->streamout_enabled) | 5063b8e80941Smrg S_028B94_RAST_STREAM(0) | 5064b8e80941Smrg S_028B94_STREAMOUT_1_EN(so->streamout_enabled) | 5065b8e80941Smrg S_028B94_STREAMOUT_2_EN(so->streamout_enabled) | 5066b8e80941Smrg S_028B94_STREAMOUT_3_EN(so->streamout_enabled)); 5067b8e80941Smrg radeon_emit(cs, so->hw_enabled_mask & 5068b8e80941Smrg so->enabled_stream_buffers_mask); 5069b8e80941Smrg 5070b8e80941Smrg cmd_buffer->state.context_roll_without_scissor_emitted = true; 5071b8e80941Smrg} 5072b8e80941Smrg 5073b8e80941Smrgstatic void 5074b8e80941Smrgradv_set_streamout_enable(struct radv_cmd_buffer *cmd_buffer, bool enable) 5075b8e80941Smrg{ 5076b8e80941Smrg struct radv_streamout_state *so = &cmd_buffer->state.streamout; 5077b8e80941Smrg bool old_streamout_enabled = so->streamout_enabled; 5078b8e80941Smrg uint32_t old_hw_enabled_mask = so->hw_enabled_mask; 5079b8e80941Smrg 5080b8e80941Smrg so->streamout_enabled = enable; 5081b8e80941Smrg 5082b8e80941Smrg so->hw_enabled_mask = so->enabled_mask | 5083b8e80941Smrg (so->enabled_mask << 4) | 5084b8e80941Smrg (so->enabled_mask << 8) | 5085b8e80941Smrg (so->enabled_mask << 12); 5086b8e80941Smrg 5087b8e80941Smrg if ((old_streamout_enabled != so->streamout_enabled) || 5088b8e80941Smrg (old_hw_enabled_mask != so->hw_enabled_mask)) 5089b8e80941Smrg radv_emit_streamout_enable(cmd_buffer); 5090b8e80941Smrg} 5091b8e80941Smrg 5092b8e80941Smrgstatic void radv_flush_vgt_streamout(struct radv_cmd_buffer *cmd_buffer) 5093b8e80941Smrg{ 5094b8e80941Smrg struct radeon_cmdbuf *cs = cmd_buffer->cs; 5095b8e80941Smrg unsigned reg_strmout_cntl; 5096b8e80941Smrg 5097b8e80941Smrg /* The register is at different places on different ASICs. 
*/ 5098b8e80941Smrg if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) { 5099b8e80941Smrg reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL; 5100b8e80941Smrg radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0); 5101b8e80941Smrg } else { 5102b8e80941Smrg reg_strmout_cntl = R_0084FC_CP_STRMOUT_CNTL; 5103b8e80941Smrg radeon_set_config_reg(cs, reg_strmout_cntl, 0); 5104b8e80941Smrg } 5105b8e80941Smrg 5106b8e80941Smrg radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 5107b8e80941Smrg radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0)); 5108b8e80941Smrg 5109b8e80941Smrg radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); 5110b8e80941Smrg radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */ 5111b8e80941Smrg radeon_emit(cs, reg_strmout_cntl >> 2); /* register */ 5112b8e80941Smrg radeon_emit(cs, 0); 5113b8e80941Smrg radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* reference value */ 5114b8e80941Smrg radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* mask */ 5115b8e80941Smrg radeon_emit(cs, 4); /* poll interval */ 5116b8e80941Smrg} 5117b8e80941Smrg 5118b8e80941Smrgvoid radv_CmdBeginTransformFeedbackEXT( 5119b8e80941Smrg VkCommandBuffer commandBuffer, 5120b8e80941Smrg uint32_t firstCounterBuffer, 5121b8e80941Smrg uint32_t counterBufferCount, 5122b8e80941Smrg const VkBuffer* pCounterBuffers, 5123b8e80941Smrg const VkDeviceSize* pCounterBufferOffsets) 5124b8e80941Smrg{ 5125b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 5126b8e80941Smrg struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings; 5127b8e80941Smrg struct radv_streamout_state *so = &cmd_buffer->state.streamout; 5128b8e80941Smrg struct radeon_cmdbuf *cs = cmd_buffer->cs; 5129b8e80941Smrg uint32_t i; 5130b8e80941Smrg 5131b8e80941Smrg radv_flush_vgt_streamout(cmd_buffer); 5132b8e80941Smrg 5133b8e80941Smrg assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); 5134b8e80941Smrg for_each_bit(i, 
so->enabled_mask) { 5135b8e80941Smrg int32_t counter_buffer_idx = i - firstCounterBuffer; 5136b8e80941Smrg if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount) 5137b8e80941Smrg counter_buffer_idx = -1; 5138b8e80941Smrg 5139b8e80941Smrg /* SI binds streamout buffers as shader resources. 5140b8e80941Smrg * VGT only counts primitives and tells the shader through 5141b8e80941Smrg * SGPRs what to do. 5142b8e80941Smrg */ 5143b8e80941Smrg radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2); 5144b8e80941Smrg radeon_emit(cs, sb[i].size >> 2); /* BUFFER_SIZE (in DW) */ 5145b8e80941Smrg radeon_emit(cs, so->stride_in_dw[i]); /* VTX_STRIDE (in DW) */ 5146b8e80941Smrg 5147b8e80941Smrg cmd_buffer->state.context_roll_without_scissor_emitted = true; 5148b8e80941Smrg 5149b8e80941Smrg if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) { 5150b8e80941Smrg /* The array of counter buffers is optional. */ 5151b8e80941Smrg RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]); 5152b8e80941Smrg uint64_t va = radv_buffer_get_va(buffer->bo); 5153b8e80941Smrg 5154b8e80941Smrg va += buffer->offset + pCounterBufferOffsets[counter_buffer_idx]; 5155b8e80941Smrg 5156b8e80941Smrg /* Append */ 5157b8e80941Smrg radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); 5158b8e80941Smrg radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | 5159b8e80941Smrg STRMOUT_DATA_TYPE(1) | /* offset in bytes */ 5160b8e80941Smrg STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */ 5161b8e80941Smrg radeon_emit(cs, 0); /* unused */ 5162b8e80941Smrg radeon_emit(cs, 0); /* unused */ 5163b8e80941Smrg radeon_emit(cs, va); /* src address lo */ 5164b8e80941Smrg radeon_emit(cs, va >> 32); /* src address hi */ 5165b8e80941Smrg 5166b8e80941Smrg radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo); 5167b8e80941Smrg } else { 5168b8e80941Smrg /* Start from the beginning. 
*/ 5169b8e80941Smrg radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); 5170b8e80941Smrg radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | 5171b8e80941Smrg STRMOUT_DATA_TYPE(1) | /* offset in bytes */ 5172b8e80941Smrg STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */ 5173b8e80941Smrg radeon_emit(cs, 0); /* unused */ 5174b8e80941Smrg radeon_emit(cs, 0); /* unused */ 5175b8e80941Smrg radeon_emit(cs, 0); /* unused */ 5176b8e80941Smrg radeon_emit(cs, 0); /* unused */ 5177b8e80941Smrg } 5178b8e80941Smrg } 5179b8e80941Smrg 5180b8e80941Smrg radv_set_streamout_enable(cmd_buffer, true); 5181b8e80941Smrg} 5182b8e80941Smrg 5183b8e80941Smrgvoid radv_CmdEndTransformFeedbackEXT( 5184b8e80941Smrg VkCommandBuffer commandBuffer, 5185b8e80941Smrg uint32_t firstCounterBuffer, 5186b8e80941Smrg uint32_t counterBufferCount, 5187b8e80941Smrg const VkBuffer* pCounterBuffers, 5188b8e80941Smrg const VkDeviceSize* pCounterBufferOffsets) 5189b8e80941Smrg{ 5190b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 5191b8e80941Smrg struct radv_streamout_state *so = &cmd_buffer->state.streamout; 5192b8e80941Smrg struct radeon_cmdbuf *cs = cmd_buffer->cs; 5193b8e80941Smrg uint32_t i; 5194b8e80941Smrg 5195b8e80941Smrg radv_flush_vgt_streamout(cmd_buffer); 5196b8e80941Smrg 5197b8e80941Smrg assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); 5198b8e80941Smrg for_each_bit(i, so->enabled_mask) { 5199b8e80941Smrg int32_t counter_buffer_idx = i - firstCounterBuffer; 5200b8e80941Smrg if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount) 5201b8e80941Smrg counter_buffer_idx = -1; 5202b8e80941Smrg 5203b8e80941Smrg if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) { 5204b8e80941Smrg /* The array of counters buffer is optional. 
*/ 5205b8e80941Smrg RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]); 5206b8e80941Smrg uint64_t va = radv_buffer_get_va(buffer->bo); 5207b8e80941Smrg 5208b8e80941Smrg va += buffer->offset + pCounterBufferOffsets[counter_buffer_idx]; 5209b8e80941Smrg 5210b8e80941Smrg radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); 5211b8e80941Smrg radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | 5212b8e80941Smrg STRMOUT_DATA_TYPE(1) | /* offset in bytes */ 5213b8e80941Smrg STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) | 5214b8e80941Smrg STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */ 5215b8e80941Smrg radeon_emit(cs, va); /* dst address lo */ 5216b8e80941Smrg radeon_emit(cs, va >> 32); /* dst address hi */ 5217b8e80941Smrg radeon_emit(cs, 0); /* unused */ 5218b8e80941Smrg radeon_emit(cs, 0); /* unused */ 5219b8e80941Smrg 5220b8e80941Smrg radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo); 5221b8e80941Smrg } 5222b8e80941Smrg 5223b8e80941Smrg /* Deactivate transform feedback by zeroing the buffer size. 5224b8e80941Smrg * The counters (primitives generated, primitives emitted) may 5225b8e80941Smrg * be enabled even if there is not buffer bound. This ensures 5226b8e80941Smrg * that the primitives-emitted query won't increment. 
5227b8e80941Smrg */ 5228b8e80941Smrg radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0); 5229b8e80941Smrg 5230b8e80941Smrg cmd_buffer->state.context_roll_without_scissor_emitted = true; 5231b8e80941Smrg } 5232b8e80941Smrg 5233b8e80941Smrg radv_set_streamout_enable(cmd_buffer, false); 5234b8e80941Smrg} 5235b8e80941Smrg 5236b8e80941Smrgvoid radv_CmdDrawIndirectByteCountEXT( 5237b8e80941Smrg VkCommandBuffer commandBuffer, 5238b8e80941Smrg uint32_t instanceCount, 5239b8e80941Smrg uint32_t firstInstance, 5240b8e80941Smrg VkBuffer _counterBuffer, 5241b8e80941Smrg VkDeviceSize counterBufferOffset, 5242b8e80941Smrg uint32_t counterOffset, 5243b8e80941Smrg uint32_t vertexStride) 5244b8e80941Smrg{ 5245b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 5246b8e80941Smrg RADV_FROM_HANDLE(radv_buffer, counterBuffer, _counterBuffer); 5247b8e80941Smrg struct radv_draw_info info = {}; 5248b8e80941Smrg 5249b8e80941Smrg info.instance_count = instanceCount; 5250b8e80941Smrg info.first_instance = firstInstance; 5251b8e80941Smrg info.strmout_buffer = counterBuffer; 5252b8e80941Smrg info.strmout_buffer_offset = counterBufferOffset; 5253b8e80941Smrg info.stride = vertexStride; 5254b8e80941Smrg 5255b8e80941Smrg radv_draw(cmd_buffer, &info); 5256b8e80941Smrg} 5257