1/* 2 * Copyright © 2019 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include <string.h> 25#include <stdlib.h> 26#include <assert.h> 27 28#include <vulkan/vulkan.h> 29#include <vulkan/vk_layer.h> 30 31#include "imgui.h" 32 33#include "overlay_params.h" 34 35#include "util/debug.h" 36#include "util/hash_table.h" 37#include "util/list.h" 38#include "util/ralloc.h" 39#include "util/os_time.h" 40#include "util/simple_mtx.h" 41 42#include "vk_enum_to_str.h" 43#include "vk_util.h" 44 45/* Mapped from VkInstace/VkPhysicalDevice */ 46struct instance_data { 47 struct vk_instance_dispatch_table vtable; 48 VkInstance instance; 49 50 struct overlay_params params; 51 bool pipeline_statistics_enabled; 52 53 bool first_line_printed; 54}; 55 56struct frame_stat { 57 uint64_t stats[OVERLAY_PARAM_ENABLED_MAX]; 58}; 59 60/* Mapped from VkDevice */ 61struct queue_data; 62struct device_data { 63 struct instance_data *instance; 64 65 PFN_vkSetDeviceLoaderData set_device_loader_data; 66 67 struct vk_device_dispatch_table vtable; 68 VkPhysicalDevice physical_device; 69 VkDevice device; 70 71 VkPhysicalDeviceProperties properties; 72 73 struct queue_data *graphic_queue; 74 75 struct queue_data **queues; 76 uint32_t n_queues; 77 78 /* For a single frame */ 79 struct frame_stat frame_stats; 80}; 81 82/* Mapped from VkCommandBuffer */ 83struct command_buffer_data { 84 struct device_data *device; 85 86 VkCommandBufferLevel level; 87 88 VkCommandBuffer cmd_buffer; 89 VkQueryPool pipeline_query_pool; 90 VkQueryPool timestamp_query_pool; 91 uint32_t query_index; 92 93 struct frame_stat stats; 94 95 struct list_head link; /* link into queue_data::running_command_buffer */ 96}; 97 98/* Mapped from VkQueue */ 99struct queue_data { 100 struct device_data *device; 101 102 VkQueue queue; 103 VkQueueFlags flags; 104 uint32_t family_index; 105 uint64_t timestamp_mask; 106 107 VkFence queries_fence; 108 109 struct list_head running_command_buffer; 110}; 111 112struct overlay_draw { 113 struct list_head link; 114 115 VkCommandBuffer command_buffer; 116 117 VkSemaphore semaphore; 118 VkFence fence; 119 120 VkBuffer vertex_buffer; 121 VkDeviceMemory vertex_buffer_mem; 122 VkDeviceSize vertex_buffer_size; 123 124 VkBuffer index_buffer; 125 VkDeviceMemory index_buffer_mem; 126 VkDeviceSize index_buffer_size; 127}; 128 129/* Mapped from VkSwapchainKHR */ 130struct swapchain_data { 131 struct device_data *device; 132 133 VkSwapchainKHR swapchain; 134 unsigned width, height; 135 VkFormat format; 136 137 uint32_t n_images; 138 VkImage *images; 139 VkImageView *image_views; 140 VkFramebuffer *framebuffers; 141 142 VkRenderPass render_pass; 143 144 VkDescriptorPool descriptor_pool; 145 VkDescriptorSetLayout descriptor_layout; 146 VkDescriptorSet descriptor_set; 147 148 VkSampler font_sampler; 149 150 VkPipelineLayout pipeline_layout; 151 VkPipeline pipeline; 152 153 VkCommandPool command_pool; 154 155 struct list_head draws; /* List of struct overlay_draw */ 156 157 bool font_uploaded; 158 VkImage font_image; 159 VkImageView font_image_view; 160 VkDeviceMemory font_mem; 161 VkBuffer upload_font_buffer; 162 VkDeviceMemory upload_font_buffer_mem; 163 164 /**/ 165 ImGuiContext* imgui_context; 166 ImVec2 window_size; 167 168 /**/ 169 uint64_t n_frames; 170 uint64_t last_present_time; 171 172 unsigned n_frames_since_update; 173 uint64_t last_fps_update; 174 double fps; 175 176 enum overlay_param_enabled stat_selector; 177 double time_dividor; 178 struct frame_stat stats_min, stats_max; 179 struct frame_stat frames_stats[200]; 180 181 /* Over a single frame */ 182 struct frame_stat frame_stats; 183 184 /* Over fps_sampling_period */ 185 struct frame_stat accumulated_stats; 186}; 187 188static const VkQueryPipelineStatisticFlags overlay_query_flags = 189 VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT | 190 VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT | 191 VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT | 192 VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT | 193 VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT | 194 VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT | 195 VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT | 196 VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT | 197 VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT | 198 VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT | 199 VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT; 200#define OVERLAY_QUERY_COUNT (11) 201 202static struct hash_table_u64 *vk_object_to_data = NULL; 203static simple_mtx_t vk_object_to_data_mutex = _SIMPLE_MTX_INITIALIZER_NP; 204 205thread_local ImGuiContext* __MesaImGui; 206 207static inline void ensure_vk_object_map(void) 208{ 209 if (!vk_object_to_data) 210 vk_object_to_data = _mesa_hash_table_u64_create(NULL); 211} 212 213#define HKEY(obj) ((uint64_t)(obj)) 214#define FIND_SWAPCHAIN_DATA(obj) ((struct swapchain_data *)find_object_data(HKEY(obj))) 215#define FIND_CMD_BUFFER_DATA(obj) ((struct command_buffer_data *)find_object_data(HKEY(obj))) 216#define FIND_DEVICE_DATA(obj) ((struct device_data *)find_object_data(HKEY(obj))) 217#define FIND_QUEUE_DATA(obj) ((struct queue_data *)find_object_data(HKEY(obj))) 218#define FIND_PHYSICAL_DEVICE_DATA(obj) ((struct instance_data *)find_object_data(HKEY(obj))) 219#define FIND_INSTANCE_DATA(obj) ((struct instance_data *)find_object_data(HKEY(obj))) 220static void *find_object_data(uint64_t obj) 221{ 222 simple_mtx_lock(&vk_object_to_data_mutex); 223 ensure_vk_object_map(); 224 void *data = _mesa_hash_table_u64_search(vk_object_to_data, obj); 225 simple_mtx_unlock(&vk_object_to_data_mutex); 226 return data; 227} 228 229static void map_object(uint64_t obj, void *data) 230{ 231 simple_mtx_lock(&vk_object_to_data_mutex); 232 ensure_vk_object_map(); 233 _mesa_hash_table_u64_insert(vk_object_to_data, obj, data); 234 simple_mtx_unlock(&vk_object_to_data_mutex); 235} 236 237static void unmap_object(uint64_t obj) 238{ 239 simple_mtx_lock(&vk_object_to_data_mutex); 240 _mesa_hash_table_u64_remove(vk_object_to_data, obj); 241 simple_mtx_unlock(&vk_object_to_data_mutex); 242} 243 244/**/ 245 246#define VK_CHECK(expr) \ 247 do { \ 248 VkResult __result = (expr); \ 249 if (__result != VK_SUCCESS) { \ 250 fprintf(stderr, "'%s' line %i failed with %s\n", \ 251 #expr, __LINE__, vk_Result_to_str(__result)); \ 252 } \ 253 } while (0) 254 255/**/ 256 257static VkLayerInstanceCreateInfo *get_instance_chain_info(const VkInstanceCreateInfo *pCreateInfo, 258 VkLayerFunction func) 259{ 260 vk_foreach_struct(item, pCreateInfo->pNext) { 261 if (item->sType == VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO && 262 ((VkLayerInstanceCreateInfo *) item)->function == func) 263 return (VkLayerInstanceCreateInfo *) item; 264 } 265 unreachable("instance chain info not found"); 266 return NULL; 267} 268 269static VkLayerDeviceCreateInfo *get_device_chain_info(const VkDeviceCreateInfo *pCreateInfo, 270 VkLayerFunction func) 271{ 272 vk_foreach_struct(item, pCreateInfo->pNext) { 273 if (item->sType == VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO && 274 ((VkLayerDeviceCreateInfo *) item)->function == func) 275 return (VkLayerDeviceCreateInfo *)item; 276 } 277 unreachable("device chain info not found"); 278 return NULL; 279} 280 281static struct VkBaseOutStructure * 282clone_chain(const struct VkBaseInStructure *chain) 283{ 284 struct VkBaseOutStructure *head = NULL, *tail = NULL; 285 286 vk_foreach_struct_const(item, chain) { 287 size_t item_size = vk_structure_type_size(item); 288 struct VkBaseOutStructure *new_item = 289 (struct VkBaseOutStructure *)malloc(item_size);; 290 291 memcpy(new_item, item, item_size); 292 293 if (!head) 294 head = new_item; 295 if (tail) 296 tail->pNext = new_item; 297 tail = new_item; 298 } 299 300 return head; 301} 302 303static void 304free_chain(struct VkBaseOutStructure *chain) 305{ 306 while (chain) { 307 void *node = chain; 308 chain = chain->pNext; 309 free(node); 310 } 311} 312 313/**/ 314 315static void check_vk_result(VkResult err) 316{ 317 if (err != VK_SUCCESS) 318 printf("ERROR!\n"); 319} 320 321static struct instance_data *new_instance_data(VkInstance instance) 322{ 323 struct instance_data *data = rzalloc(NULL, struct instance_data); 324 data->instance = instance; 325 map_object(HKEY(data->instance), data); 326 return data; 327} 328 329static void destroy_instance_data(struct instance_data *data) 330{ 331 if (data->params.output_file) 332 fclose(data->params.output_file); 333 unmap_object(HKEY(data->instance)); 334 ralloc_free(data); 335} 336 337static void instance_data_map_physical_devices(struct instance_data *instance_data, 338 bool map) 339{ 340 uint32_t physicalDeviceCount = 0; 341 instance_data->vtable.EnumeratePhysicalDevices(instance_data->instance, 342 &physicalDeviceCount, 343 NULL); 344 345 VkPhysicalDevice *physicalDevices = (VkPhysicalDevice *) malloc(sizeof(VkPhysicalDevice) * physicalDeviceCount); 346 instance_data->vtable.EnumeratePhysicalDevices(instance_data->instance, 347 &physicalDeviceCount, 348 physicalDevices); 349 350 for (uint32_t i = 0; i < physicalDeviceCount; i++) { 351 if (map) 352 map_object(HKEY(physicalDevices[i]), instance_data); 353 else 354 unmap_object(HKEY(physicalDevices[i])); 355 } 356 357 free(physicalDevices); 358} 359 360/**/ 361static struct device_data *new_device_data(VkDevice device, struct instance_data *instance) 362{ 363 struct device_data *data = rzalloc(NULL, struct device_data); 364 data->instance = instance; 365 data->device = device; 366 map_object(HKEY(data->device), data); 367 return data; 368} 369 370static struct queue_data *new_queue_data(VkQueue queue, 371 const VkQueueFamilyProperties *family_props, 372 uint32_t family_index, 373 struct device_data *device_data) 374{ 375 struct queue_data *data = rzalloc(device_data, struct queue_data); 376 data->device = device_data; 377 data->queue = queue; 378 data->flags = family_props->queueFlags; 379 data->timestamp_mask = (1ull << family_props->timestampValidBits) - 1; 380 data->family_index = family_index; 381 LIST_INITHEAD(&data->running_command_buffer); 382 map_object(HKEY(data->queue), data); 383 384 /* Fence synchronizing access to queries on that queue. */ 385 VkFenceCreateInfo fence_info = {}; 386 fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; 387 fence_info.flags = VK_FENCE_CREATE_SIGNALED_BIT; 388 VkResult err = device_data->vtable.CreateFence(device_data->device, 389 &fence_info, 390 NULL, 391 &data->queries_fence); 392 check_vk_result(err); 393 394 if (data->flags & VK_QUEUE_GRAPHICS_BIT) 395 device_data->graphic_queue = data; 396 397 return data; 398} 399 400static void destroy_queue(struct queue_data *data) 401{ 402 struct device_data *device_data = data->device; 403 device_data->vtable.DestroyFence(device_data->device, data->queries_fence, NULL); 404 unmap_object(HKEY(data->queue)); 405 ralloc_free(data); 406} 407 408static void device_map_queues(struct device_data *data, 409 const VkDeviceCreateInfo *pCreateInfo) 410{ 411 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) 412 data->n_queues += pCreateInfo->pQueueCreateInfos[i].queueCount; 413 data->queues = ralloc_array(data, struct queue_data *, data->n_queues); 414 415 struct instance_data *instance_data = data->instance; 416 uint32_t n_family_props; 417 instance_data->vtable.GetPhysicalDeviceQueueFamilyProperties(data->physical_device, 418 &n_family_props, 419 NULL); 420 VkQueueFamilyProperties *family_props = 421 (VkQueueFamilyProperties *)malloc(sizeof(VkQueueFamilyProperties) * n_family_props); 422 instance_data->vtable.GetPhysicalDeviceQueueFamilyProperties(data->physical_device, 423 &n_family_props, 424 family_props); 425 426 uint32_t queue_index = 0; 427 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { 428 for (uint32_t j = 0; j < pCreateInfo->pQueueCreateInfos[i].queueCount; j++) { 429 VkQueue queue; 430 data->vtable.GetDeviceQueue(data->device, 431 pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex, 432 j, &queue); 433 434 VK_CHECK(data->set_device_loader_data(data->device, queue)); 435 436 data->queues[queue_index++] = 437 new_queue_data(queue, &family_props[pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex], 438 pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex, data); 439 } 440 } 441 442 free(family_props); 443} 444 445static void device_unmap_queues(struct device_data *data) 446{ 447 for (uint32_t i = 0; i < data->n_queues; i++) 448 destroy_queue(data->queues[i]); 449} 450 451static void destroy_device_data(struct device_data *data) 452{ 453 unmap_object(HKEY(data->device)); 454 ralloc_free(data); 455} 456 457/**/ 458static struct command_buffer_data *new_command_buffer_data(VkCommandBuffer cmd_buffer, 459 VkCommandBufferLevel level, 460 VkQueryPool pipeline_query_pool, 461 VkQueryPool timestamp_query_pool, 462 uint32_t query_index, 463 struct device_data *device_data) 464{ 465 struct command_buffer_data *data = rzalloc(NULL, struct command_buffer_data); 466 data->device = device_data; 467 data->cmd_buffer = cmd_buffer; 468 data->level = level; 469 data->pipeline_query_pool = pipeline_query_pool; 470 data->timestamp_query_pool = timestamp_query_pool; 471 data->query_index = query_index; 472 list_inithead(&data->link); 473 map_object(HKEY(data->cmd_buffer), data); 474 return data; 475} 476 477static void destroy_command_buffer_data(struct command_buffer_data *data) 478{ 479 unmap_object(HKEY(data->cmd_buffer)); 480 list_delinit(&data->link); 481 ralloc_free(data); 482} 483 484/**/ 485static struct swapchain_data *new_swapchain_data(VkSwapchainKHR swapchain, 486 struct device_data *device_data) 487{ 488 struct instance_data *instance_data = device_data->instance; 489 struct swapchain_data *data = rzalloc(NULL, struct swapchain_data); 490 data->device = device_data; 491 data->swapchain = swapchain; 492 data->window_size = ImVec2(instance_data->params.width, instance_data->params.height); 493 list_inithead(&data->draws); 494 map_object(HKEY(data->swapchain), data); 495 return data; 496} 497 498static void destroy_swapchain_data(struct swapchain_data *data) 499{ 500 unmap_object(HKEY(data->swapchain)); 501 ralloc_free(data); 502} 503 504struct overlay_draw *get_overlay_draw(struct swapchain_data *data) 505{ 506 struct device_data *device_data = data->device; 507 struct overlay_draw *draw = list_empty(&data->draws) ? 508 NULL : list_first_entry(&data->draws, struct overlay_draw, link); 509 510 VkSemaphoreCreateInfo sem_info = {}; 511 sem_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; 512 513 if (draw && device_data->vtable.GetFenceStatus(device_data->device, draw->fence) == VK_SUCCESS) { 514 list_del(&draw->link); 515 VK_CHECK(device_data->vtable.ResetFences(device_data->device, 516 1, &draw->fence)); 517 list_addtail(&draw->link, &data->draws); 518 return draw; 519 } 520 521 draw = rzalloc(data, struct overlay_draw); 522 523 VkCommandBufferAllocateInfo cmd_buffer_info = {}; 524 cmd_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; 525 cmd_buffer_info.commandPool = data->command_pool; 526 cmd_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; 527 cmd_buffer_info.commandBufferCount = 1; 528 VK_CHECK(device_data->vtable.AllocateCommandBuffers(device_data->device, 529 &cmd_buffer_info, 530 &draw->command_buffer)); 531 VK_CHECK(device_data->set_device_loader_data(device_data->device, 532 draw->command_buffer)); 533 534 535 VkFenceCreateInfo fence_info = {}; 536 fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; 537 VK_CHECK(device_data->vtable.CreateFence(device_data->device, 538 &fence_info, 539 NULL, 540 &draw->fence)); 541 542 VK_CHECK(device_data->vtable.CreateSemaphore(device_data->device, &sem_info, 543 NULL, &draw->semaphore)); 544 545 list_addtail(&draw->link, &data->draws); 546 547 return draw; 548} 549 550static const char *param_unit(enum overlay_param_enabled param) 551{ 552 switch (param) { 553 case OVERLAY_PARAM_ENABLED_frame_timing: 554 case OVERLAY_PARAM_ENABLED_acquire_timing: 555 return "(us)"; 556 case OVERLAY_PARAM_ENABLED_gpu_timing: 557 return "(ns)"; 558 default: 559 return ""; 560 } 561} 562 563static void snapshot_swapchain_frame(struct swapchain_data *data) 564{ 565 struct device_data *device_data = data->device; 566 struct instance_data *instance_data = device_data->instance; 567 uint32_t f_idx = data->n_frames % ARRAY_SIZE(data->frames_stats); 568 uint64_t now = os_time_get(); /* us */ 569 570 if (data->last_present_time) { 571 data->frame_stats.stats[OVERLAY_PARAM_ENABLED_frame_timing] = 572 now - data->last_present_time; 573 } 574 575 memset(&data->frames_stats[f_idx], 0, sizeof(data->frames_stats[f_idx])); 576 for (int s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { 577 data->frames_stats[f_idx].stats[s] += device_data->frame_stats.stats[s] + data->frame_stats.stats[s]; 578 data->accumulated_stats.stats[s] += device_data->frame_stats.stats[s] + data->frame_stats.stats[s]; 579 } 580 581 if (data->last_fps_update) { 582 double elapsed = (double)(now - data->last_fps_update); /* us */ 583 if (elapsed >= instance_data->params.fps_sampling_period) { 584 data->fps = 1000000.0f * data->n_frames_since_update / elapsed; 585 if (instance_data->params.output_file) { 586 if (!instance_data->first_line_printed) { 587 bool first_column = true; 588 589 instance_data->first_line_printed = true; 590 591#define OVERLAY_PARAM_BOOL(name) \ 592 if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_##name]) { \ 593 fprintf(instance_data->params.output_file, \ 594 "%s%s%s", first_column ? "" : ", ", #name, \ 595 param_unit(OVERLAY_PARAM_ENABLED_##name)); \ 596 first_column = false; \ 597 } 598#define OVERLAY_PARAM_CUSTOM(name) 599 OVERLAY_PARAMS 600#undef OVERLAY_PARAM_BOOL 601#undef OVERLAY_PARAM_CUSTOM 602 fprintf(instance_data->params.output_file, "\n"); 603 } 604 605 for (int s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { 606 if (!instance_data->params.enabled[s]) 607 continue; 608 if (s == OVERLAY_PARAM_ENABLED_fps) { 609 fprintf(instance_data->params.output_file, 610 "%s%.2f", s == 0 ? "" : ", ", data->fps); 611 } else { 612 fprintf(instance_data->params.output_file, 613 "%s%" PRIu64, s == 0 ? "" : ", ", 614 data->accumulated_stats.stats[s]); 615 } 616 } 617 fprintf(instance_data->params.output_file, "\n"); 618 fflush(instance_data->params.output_file); 619 } 620 621 memset(&data->accumulated_stats, 0, sizeof(data->accumulated_stats)); 622 data->n_frames_since_update = 0; 623 data->last_fps_update = now; 624 } 625 } else { 626 data->last_fps_update = now; 627 } 628 629 memset(&device_data->frame_stats, 0, sizeof(device_data->frame_stats)); 630 memset(&data->frame_stats, 0, sizeof(device_data->frame_stats)); 631 632 data->last_present_time = now; 633 data->n_frames++; 634 data->n_frames_since_update++; 635} 636 637static float get_time_stat(void *_data, int _idx) 638{ 639 struct swapchain_data *data = (struct swapchain_data *) _data; 640 if ((ARRAY_SIZE(data->frames_stats) - _idx) > data->n_frames) 641 return 0.0f; 642 int idx = ARRAY_SIZE(data->frames_stats) + 643 data->n_frames < ARRAY_SIZE(data->frames_stats) ? 644 _idx - data->n_frames : 645 _idx + data->n_frames; 646 idx %= ARRAY_SIZE(data->frames_stats); 647 /* Time stats are in us. */ 648 return data->frames_stats[idx].stats[data->stat_selector] / data->time_dividor; 649} 650 651static float get_stat(void *_data, int _idx) 652{ 653 struct swapchain_data *data = (struct swapchain_data *) _data; 654 if ((ARRAY_SIZE(data->frames_stats) - _idx) > data->n_frames) 655 return 0.0f; 656 int idx = ARRAY_SIZE(data->frames_stats) + 657 data->n_frames < ARRAY_SIZE(data->frames_stats) ? 658 _idx - data->n_frames : 659 _idx + data->n_frames; 660 idx %= ARRAY_SIZE(data->frames_stats); 661 return data->frames_stats[idx].stats[data->stat_selector]; 662} 663 664static void position_layer(struct swapchain_data *data) 665 666{ 667 struct device_data *device_data = data->device; 668 struct instance_data *instance_data = device_data->instance; 669 const float margin = 10.0f; 670 671 ImGui::SetNextWindowBgAlpha(0.5); 672 ImGui::SetNextWindowSize(data->window_size, ImGuiCond_Always); 673 switch (instance_data->params.position) { 674 case LAYER_POSITION_TOP_LEFT: 675 ImGui::SetNextWindowPos(ImVec2(margin, margin), ImGuiCond_Always); 676 break; 677 case LAYER_POSITION_TOP_RIGHT: 678 ImGui::SetNextWindowPos(ImVec2(data->width - data->window_size.x - margin, margin), 679 ImGuiCond_Always); 680 break; 681 case LAYER_POSITION_BOTTOM_LEFT: 682 ImGui::SetNextWindowPos(ImVec2(margin, data->height - data->window_size.y - margin), 683 ImGuiCond_Always); 684 break; 685 case LAYER_POSITION_BOTTOM_RIGHT: 686 ImGui::SetNextWindowPos(ImVec2(data->width - data->window_size.x - margin, 687 data->height - data->window_size.y - margin), 688 ImGuiCond_Always); 689 break; 690 } 691} 692 693static void compute_swapchain_display(struct swapchain_data *data) 694{ 695 struct device_data *device_data = data->device; 696 struct instance_data *instance_data = device_data->instance; 697 698 ImGui::SetCurrentContext(data->imgui_context); 699 ImGui::NewFrame(); 700 position_layer(data); 701 ImGui::Begin("Mesa overlay"); 702 ImGui::Text("Device: %s", device_data->properties.deviceName); 703 704 const char *format_name = vk_Format_to_str(data->format); 705 format_name = format_name ? (format_name + strlen("VK_FORMAT_")) : "unknown"; 706 ImGui::Text("Swapchain format: %s", format_name); 707 ImGui::Text("Frames: %" PRIu64, data->n_frames); 708 if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_fps]) 709 ImGui::Text("FPS: %.2f" , data->fps); 710 711 /* Recompute min/max */ 712 for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { 713 data->stats_min.stats[s] = UINT64_MAX; 714 data->stats_max.stats[s] = 0; 715 } 716 for (uint32_t f = 0; f < MIN2(data->n_frames, ARRAY_SIZE(data->frames_stats)); f++) { 717 for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { 718 data->stats_min.stats[s] = MIN2(data->frames_stats[f].stats[s], 719 data->stats_min.stats[s]); 720 data->stats_max.stats[s] = MAX2(data->frames_stats[f].stats[s], 721 data->stats_max.stats[s]); 722 } 723 } 724 for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { 725 assert(data->stats_min.stats[s] != UINT64_MAX); 726 } 727 728 for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { 729 if (!instance_data->params.enabled[s] || 730 s == OVERLAY_PARAM_ENABLED_fps || 731 s == OVERLAY_PARAM_ENABLED_frame) 732 continue; 733 734 char hash[40]; 735 snprintf(hash, sizeof(hash), "##%s", overlay_param_names[s]); 736 data->stat_selector = (enum overlay_param_enabled) s; 737 data->time_dividor = 1000.0f; 738 if (s == OVERLAY_PARAM_ENABLED_gpu_timing) 739 data->time_dividor = 1000000.0f; 740 741 if (s == OVERLAY_PARAM_ENABLED_frame_timing || 742 s == OVERLAY_PARAM_ENABLED_acquire_timing || 743 s == OVERLAY_PARAM_ENABLED_gpu_timing) { 744 double min_time = data->stats_min.stats[s] / data->time_dividor; 745 double max_time = data->stats_max.stats[s] / data->time_dividor; 746 ImGui::PlotHistogram(hash, get_time_stat, data, 747 ARRAY_SIZE(data->frames_stats), 0, 748 NULL, min_time, max_time, 749 ImVec2(ImGui::GetContentRegionAvailWidth(), 30)); 750 ImGui::Text("%s: %.3fms [%.3f, %.3f]", overlay_param_names[s], 751 get_time_stat(data, ARRAY_SIZE(data->frames_stats) - 1), 752 min_time, max_time); 753 } else { 754 ImGui::PlotHistogram(hash, get_stat, data, 755 ARRAY_SIZE(data->frames_stats), 0, 756 NULL, 757 data->stats_min.stats[s], 758 data->stats_max.stats[s], 759 ImVec2(ImGui::GetContentRegionAvailWidth(), 30)); 760 ImGui::Text("%s: %.0f [%" PRIu64 ", %" PRIu64 "]", overlay_param_names[s], 761 get_stat(data, ARRAY_SIZE(data->frames_stats) - 1), 762 data->stats_min.stats[s], data->stats_max.stats[s]); 763 } 764 } 765 data->window_size = ImVec2(data->window_size.x, ImGui::GetCursorPosY() + 10.0f); 766 ImGui::End(); 767 ImGui::EndFrame(); 768 ImGui::Render(); 769} 770 771static uint32_t vk_memory_type(struct device_data *data, 772 VkMemoryPropertyFlags properties, 773 uint32_t type_bits) 774{ 775 VkPhysicalDeviceMemoryProperties prop; 776 data->instance->vtable.GetPhysicalDeviceMemoryProperties(data->physical_device, &prop); 777 for (uint32_t i = 0; i < prop.memoryTypeCount; i++) 778 if ((prop.memoryTypes[i].propertyFlags & properties) == properties && type_bits & (1<<i)) 779 return i; 780 return 0xFFFFFFFF; // Unable to find memoryType 781} 782 783static void ensure_swapchain_fonts(struct swapchain_data *data, 784 VkCommandBuffer command_buffer) 785{ 786 if (data->font_uploaded) 787 return; 788 789 data->font_uploaded = true; 790 791 struct device_data *device_data = data->device; 792 ImGuiIO& io = ImGui::GetIO(); 793 unsigned char* pixels; 794 int width, height; 795 io.Fonts->GetTexDataAsRGBA32(&pixels, &width, &height); 796 size_t upload_size = width * height * 4 * sizeof(char); 797 798 /* Upload buffer */ 799 VkBufferCreateInfo buffer_info = {}; 800 buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; 801 buffer_info.size = upload_size; 802 buffer_info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; 803 buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; 804 VK_CHECK(device_data->vtable.CreateBuffer(device_data->device, &buffer_info, 805 NULL, &data->upload_font_buffer)); 806 VkMemoryRequirements upload_buffer_req; 807 device_data->vtable.GetBufferMemoryRequirements(device_data->device, 808 data->upload_font_buffer, 809 &upload_buffer_req); 810 VkMemoryAllocateInfo upload_alloc_info = {}; 811 upload_alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; 812 upload_alloc_info.allocationSize = upload_buffer_req.size; 813 upload_alloc_info.memoryTypeIndex = vk_memory_type(device_data, 814 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 815 upload_buffer_req.memoryTypeBits); 816 VK_CHECK(device_data->vtable.AllocateMemory(device_data->device, 817 &upload_alloc_info, 818 NULL, 819 &data->upload_font_buffer_mem)); 820 VK_CHECK(device_data->vtable.BindBufferMemory(device_data->device, 821 data->upload_font_buffer, 822 data->upload_font_buffer_mem, 0)); 823 824 /* Upload to Buffer */ 825 char* map = NULL; 826 VK_CHECK(device_data->vtable.MapMemory(device_data->device, 827 data->upload_font_buffer_mem, 828 0, upload_size, 0, (void**)(&map))); 829 memcpy(map, pixels, upload_size); 830 VkMappedMemoryRange range[1] = {}; 831 range[0].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; 832 range[0].memory = data->upload_font_buffer_mem; 833 range[0].size = upload_size; 834 VK_CHECK(device_data->vtable.FlushMappedMemoryRanges(device_data->device, 1, range)); 835 device_data->vtable.UnmapMemory(device_data->device, 836 data->upload_font_buffer_mem); 837 838 /* Copy buffer to image */ 839 VkImageMemoryBarrier copy_barrier[1] = {}; 840 copy_barrier[0].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; 841 copy_barrier[0].dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; 842 copy_barrier[0].oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; 843 copy_barrier[0].newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; 844 copy_barrier[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; 845 copy_barrier[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; 846 copy_barrier[0].image = data->font_image; 847 copy_barrier[0].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; 848 copy_barrier[0].subresourceRange.levelCount = 1; 849 copy_barrier[0].subresourceRange.layerCount = 1; 850 device_data->vtable.CmdPipelineBarrier(command_buffer, 851 VK_PIPELINE_STAGE_HOST_BIT, 852 VK_PIPELINE_STAGE_TRANSFER_BIT, 853 0, 0, NULL, 0, NULL, 854 1, copy_barrier); 855 856 VkBufferImageCopy region = {}; 857 region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; 858 region.imageSubresource.layerCount = 1; 859 region.imageExtent.width = width; 860 region.imageExtent.height = height; 861 region.imageExtent.depth = 1; 862 device_data->vtable.CmdCopyBufferToImage(command_buffer, 863 data->upload_font_buffer, 864 data->font_image, 865 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 866 1, ®ion); 867 868 VkImageMemoryBarrier use_barrier[1] = {}; 869 use_barrier[0].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; 870 use_barrier[0].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; 871 use_barrier[0].dstAccessMask = VK_ACCESS_SHADER_READ_BIT; 872 use_barrier[0].oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; 873 use_barrier[0].newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; 874 use_barrier[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; 875 use_barrier[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; 876 use_barrier[0].image = data->font_image; 877 use_barrier[0].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; 878 use_barrier[0].subresourceRange.levelCount = 1; 879 use_barrier[0].subresourceRange.layerCount = 1; 880 device_data->vtable.CmdPipelineBarrier(command_buffer, 881 VK_PIPELINE_STAGE_TRANSFER_BIT, 882 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 883 0, 884 0, NULL, 885 0, NULL, 886 1, use_barrier); 887 888 /* Store our identifier */ 889 io.Fonts->TexID = (ImTextureID)(intptr_t)data->font_image; 890} 891 892static void CreateOrResizeBuffer(struct device_data *data, 893 VkBuffer *buffer, 894 VkDeviceMemory *buffer_memory, 895 VkDeviceSize *buffer_size, 896 size_t new_size, VkBufferUsageFlagBits usage) 897{ 898 if (*buffer != VK_NULL_HANDLE) 899 data->vtable.DestroyBuffer(data->device, *buffer, NULL); 900 if (*buffer_memory) 901 data->vtable.FreeMemory(data->device, *buffer_memory, NULL); 902 903 VkBufferCreateInfo buffer_info = {}; 904 buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; 905 buffer_info.size = new_size; 906 buffer_info.usage = usage; 907 buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; 908 VK_CHECK(data->vtable.CreateBuffer(data->device, &buffer_info, NULL, buffer)); 909 910 VkMemoryRequirements req; 911 data->vtable.GetBufferMemoryRequirements(data->device, *buffer, &req); 912 VkMemoryAllocateInfo alloc_info = {}; 913 alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; 914 alloc_info.allocationSize = req.size; 915 alloc_info.memoryTypeIndex = 916 vk_memory_type(data, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, req.memoryTypeBits); 917 VK_CHECK(data->vtable.AllocateMemory(data->device, &alloc_info, NULL, buffer_memory)); 918 919 VK_CHECK(data->vtable.BindBufferMemory(data->device, *buffer, *buffer_memory, 0)); 920 *buffer_size = new_size; 921} 922 923static struct overlay_draw *render_swapchain_display(struct swapchain_data *data, 924 struct queue_data *present_queue, 925 const VkSemaphore *wait_semaphores, 926 unsigned n_wait_semaphores, 927 unsigned image_index) 928{ 929 ImDrawData* draw_data = ImGui::GetDrawData(); 930 if (draw_data->TotalVtxCount == 0) 931 return NULL; 932 933 struct device_data *device_data = data->device; 934 struct overlay_draw *draw = get_overlay_draw(data); 935 936 device_data->vtable.ResetCommandBuffer(draw->command_buffer, 0); 937 938 VkRenderPassBeginInfo render_pass_info = {}; 939 render_pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; 940 render_pass_info.renderPass = data->render_pass; 941 render_pass_info.framebuffer = data->framebuffers[image_index]; 942 render_pass_info.renderArea.extent.width = data->width; 943 render_pass_info.renderArea.extent.height = data->height; 944 945 VkCommandBufferBeginInfo buffer_begin_info = {}; 946 buffer_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; 947 948 device_data->vtable.BeginCommandBuffer(draw->command_buffer, &buffer_begin_info); 949 950 ensure_swapchain_fonts(data, draw->command_buffer); 951 952 /* Bounce the image to display back to color attachment layout for 953 * rendering on top of it. 954 */ 955 VkImageMemoryBarrier imb; 956 imb.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; 957 imb.pNext = nullptr; 958 imb.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; 959 imb.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; 960 imb.oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; 961 imb.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; 962 imb.image = data->images[image_index]; 963 imb.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; 964 imb.subresourceRange.baseMipLevel = 0; 965 imb.subresourceRange.levelCount = 1; 966 imb.subresourceRange.baseArrayLayer = 0; 967 imb.subresourceRange.layerCount = 1; 968 imb.srcQueueFamilyIndex = present_queue->family_index; 969 imb.dstQueueFamilyIndex = device_data->graphic_queue->family_index; 970 device_data->vtable.CmdPipelineBarrier(draw->command_buffer, 971 VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 972 VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 973 0, /* dependency flags */ 974 0, nullptr, /* memory barriers */ 975 0, nullptr, /* buffer memory barriers */ 976 1, &imb); /* image memory barriers */ 977 978 device_data->vtable.CmdBeginRenderPass(draw->command_buffer, &render_pass_info, 979 VK_SUBPASS_CONTENTS_INLINE); 980 981 /* Create/Resize vertex & index buffers */ 982 size_t vertex_size = draw_data->TotalVtxCount * sizeof(ImDrawVert); 983 size_t index_size = draw_data->TotalIdxCount * sizeof(ImDrawIdx); 984 if (draw->vertex_buffer_size < vertex_size) { 985 CreateOrResizeBuffer(device_data, 986 &draw->vertex_buffer, 987 &draw->vertex_buffer_mem, 988 &draw->vertex_buffer_size, 989 vertex_size, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT); 990 } 991 if (draw->index_buffer_size < index_size) { 992 CreateOrResizeBuffer(device_data, 993 &draw->index_buffer, 994 &draw->index_buffer_mem, 995 &draw->index_buffer_size, 996 index_size, VK_BUFFER_USAGE_INDEX_BUFFER_BIT); 997 } 998 999 /* Upload vertex & index data */ 1000 ImDrawVert* vtx_dst = NULL; 1001 ImDrawIdx* idx_dst = NULL; 1002 VK_CHECK(device_data->vtable.MapMemory(device_data->device, draw->vertex_buffer_mem, 1003 0, vertex_size, 0, (void**)(&vtx_dst))); 1004 VK_CHECK(device_data->vtable.MapMemory(device_data->device, draw->index_buffer_mem, 1005 0, index_size, 0, (void**)(&idx_dst))); 1006 for (int n = 0; n < draw_data->CmdListsCount; n++) 1007 { 1008 const ImDrawList* cmd_list = draw_data->CmdLists[n]; 1009 memcpy(vtx_dst, cmd_list->VtxBuffer.Data, cmd_list->VtxBuffer.Size * sizeof(ImDrawVert)); 1010 memcpy(idx_dst, cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size * sizeof(ImDrawIdx)); 1011 vtx_dst += cmd_list->VtxBuffer.Size; 1012 idx_dst += cmd_list->IdxBuffer.Size; 1013 } 1014 VkMappedMemoryRange range[2] = {}; 1015 range[0].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; 1016 range[0].memory = draw->vertex_buffer_mem; 1017 range[0].size = VK_WHOLE_SIZE; 1018 range[1].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; 1019 range[1].memory = draw->index_buffer_mem; 1020 range[1].size = VK_WHOLE_SIZE; 1021 VK_CHECK(device_data->vtable.FlushMappedMemoryRanges(device_data->device, 2, range)); 1022 device_data->vtable.UnmapMemory(device_data->device, draw->vertex_buffer_mem); 1023 device_data->vtable.UnmapMemory(device_data->device, draw->index_buffer_mem); 1024 1025 /* Bind pipeline and descriptor sets */ 1026 device_data->vtable.CmdBindPipeline(draw->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, data->pipeline); 1027 VkDescriptorSet desc_set[1] = { data->descriptor_set }; 1028 device_data->vtable.CmdBindDescriptorSets(draw->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, 1029 data->pipeline_layout, 0, 1, desc_set, 0, NULL); 1030 1031 /* Bind vertex & index buffers */ 1032 VkBuffer vertex_buffers[1] = { draw->vertex_buffer }; 1033 VkDeviceSize vertex_offset[1] = { 0 }; 1034 device_data->vtable.CmdBindVertexBuffers(draw->command_buffer, 0, 1, vertex_buffers, vertex_offset); 1035 device_data->vtable.CmdBindIndexBuffer(draw->command_buffer, draw->index_buffer, 0, VK_INDEX_TYPE_UINT16); 1036 1037 /* Setup viewport */ 1038 VkViewport viewport; 1039 viewport.x = 0; 1040 viewport.y = 0; 1041 viewport.width = draw_data->DisplaySize.x; 1042 viewport.height = draw_data->DisplaySize.y; 1043 viewport.minDepth = 0.0f; 1044 viewport.maxDepth = 1.0f; 1045 device_data->vtable.CmdSetViewport(draw->command_buffer, 0, 1, &viewport); 1046 1047 1048 /* Setup scale and translation through push constants : 1049 * 1050 * Our visible imgui space lies from draw_data->DisplayPos (top left) to 1051 * draw_data->DisplayPos+data_data->DisplaySize (bottom right). DisplayMin 1052 * is typically (0,0) for single viewport apps. 1053 */ 1054 float scale[2]; 1055 scale[0] = 2.0f / draw_data->DisplaySize.x; 1056 scale[1] = 2.0f / draw_data->DisplaySize.y; 1057 float translate[2]; 1058 translate[0] = -1.0f - draw_data->DisplayPos.x * scale[0]; 1059 translate[1] = -1.0f - draw_data->DisplayPos.y * scale[1]; 1060 device_data->vtable.CmdPushConstants(draw->command_buffer, data->pipeline_layout, 1061 VK_SHADER_STAGE_VERTEX_BIT, 1062 sizeof(float) * 0, sizeof(float) * 2, scale); 1063 device_data->vtable.CmdPushConstants(draw->command_buffer, data->pipeline_layout, 1064 VK_SHADER_STAGE_VERTEX_BIT, 1065 sizeof(float) * 2, sizeof(float) * 2, translate); 1066 1067 // Render the command lists: 1068 int vtx_offset = 0; 1069 int idx_offset = 0; 1070 ImVec2 display_pos = draw_data->DisplayPos; 1071 for (int n = 0; n < draw_data->CmdListsCount; n++) 1072 { 1073 const ImDrawList* cmd_list = draw_data->CmdLists[n]; 1074 for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++) 1075 { 1076 const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i]; 1077 // Apply scissor/clipping rectangle 1078 // FIXME: We could clamp width/height based on clamped min/max values. 1079 VkRect2D scissor; 1080 scissor.offset.x = (int32_t)(pcmd->ClipRect.x - display_pos.x) > 0 ? (int32_t)(pcmd->ClipRect.x - display_pos.x) : 0; 1081 scissor.offset.y = (int32_t)(pcmd->ClipRect.y - display_pos.y) > 0 ? (int32_t)(pcmd->ClipRect.y - display_pos.y) : 0; 1082 scissor.extent.width = (uint32_t)(pcmd->ClipRect.z - pcmd->ClipRect.x); 1083 scissor.extent.height = (uint32_t)(pcmd->ClipRect.w - pcmd->ClipRect.y + 1); // FIXME: Why +1 here? 1084 device_data->vtable.CmdSetScissor(draw->command_buffer, 0, 1, &scissor); 1085 1086 // Draw 1087 device_data->vtable.CmdDrawIndexed(draw->command_buffer, pcmd->ElemCount, 1, idx_offset, vtx_offset, 0); 1088 1089 idx_offset += pcmd->ElemCount; 1090 } 1091 vtx_offset += cmd_list->VtxBuffer.Size; 1092 } 1093 1094 device_data->vtable.CmdEndRenderPass(draw->command_buffer); 1095 1096 /* Bounce the image to display back to present layout. */ 1097 imb.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; 1098 imb.pNext = nullptr; 1099 imb.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; 1100 imb.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; 1101 imb.oldLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; 1102 imb.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; 1103 imb.image = data->images[image_index]; 1104 imb.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; 1105 imb.subresourceRange.baseMipLevel = 0; 1106 imb.subresourceRange.levelCount = 1; 1107 imb.subresourceRange.baseArrayLayer = 0; 1108 imb.subresourceRange.layerCount = 1; 1109 imb.srcQueueFamilyIndex = device_data->graphic_queue->family_index; 1110 imb.dstQueueFamilyIndex = present_queue->family_index; 1111 device_data->vtable.CmdPipelineBarrier(draw->command_buffer, 1112 VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 1113 VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 1114 0, /* dependency flags */ 1115 0, nullptr, /* memory barriers */ 1116 0, nullptr, /* buffer memory barriers */ 1117 1, &imb); /* image memory barriers */ 1118 1119 device_data->vtable.EndCommandBuffer(draw->command_buffer); 1120 1121 VkSubmitInfo submit_info = {}; 1122 VkPipelineStageFlags stage_wait = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; 1123 submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; 1124 submit_info.commandBufferCount = 1; 1125 submit_info.pCommandBuffers = &draw->command_buffer; 1126 submit_info.pWaitDstStageMask = &stage_wait; 1127 submit_info.waitSemaphoreCount = n_wait_semaphores; 1128 submit_info.pWaitSemaphores = wait_semaphores; 1129 submit_info.signalSemaphoreCount = 1; 1130 submit_info.pSignalSemaphores = &draw->semaphore; 1131 1132 device_data->vtable.QueueSubmit(device_data->graphic_queue->queue, 1, &submit_info, draw->fence); 1133 1134 return draw; 1135} 1136 1137static const uint32_t overlay_vert_spv[] = { 1138#include "overlay.vert.spv.h" 1139}; 1140static const uint32_t overlay_frag_spv[] = { 1141#include "overlay.frag.spv.h" 1142}; 1143 1144static void setup_swapchain_data_pipeline(struct swapchain_data *data) 1145{ 1146 struct device_data *device_data = data->device; 1147 VkShaderModule vert_module, frag_module; 1148 1149 /* Create shader modules */ 1150 VkShaderModuleCreateInfo vert_info = {}; 1151 vert_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; 1152 vert_info.codeSize = sizeof(overlay_vert_spv); 1153 vert_info.pCode = overlay_vert_spv; 1154 VK_CHECK(device_data->vtable.CreateShaderModule(device_data->device, 1155 &vert_info, NULL, &vert_module)); 1156 VkShaderModuleCreateInfo frag_info = {}; 1157 frag_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; 1158 frag_info.codeSize = sizeof(overlay_frag_spv); 1159 frag_info.pCode = (uint32_t*)overlay_frag_spv; 1160 VK_CHECK(device_data->vtable.CreateShaderModule(device_data->device, 1161 &frag_info, NULL, &frag_module)); 1162 1163 /* Font sampler */ 1164 VkSamplerCreateInfo sampler_info = {}; 1165 sampler_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; 1166 sampler_info.magFilter = VK_FILTER_LINEAR; 1167 sampler_info.minFilter = VK_FILTER_LINEAR; 1168 sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; 1169 sampler_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; 1170 sampler_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; 1171 sampler_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; 1172 sampler_info.minLod = -1000; 1173 sampler_info.maxLod = 1000; 1174 sampler_info.maxAnisotropy = 1.0f; 1175 VK_CHECK(device_data->vtable.CreateSampler(device_data->device, &sampler_info, 1176 NULL, &data->font_sampler)); 1177 1178 /* Descriptor pool */ 1179 VkDescriptorPoolSize sampler_pool_size = {}; 1180 sampler_pool_size.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; 1181 sampler_pool_size.descriptorCount = 1; 1182 VkDescriptorPoolCreateInfo desc_pool_info = {}; 1183 desc_pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; 1184 desc_pool_info.maxSets = 1; 1185 desc_pool_info.poolSizeCount = 1; 1186 desc_pool_info.pPoolSizes = &sampler_pool_size; 1187 VK_CHECK(device_data->vtable.CreateDescriptorPool(device_data->device, 1188 &desc_pool_info, 1189 NULL, &data->descriptor_pool)); 1190 1191 /* Descriptor layout */ 1192 VkSampler sampler[1] = { data->font_sampler }; 1193 VkDescriptorSetLayoutBinding binding[1] = {}; 1194 binding[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; 1195 binding[0].descriptorCount = 1; 1196 binding[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; 1197 binding[0].pImmutableSamplers = sampler; 1198 VkDescriptorSetLayoutCreateInfo set_layout_info = {}; 1199 set_layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; 1200 set_layout_info.bindingCount = 1; 1201 set_layout_info.pBindings = binding; 1202 VK_CHECK(device_data->vtable.CreateDescriptorSetLayout(device_data->device, 1203 &set_layout_info, 1204 NULL, &data->descriptor_layout)); 1205 1206 /* Descriptor set */ 1207 VkDescriptorSetAllocateInfo alloc_info = {}; 1208 alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; 1209 alloc_info.descriptorPool = data->descriptor_pool; 1210 alloc_info.descriptorSetCount = 1; 1211 alloc_info.pSetLayouts = &data->descriptor_layout; 1212 VK_CHECK(device_data->vtable.AllocateDescriptorSets(device_data->device, 1213 &alloc_info, 1214 &data->descriptor_set)); 1215 1216 /* Constants: we are using 'vec2 offset' and 'vec2 scale' instead of a full 1217 * 3d projection matrix 1218 */ 1219 VkPushConstantRange push_constants[1] = {}; 1220 push_constants[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; 1221 push_constants[0].offset = sizeof(float) * 0; 1222 push_constants[0].size = sizeof(float) * 4; 1223 VkPipelineLayoutCreateInfo layout_info = {}; 1224 layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; 1225 layout_info.setLayoutCount = 1; 1226 layout_info.pSetLayouts = &data->descriptor_layout; 1227 layout_info.pushConstantRangeCount = 1; 1228 layout_info.pPushConstantRanges = push_constants; 1229 VK_CHECK(device_data->vtable.CreatePipelineLayout(device_data->device, 1230 &layout_info, 1231 NULL, &data->pipeline_layout)); 1232 1233 VkPipelineShaderStageCreateInfo stage[2] = {}; 1234 stage[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; 1235 stage[0].stage = VK_SHADER_STAGE_VERTEX_BIT; 1236 stage[0].module = vert_module; 1237 stage[0].pName = "main"; 1238 stage[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; 1239 stage[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; 1240 stage[1].module = frag_module; 1241 stage[1].pName = "main"; 1242 1243 VkVertexInputBindingDescription binding_desc[1] = {}; 1244 binding_desc[0].stride = sizeof(ImDrawVert); 1245 binding_desc[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX; 1246 1247 VkVertexInputAttributeDescription attribute_desc[3] = {}; 1248 attribute_desc[0].location = 0; 1249 attribute_desc[0].binding = binding_desc[0].binding; 1250 attribute_desc[0].format = VK_FORMAT_R32G32_SFLOAT; 1251 attribute_desc[0].offset = IM_OFFSETOF(ImDrawVert, pos); 1252 attribute_desc[1].location = 1; 1253 attribute_desc[1].binding = binding_desc[0].binding; 1254 attribute_desc[1].format = VK_FORMAT_R32G32_SFLOAT; 1255 attribute_desc[1].offset = IM_OFFSETOF(ImDrawVert, uv); 1256 attribute_desc[2].location = 2; 1257 attribute_desc[2].binding = binding_desc[0].binding; 1258 attribute_desc[2].format = VK_FORMAT_R8G8B8A8_UNORM; 1259 attribute_desc[2].offset = IM_OFFSETOF(ImDrawVert, col); 1260 1261 VkPipelineVertexInputStateCreateInfo vertex_info = {}; 1262 vertex_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; 1263 vertex_info.vertexBindingDescriptionCount = 1; 1264 vertex_info.pVertexBindingDescriptions = binding_desc; 1265 vertex_info.vertexAttributeDescriptionCount = 3; 1266 vertex_info.pVertexAttributeDescriptions = attribute_desc; 1267 1268 VkPipelineInputAssemblyStateCreateInfo ia_info = {}; 1269 ia_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; 1270 ia_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; 1271 1272 VkPipelineViewportStateCreateInfo viewport_info = {}; 1273 viewport_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; 1274 viewport_info.viewportCount = 1; 1275 viewport_info.scissorCount = 1; 1276 1277 VkPipelineRasterizationStateCreateInfo raster_info = {}; 1278 raster_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; 1279 raster_info.polygonMode = VK_POLYGON_MODE_FILL; 1280 raster_info.cullMode = VK_CULL_MODE_NONE; 1281 raster_info.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; 1282 raster_info.lineWidth = 1.0f; 1283 1284 VkPipelineMultisampleStateCreateInfo ms_info = {}; 1285 ms_info.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; 1286 ms_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; 1287 1288 VkPipelineColorBlendAttachmentState color_attachment[1] = {}; 1289 color_attachment[0].blendEnable = VK_TRUE; 1290 color_attachment[0].srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; 1291 color_attachment[0].dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; 1292 color_attachment[0].colorBlendOp = VK_BLEND_OP_ADD; 1293 color_attachment[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; 1294 color_attachment[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; 1295 color_attachment[0].alphaBlendOp = VK_BLEND_OP_ADD; 1296 color_attachment[0].colorWriteMask = VK_COLOR_COMPONENT_R_BIT | 1297 VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; 1298 1299 VkPipelineDepthStencilStateCreateInfo depth_info = {}; 1300 depth_info.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; 1301 1302 VkPipelineColorBlendStateCreateInfo blend_info = {}; 1303 blend_info.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; 1304 blend_info.attachmentCount = 1; 1305 blend_info.pAttachments = color_attachment; 1306 1307 VkDynamicState dynamic_states[2] = { VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR }; 1308 VkPipelineDynamicStateCreateInfo dynamic_state = {}; 1309 dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; 1310 dynamic_state.dynamicStateCount = (uint32_t)IM_ARRAYSIZE(dynamic_states); 1311 dynamic_state.pDynamicStates = dynamic_states; 1312 1313 VkGraphicsPipelineCreateInfo info = {}; 1314 info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; 1315 info.flags = 0; 1316 info.stageCount = 2; 1317 info.pStages = stage; 1318 info.pVertexInputState = &vertex_info; 1319 info.pInputAssemblyState = &ia_info; 1320 info.pViewportState = &viewport_info; 1321 info.pRasterizationState = &raster_info; 1322 info.pMultisampleState = &ms_info; 1323 info.pDepthStencilState = &depth_info; 1324 info.pColorBlendState = &blend_info; 1325 info.pDynamicState = &dynamic_state; 1326 info.layout = data->pipeline_layout; 1327 info.renderPass = data->render_pass; 1328 VK_CHECK( 1329 device_data->vtable.CreateGraphicsPipelines(device_data->device, VK_NULL_HANDLE, 1330 1, &info, 1331 NULL, &data->pipeline)); 1332 1333 device_data->vtable.DestroyShaderModule(device_data->device, vert_module, NULL); 1334 device_data->vtable.DestroyShaderModule(device_data->device, frag_module, NULL); 1335 1336 ImGuiIO& io = ImGui::GetIO(); 1337 unsigned char* pixels; 1338 int width, height; 1339 io.Fonts->GetTexDataAsRGBA32(&pixels, &width, &height); 1340 1341 /* Font image */ 1342 VkImageCreateInfo image_info = {}; 1343 image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; 1344 image_info.imageType = VK_IMAGE_TYPE_2D; 1345 image_info.format = VK_FORMAT_R8G8B8A8_UNORM; 1346 image_info.extent.width = width; 1347 image_info.extent.height = height; 1348 image_info.extent.depth = 1; 1349 image_info.mipLevels = 1; 1350 image_info.arrayLayers = 1; 1351 image_info.samples = VK_SAMPLE_COUNT_1_BIT; 1352 image_info.tiling = VK_IMAGE_TILING_OPTIMAL; 1353 image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; 1354 image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; 1355 image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; 1356 VK_CHECK(device_data->vtable.CreateImage(device_data->device, &image_info, 1357 NULL, &data->font_image)); 1358 VkMemoryRequirements font_image_req; 1359 device_data->vtable.GetImageMemoryRequirements(device_data->device, 1360 data->font_image, &font_image_req); 1361 VkMemoryAllocateInfo image_alloc_info = {}; 1362 image_alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; 1363 image_alloc_info.allocationSize = font_image_req.size; 1364 image_alloc_info.memoryTypeIndex = vk_memory_type(device_data, 1365 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 1366 font_image_req.memoryTypeBits); 1367 VK_CHECK(device_data->vtable.AllocateMemory(device_data->device, &image_alloc_info, 1368 NULL, &data->font_mem)); 1369 VK_CHECK(device_data->vtable.BindImageMemory(device_data->device, 1370 data->font_image, 1371 data->font_mem, 0)); 1372 1373 /* Font image view */ 1374 VkImageViewCreateInfo view_info = {}; 1375 view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; 1376 view_info.image = data->font_image; 1377 view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; 1378 view_info.format = VK_FORMAT_R8G8B8A8_UNORM; 1379 view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; 1380 view_info.subresourceRange.levelCount = 1; 1381 view_info.subresourceRange.layerCount = 1; 1382 VK_CHECK(device_data->vtable.CreateImageView(device_data->device, &view_info, 1383 NULL, &data->font_image_view)); 1384 1385 /* Descriptor set */ 1386 VkDescriptorImageInfo desc_image[1] = {}; 1387 desc_image[0].sampler = data->font_sampler; 1388 desc_image[0].imageView = data->font_image_view; 1389 desc_image[0].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; 1390 VkWriteDescriptorSet write_desc[1] = {}; 1391 write_desc[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; 1392 write_desc[0].dstSet = data->descriptor_set; 1393 write_desc[0].descriptorCount = 1; 1394 write_desc[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; 1395 write_desc[0].pImageInfo = desc_image; 1396 device_data->vtable.UpdateDescriptorSets(device_data->device, 1, write_desc, 0, NULL); 1397} 1398 1399static void setup_swapchain_data(struct swapchain_data *data, 1400 const VkSwapchainCreateInfoKHR *pCreateInfo) 1401{ 1402 data->width = pCreateInfo->imageExtent.width; 1403 data->height = pCreateInfo->imageExtent.height; 1404 data->format = pCreateInfo->imageFormat; 1405 1406 data->imgui_context = ImGui::CreateContext(); 1407 ImGui::SetCurrentContext(data->imgui_context); 1408 1409 ImGui::GetIO().IniFilename = NULL; 1410 ImGui::GetIO().DisplaySize = ImVec2((float)data->width, (float)data->height); 1411 1412 struct device_data *device_data = data->device; 1413 1414 /* Render pass */ 1415 VkAttachmentDescription attachment_desc = {}; 1416 attachment_desc.format = pCreateInfo->imageFormat; 1417 attachment_desc.samples = VK_SAMPLE_COUNT_1_BIT; 1418 attachment_desc.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; 1419 attachment_desc.storeOp = VK_ATTACHMENT_STORE_OP_STORE; 1420 attachment_desc.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; 1421 attachment_desc.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; 1422 attachment_desc.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; 1423 attachment_desc.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; 1424 VkAttachmentReference color_attachment = {}; 1425 color_attachment.attachment = 0; 1426 color_attachment.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; 1427 VkSubpassDescription subpass = {}; 1428 subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; 1429 subpass.colorAttachmentCount = 1; 1430 subpass.pColorAttachments = &color_attachment; 1431 VkSubpassDependency dependency = {}; 1432 dependency.srcSubpass = VK_SUBPASS_EXTERNAL; 1433 dependency.dstSubpass = 0; 1434 dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; 1435 dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; 1436 dependency.srcAccessMask = 0; 1437 dependency.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; 1438 VkRenderPassCreateInfo render_pass_info = {}; 1439 render_pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; 1440 render_pass_info.attachmentCount = 1; 1441 render_pass_info.pAttachments = &attachment_desc; 1442 render_pass_info.subpassCount = 1; 1443 render_pass_info.pSubpasses = &subpass; 1444 render_pass_info.dependencyCount = 1; 1445 render_pass_info.pDependencies = &dependency; 1446 VK_CHECK(device_data->vtable.CreateRenderPass(device_data->device, 1447 &render_pass_info, 1448 NULL, &data->render_pass)); 1449 1450 setup_swapchain_data_pipeline(data); 1451 1452 VK_CHECK(device_data->vtable.GetSwapchainImagesKHR(device_data->device, 1453 data->swapchain, 1454 &data->n_images, 1455 NULL)); 1456 1457 data->images = ralloc_array(data, VkImage, data->n_images); 1458 data->image_views = ralloc_array(data, VkImageView, data->n_images); 1459 data->framebuffers = ralloc_array(data, VkFramebuffer, data->n_images); 1460 1461 VK_CHECK(device_data->vtable.GetSwapchainImagesKHR(device_data->device, 1462 data->swapchain, 1463 &data->n_images, 1464 data->images)); 1465 1466 /* Image views */ 1467 VkImageViewCreateInfo view_info = {}; 1468 view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; 1469 view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; 1470 view_info.format = pCreateInfo->imageFormat; 1471 view_info.components.r = VK_COMPONENT_SWIZZLE_R; 1472 view_info.components.g = VK_COMPONENT_SWIZZLE_G; 1473 view_info.components.b = VK_COMPONENT_SWIZZLE_B; 1474 view_info.components.a = VK_COMPONENT_SWIZZLE_A; 1475 view_info.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }; 1476 for (uint32_t i = 0; i < data->n_images; i++) { 1477 view_info.image = data->images[i]; 1478 VK_CHECK(device_data->vtable.CreateImageView(device_data->device, 1479 &view_info, NULL, 1480 &data->image_views[i])); 1481 } 1482 1483 /* Framebuffers */ 1484 VkImageView attachment[1]; 1485 VkFramebufferCreateInfo fb_info = {}; 1486 fb_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; 1487 fb_info.renderPass = data->render_pass; 1488 fb_info.attachmentCount = 1; 1489 fb_info.pAttachments = attachment; 1490 fb_info.width = data->width; 1491 fb_info.height = data->height; 1492 fb_info.layers = 1; 1493 for (uint32_t i = 0; i < data->n_images; i++) { 1494 attachment[0] = data->image_views[i]; 1495 VK_CHECK(device_data->vtable.CreateFramebuffer(device_data->device, &fb_info, 1496 NULL, &data->framebuffers[i])); 1497 } 1498 1499 /* Command buffer pool */ 1500 VkCommandPoolCreateInfo cmd_buffer_pool_info = {}; 1501 cmd_buffer_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; 1502 cmd_buffer_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; 1503 cmd_buffer_pool_info.queueFamilyIndex = device_data->graphic_queue->family_index; 1504 VK_CHECK(device_data->vtable.CreateCommandPool(device_data->device, 1505 &cmd_buffer_pool_info, 1506 NULL, &data->command_pool)); 1507} 1508 1509static void shutdown_swapchain_data(struct swapchain_data *data) 1510{ 1511 struct device_data *device_data = data->device; 1512 1513 list_for_each_entry_safe(struct overlay_draw, draw, &data->draws, link) { 1514 device_data->vtable.DestroySemaphore(device_data->device, draw->semaphore, NULL); 1515 device_data->vtable.DestroyFence(device_data->device, draw->fence, NULL); 1516 device_data->vtable.DestroyBuffer(device_data->device, draw->vertex_buffer, NULL); 1517 device_data->vtable.DestroyBuffer(device_data->device, draw->index_buffer, NULL); 1518 device_data->vtable.FreeMemory(device_data->device, draw->vertex_buffer_mem, NULL); 1519 device_data->vtable.FreeMemory(device_data->device, draw->index_buffer_mem, NULL); 1520 } 1521 1522 for (uint32_t i = 0; i < data->n_images; i++) { 1523 device_data->vtable.DestroyImageView(device_data->device, data->image_views[i], NULL); 1524 device_data->vtable.DestroyFramebuffer(device_data->device, data->framebuffers[i], NULL); 1525 } 1526 1527 device_data->vtable.DestroyRenderPass(device_data->device, data->render_pass, NULL); 1528 1529 device_data->vtable.DestroyCommandPool(device_data->device, data->command_pool, NULL); 1530 1531 device_data->vtable.DestroyPipeline(device_data->device, data->pipeline, NULL); 1532 device_data->vtable.DestroyPipelineLayout(device_data->device, data->pipeline_layout, NULL); 1533 1534 device_data->vtable.DestroyDescriptorPool(device_data->device, 1535 data->descriptor_pool, NULL); 1536 device_data->vtable.DestroyDescriptorSetLayout(device_data->device, 1537 data->descriptor_layout, NULL); 1538 1539 device_data->vtable.DestroySampler(device_data->device, data->font_sampler, NULL); 1540 device_data->vtable.DestroyImageView(device_data->device, data->font_image_view, NULL); 1541 device_data->vtable.DestroyImage(device_data->device, data->font_image, NULL); 1542 device_data->vtable.FreeMemory(device_data->device, data->font_mem, NULL); 1543 1544 device_data->vtable.DestroyBuffer(device_data->device, data->upload_font_buffer, NULL); 1545 device_data->vtable.FreeMemory(device_data->device, data->upload_font_buffer_mem, NULL); 1546 1547 ImGui::DestroyContext(data->imgui_context); 1548} 1549 1550static struct overlay_draw *before_present(struct swapchain_data *swapchain_data, 1551 struct queue_data *present_queue, 1552 const VkSemaphore *wait_semaphores, 1553 unsigned n_wait_semaphores, 1554 unsigned imageIndex) 1555{ 1556 struct instance_data *instance_data = swapchain_data->device->instance; 1557 struct overlay_draw *draw = NULL; 1558 1559 snapshot_swapchain_frame(swapchain_data); 1560 1561 if (!instance_data->params.no_display && swapchain_data->n_frames > 0) { 1562 compute_swapchain_display(swapchain_data); 1563 draw = render_swapchain_display(swapchain_data, present_queue, 1564 wait_semaphores, n_wait_semaphores, 1565 imageIndex); 1566 } 1567 1568 return draw; 1569} 1570 1571static VkResult overlay_CreateSwapchainKHR( 1572 VkDevice device, 1573 const VkSwapchainCreateInfoKHR* pCreateInfo, 1574 const VkAllocationCallbacks* pAllocator, 1575 VkSwapchainKHR* pSwapchain) 1576{ 1577 struct device_data *device_data = FIND_DEVICE_DATA(device); 1578 VkResult result = device_data->vtable.CreateSwapchainKHR(device, pCreateInfo, pAllocator, pSwapchain); 1579 if (result != VK_SUCCESS) return result; 1580 1581 struct swapchain_data *swapchain_data = new_swapchain_data(*pSwapchain, device_data); 1582 setup_swapchain_data(swapchain_data, pCreateInfo); 1583 return result; 1584} 1585 1586static void overlay_DestroySwapchainKHR( 1587 VkDevice device, 1588 VkSwapchainKHR swapchain, 1589 const VkAllocationCallbacks* pAllocator) 1590{ 1591 struct swapchain_data *swapchain_data = FIND_SWAPCHAIN_DATA(swapchain); 1592 1593 shutdown_swapchain_data(swapchain_data); 1594 swapchain_data->device->vtable.DestroySwapchainKHR(device, swapchain, pAllocator); 1595 destroy_swapchain_data(swapchain_data); 1596} 1597 1598static VkResult overlay_QueuePresentKHR( 1599 VkQueue queue, 1600 const VkPresentInfoKHR* pPresentInfo) 1601{ 1602 struct queue_data *queue_data = FIND_QUEUE_DATA(queue); 1603 struct device_data *device_data = queue_data->device; 1604 struct instance_data *instance_data = device_data->instance; 1605 uint32_t query_results[OVERLAY_QUERY_COUNT]; 1606 1607 device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_frame]++; 1608 1609 if (list_length(&queue_data->running_command_buffer) > 0) { 1610 /* Before getting the query results, make sure the operations have 1611 * completed. 1612 */ 1613 VkResult err = device_data->vtable.ResetFences(device_data->device, 1614 1, &queue_data->queries_fence); 1615 check_vk_result(err); 1616 err = device_data->vtable.QueueSubmit(queue, 0, NULL, queue_data->queries_fence); 1617 check_vk_result(err); 1618 err = device_data->vtable.WaitForFences(device_data->device, 1619 1, &queue_data->queries_fence, 1620 VK_FALSE, UINT64_MAX); 1621 check_vk_result(err); 1622 1623 /* Now get the results. */ 1624 list_for_each_entry_safe(struct command_buffer_data, cmd_buffer_data, 1625 &queue_data->running_command_buffer, link) { 1626 list_delinit(&cmd_buffer_data->link); 1627 1628 if (cmd_buffer_data->pipeline_query_pool) { 1629 memset(query_results, 0, sizeof(query_results)); 1630 err = 1631 device_data->vtable.GetQueryPoolResults(device_data->device, 1632 cmd_buffer_data->pipeline_query_pool, 1633 cmd_buffer_data->query_index, 1, 1634 sizeof(uint32_t) * OVERLAY_QUERY_COUNT, 1635 query_results, 0, VK_QUERY_RESULT_WAIT_BIT); 1636 check_vk_result(err); 1637 1638 for (uint32_t i = OVERLAY_PARAM_ENABLED_vertices; 1639 i <= OVERLAY_PARAM_ENABLED_compute_invocations; i++) { 1640 device_data->frame_stats.stats[i] += query_results[i - OVERLAY_PARAM_ENABLED_vertices]; 1641 } 1642 } 1643 if (cmd_buffer_data->timestamp_query_pool) { 1644 uint64_t gpu_timestamps[2] = { 0 }; 1645 err = 1646 device_data->vtable.GetQueryPoolResults(device_data->device, 1647 cmd_buffer_data->timestamp_query_pool, 1648 cmd_buffer_data->query_index * 2, 2, 1649 2 * sizeof(uint64_t), gpu_timestamps, sizeof(uint64_t), 1650 VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT); 1651 check_vk_result(err); 1652 1653 gpu_timestamps[0] &= queue_data->timestamp_mask; 1654 gpu_timestamps[1] &= queue_data->timestamp_mask; 1655 device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_gpu_timing] += 1656 (gpu_timestamps[1] - gpu_timestamps[0]) * 1657 device_data->properties.limits.timestampPeriod; 1658 } 1659 } 1660 } 1661 1662 /* Otherwise we need to add our overlay drawing semaphore to the list of 1663 * semaphores to wait on. If we don't do that the presented picture might 1664 * be have incomplete overlay drawings. 1665 */ 1666 VkResult result = VK_SUCCESS; 1667 if (instance_data->params.no_display) { 1668 for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) { 1669 VkSwapchainKHR swapchain = pPresentInfo->pSwapchains[i]; 1670 struct swapchain_data *swapchain_data = FIND_SWAPCHAIN_DATA(swapchain); 1671 1672 before_present(swapchain_data, 1673 queue_data, 1674 pPresentInfo->pWaitSemaphores, 1675 pPresentInfo->waitSemaphoreCount, 1676 pPresentInfo->pImageIndices[i]); 1677 } 1678 result = queue_data->device->vtable.QueuePresentKHR(queue, pPresentInfo); 1679 } else { 1680 for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) { 1681 VkSwapchainKHR swapchain = pPresentInfo->pSwapchains[i]; 1682 struct swapchain_data *swapchain_data = FIND_SWAPCHAIN_DATA(swapchain); 1683 VkPresentInfoKHR present_info = *pPresentInfo; 1684 present_info.swapchainCount = 1; 1685 present_info.pSwapchains = &swapchain; 1686 1687 uint32_t image_index = pPresentInfo->pImageIndices[i]; 1688 1689 struct overlay_draw *draw = before_present(swapchain_data, 1690 queue_data, 1691 pPresentInfo->pWaitSemaphores, 1692 pPresentInfo->waitSemaphoreCount, 1693 image_index); 1694 1695 /* Because the submission of the overlay draw waits on the semaphores 1696 * handed for present, we don't need to have this present operation 1697 * wait on them as well, we can just wait on the overlay submission 1698 * semaphore. 1699 */ 1700 present_info.pWaitSemaphores = &draw->semaphore; 1701 present_info.waitSemaphoreCount = 1; 1702 1703 VkResult chain_result = queue_data->device->vtable.QueuePresentKHR(queue, &present_info); 1704 if (pPresentInfo->pResults) 1705 pPresentInfo->pResults[i] = chain_result; 1706 if (chain_result != VK_SUCCESS && result == VK_SUCCESS) 1707 result = chain_result; 1708 } 1709 } 1710 return result; 1711} 1712 1713static VkResult overlay_AcquireNextImageKHR( 1714 VkDevice device, 1715 VkSwapchainKHR swapchain, 1716 uint64_t timeout, 1717 VkSemaphore semaphore, 1718 VkFence fence, 1719 uint32_t* pImageIndex) 1720{ 1721 struct swapchain_data *swapchain_data = FIND_SWAPCHAIN_DATA(swapchain); 1722 struct device_data *device_data = swapchain_data->device; 1723 1724 uint64_t ts0 = os_time_get(); 1725 VkResult result = device_data->vtable.AcquireNextImageKHR(device, swapchain, timeout, 1726 semaphore, fence, pImageIndex); 1727 uint64_t ts1 = os_time_get(); 1728 1729 swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire_timing] += ts1 - ts0; 1730 swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire]++; 1731 1732 return result; 1733} 1734 1735static VkResult overlay_AcquireNextImage2KHR( 1736 VkDevice device, 1737 const VkAcquireNextImageInfoKHR* pAcquireInfo, 1738 uint32_t* pImageIndex) 1739{ 1740 struct swapchain_data *swapchain_data = FIND_SWAPCHAIN_DATA(pAcquireInfo->swapchain); 1741 struct device_data *device_data = swapchain_data->device; 1742 1743 uint64_t ts0 = os_time_get(); 1744 VkResult result = device_data->vtable.AcquireNextImage2KHR(device, pAcquireInfo, pImageIndex); 1745 uint64_t ts1 = os_time_get(); 1746 1747 swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire_timing] += ts1 - ts0; 1748 swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire]++; 1749 1750 return result; 1751} 1752 1753static void overlay_CmdDraw( 1754 VkCommandBuffer commandBuffer, 1755 uint32_t vertexCount, 1756 uint32_t instanceCount, 1757 uint32_t firstVertex, 1758 uint32_t firstInstance) 1759{ 1760 struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer); 1761 cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw]++; 1762 struct device_data *device_data = cmd_buffer_data->device; 1763 device_data->vtable.CmdDraw(commandBuffer, vertexCount, instanceCount, 1764 firstVertex, firstInstance); 1765} 1766 1767static void overlay_CmdDrawIndexed( 1768 VkCommandBuffer commandBuffer, 1769 uint32_t indexCount, 1770 uint32_t instanceCount, 1771 uint32_t firstIndex, 1772 int32_t vertexOffset, 1773 uint32_t firstInstance) 1774{ 1775 struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer); 1776 cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed]++; 1777 struct device_data *device_data = cmd_buffer_data->device; 1778 device_data->vtable.CmdDrawIndexed(commandBuffer, indexCount, instanceCount, 1779 firstIndex, vertexOffset, firstInstance); 1780} 1781 1782static void overlay_CmdDrawIndirect( 1783 VkCommandBuffer commandBuffer, 1784 VkBuffer buffer, 1785 VkDeviceSize offset, 1786 uint32_t drawCount, 1787 uint32_t stride) 1788{ 1789 struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer); 1790 cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indirect]++; 1791 struct device_data *device_data = cmd_buffer_data->device; 1792 device_data->vtable.CmdDrawIndirect(commandBuffer, buffer, offset, drawCount, stride); 1793} 1794 1795static void overlay_CmdDrawIndexedIndirect( 1796 VkCommandBuffer commandBuffer, 1797 VkBuffer buffer, 1798 VkDeviceSize offset, 1799 uint32_t drawCount, 1800 uint32_t stride) 1801{ 1802 struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer); 1803 cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed_indirect]++; 1804 struct device_data *device_data = cmd_buffer_data->device; 1805 device_data->vtable.CmdDrawIndexedIndirect(commandBuffer, buffer, offset, drawCount, stride); 1806} 1807 1808static void overlay_CmdDrawIndirectCountKHR( 1809 VkCommandBuffer commandBuffer, 1810 VkBuffer buffer, 1811 VkDeviceSize offset, 1812 VkBuffer countBuffer, 1813 VkDeviceSize countBufferOffset, 1814 uint32_t maxDrawCount, 1815 uint32_t stride) 1816{ 1817 struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer); 1818 cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indirect_count]++; 1819 struct device_data *device_data = cmd_buffer_data->device; 1820 device_data->vtable.CmdDrawIndirectCountKHR(commandBuffer, buffer, offset, 1821 countBuffer, countBufferOffset, 1822 maxDrawCount, stride); 1823} 1824 1825static void overlay_CmdDrawIndexedIndirectCountKHR( 1826 VkCommandBuffer commandBuffer, 1827 VkBuffer buffer, 1828 VkDeviceSize offset, 1829 VkBuffer countBuffer, 1830 VkDeviceSize countBufferOffset, 1831 uint32_t maxDrawCount, 1832 uint32_t stride) 1833{ 1834 struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer); 1835 cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed_indirect_count]++; 1836 struct device_data *device_data = cmd_buffer_data->device; 1837 device_data->vtable.CmdDrawIndexedIndirectCountKHR(commandBuffer, buffer, offset, 1838 countBuffer, countBufferOffset, 1839 maxDrawCount, stride); 1840} 1841 1842static void overlay_CmdDispatch( 1843 VkCommandBuffer commandBuffer, 1844 uint32_t groupCountX, 1845 uint32_t groupCountY, 1846 uint32_t groupCountZ) 1847{ 1848 struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer); 1849 cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_dispatch]++; 1850 struct device_data *device_data = cmd_buffer_data->device; 1851 device_data->vtable.CmdDispatch(commandBuffer, groupCountX, groupCountY, groupCountZ); 1852} 1853 1854static void overlay_CmdDispatchIndirect( 1855 VkCommandBuffer commandBuffer, 1856 VkBuffer buffer, 1857 VkDeviceSize offset) 1858{ 1859 struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer); 1860 cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_dispatch_indirect]++; 1861 struct device_data *device_data = cmd_buffer_data->device; 1862 device_data->vtable.CmdDispatchIndirect(commandBuffer, buffer, offset); 1863} 1864 1865static void overlay_CmdBindPipeline( 1866 VkCommandBuffer commandBuffer, 1867 VkPipelineBindPoint pipelineBindPoint, 1868 VkPipeline pipeline) 1869{ 1870 struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer); 1871 switch (pipelineBindPoint) { 1872 case VK_PIPELINE_BIND_POINT_GRAPHICS: cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_graphics]++; break; 1873 case VK_PIPELINE_BIND_POINT_COMPUTE: cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_compute]++; break; 1874 case VK_PIPELINE_BIND_POINT_RAY_TRACING_NV: cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_raytracing]++; break; 1875 default: break; 1876 } 1877 struct device_data *device_data = cmd_buffer_data->device; 1878 device_data->vtable.CmdBindPipeline(commandBuffer, pipelineBindPoint, pipeline); 1879} 1880 1881static VkResult overlay_BeginCommandBuffer( 1882 VkCommandBuffer commandBuffer, 1883 const VkCommandBufferBeginInfo* pBeginInfo) 1884{ 1885 struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer); 1886 struct device_data *device_data = cmd_buffer_data->device; 1887 1888 memset(&cmd_buffer_data->stats, 0, sizeof(cmd_buffer_data->stats)); 1889 1890 /* We don't record any query in secondary command buffers, just make sure 1891 * we have the right inheritance. 1892 */ 1893 if (cmd_buffer_data->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { 1894 VkCommandBufferBeginInfo *begin_info = (VkCommandBufferBeginInfo *) 1895 clone_chain((const struct VkBaseInStructure *)pBeginInfo); 1896 VkCommandBufferInheritanceInfo *parent_inhe_info = (VkCommandBufferInheritanceInfo *) 1897 vk_find_struct(begin_info, COMMAND_BUFFER_INHERITANCE_INFO); 1898 VkCommandBufferInheritanceInfo inhe_info = { 1899 VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO, 1900 NULL, 1901 VK_NULL_HANDLE, 1902 0, 1903 VK_NULL_HANDLE, 1904 VK_FALSE, 1905 0, 1906 overlay_query_flags, 1907 }; 1908 1909 if (parent_inhe_info) 1910 parent_inhe_info->pipelineStatistics = overlay_query_flags; 1911 else { 1912 inhe_info.pNext = begin_info->pNext; 1913 begin_info->pNext = &inhe_info; 1914 } 1915 1916 VkResult result = device_data->vtable.BeginCommandBuffer(commandBuffer, pBeginInfo); 1917 1918 if (!parent_inhe_info) 1919 begin_info->pNext = inhe_info.pNext; 1920 1921 free_chain((struct VkBaseOutStructure *)begin_info); 1922 1923 return result; 1924 } 1925 1926 /* Otherwise record a begin query as first command. */ 1927 VkResult result = device_data->vtable.BeginCommandBuffer(commandBuffer, pBeginInfo); 1928 1929 if (result == VK_SUCCESS) { 1930 if (cmd_buffer_data->pipeline_query_pool) { 1931 device_data->vtable.CmdResetQueryPool(commandBuffer, 1932 cmd_buffer_data->pipeline_query_pool, 1933 cmd_buffer_data->query_index, 1); 1934 } 1935 if (cmd_buffer_data->timestamp_query_pool) { 1936 device_data->vtable.CmdResetQueryPool(commandBuffer, 1937 cmd_buffer_data->timestamp_query_pool, 1938 cmd_buffer_data->query_index * 2, 2); 1939 } 1940 if (cmd_buffer_data->pipeline_query_pool) { 1941 device_data->vtable.CmdBeginQuery(commandBuffer, 1942 cmd_buffer_data->pipeline_query_pool, 1943 cmd_buffer_data->query_index, 0); 1944 } 1945 if (cmd_buffer_data->timestamp_query_pool) { 1946 device_data->vtable.CmdWriteTimestamp(commandBuffer, 1947 VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 1948 cmd_buffer_data->timestamp_query_pool, 1949 cmd_buffer_data->query_index * 2); 1950 } 1951 } 1952 1953 return result; 1954} 1955 1956static VkResult overlay_EndCommandBuffer( 1957 VkCommandBuffer commandBuffer) 1958{ 1959 struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer); 1960 struct device_data *device_data = cmd_buffer_data->device; 1961 1962 if (cmd_buffer_data->timestamp_query_pool) { 1963 device_data->vtable.CmdWriteTimestamp(commandBuffer, 1964 VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 1965 cmd_buffer_data->timestamp_query_pool, 1966 cmd_buffer_data->query_index * 2 + 1); 1967 } 1968 if (cmd_buffer_data->pipeline_query_pool) { 1969 device_data->vtable.CmdEndQuery(commandBuffer, 1970 cmd_buffer_data->pipeline_query_pool, 1971 cmd_buffer_data->query_index); 1972 } 1973 1974 return device_data->vtable.EndCommandBuffer(commandBuffer); 1975} 1976 1977static VkResult overlay_ResetCommandBuffer( 1978 VkCommandBuffer commandBuffer, 1979 VkCommandBufferResetFlags flags) 1980{ 1981 struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer); 1982 struct device_data *device_data = cmd_buffer_data->device; 1983 1984 memset(&cmd_buffer_data->stats, 0, sizeof(cmd_buffer_data->stats)); 1985 1986 return device_data->vtable.ResetCommandBuffer(commandBuffer, flags); 1987} 1988 1989static void overlay_CmdExecuteCommands( 1990 VkCommandBuffer commandBuffer, 1991 uint32_t commandBufferCount, 1992 const VkCommandBuffer* pCommandBuffers) 1993{ 1994 struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer); 1995 struct device_data *device_data = cmd_buffer_data->device; 1996 1997 /* Add the stats of the executed command buffers to the primary one. */ 1998 for (uint32_t c = 0; c < commandBufferCount; c++) { 1999 struct command_buffer_data *sec_cmd_buffer_data = FIND_CMD_BUFFER_DATA(pCommandBuffers[c]); 2000 2001 for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) 2002 cmd_buffer_data->stats.stats[s] += sec_cmd_buffer_data->stats.stats[s]; 2003 } 2004 2005 device_data->vtable.CmdExecuteCommands(commandBuffer, commandBufferCount, pCommandBuffers); 2006} 2007 2008static VkResult overlay_AllocateCommandBuffers( 2009 VkDevice device, 2010 const VkCommandBufferAllocateInfo* pAllocateInfo, 2011 VkCommandBuffer* pCommandBuffers) 2012{ 2013 struct device_data *device_data = FIND_DEVICE_DATA(device); 2014 VkResult result = 2015 device_data->vtable.AllocateCommandBuffers(device, pAllocateInfo, pCommandBuffers); 2016 if (result != VK_SUCCESS) 2017 return result; 2018 2019 VkQueryPool pipeline_query_pool = VK_NULL_HANDLE; 2020 VkQueryPool timestamp_query_pool = VK_NULL_HANDLE; 2021 if (device_data->instance->pipeline_statistics_enabled && 2022 pAllocateInfo->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { 2023 VkQueryPoolCreateInfo pool_info = { 2024 VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, 2025 NULL, 2026 0, 2027 VK_QUERY_TYPE_PIPELINE_STATISTICS, 2028 pAllocateInfo->commandBufferCount, 2029 overlay_query_flags, 2030 }; 2031 VkResult err = 2032 device_data->vtable.CreateQueryPool(device_data->device, &pool_info, 2033 NULL, &pipeline_query_pool); 2034 check_vk_result(err); 2035 } 2036 if (device_data->instance->params.enabled[OVERLAY_PARAM_ENABLED_gpu_timing]) { 2037 VkQueryPoolCreateInfo pool_info = { 2038 VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, 2039 NULL, 2040 0, 2041 VK_QUERY_TYPE_TIMESTAMP, 2042 pAllocateInfo->commandBufferCount * 2, 2043 0, 2044 }; 2045 VkResult err = 2046 device_data->vtable.CreateQueryPool(device_data->device, &pool_info, 2047 NULL, ×tamp_query_pool); 2048 check_vk_result(err); 2049 } 2050 2051 for (uint32_t i = 0; i < pAllocateInfo->commandBufferCount; i++) { 2052 new_command_buffer_data(pCommandBuffers[i], pAllocateInfo->level, 2053 pipeline_query_pool, timestamp_query_pool, 2054 i, device_data); 2055 } 2056 2057 if (pipeline_query_pool) 2058 map_object(HKEY(pipeline_query_pool), (void *)(uintptr_t) pAllocateInfo->commandBufferCount); 2059 if (timestamp_query_pool) 2060 map_object(HKEY(timestamp_query_pool), (void *)(uintptr_t) pAllocateInfo->commandBufferCount); 2061 2062 return result; 2063} 2064 2065static void overlay_FreeCommandBuffers( 2066 VkDevice device, 2067 VkCommandPool commandPool, 2068 uint32_t commandBufferCount, 2069 const VkCommandBuffer* pCommandBuffers) 2070{ 2071 struct device_data *device_data = FIND_DEVICE_DATA(device); 2072 for (uint32_t i = 0; i < commandBufferCount; i++) { 2073 struct command_buffer_data *cmd_buffer_data = 2074 FIND_CMD_BUFFER_DATA(pCommandBuffers[i]); 2075 /* It is legal to free a NULL command buffer*/ 2076 if (!cmd_buffer_data) 2077 continue; 2078 2079 uint64_t count = (uintptr_t)find_object_data(HKEY(cmd_buffer_data->pipeline_query_pool)); 2080 if (count == 1) { 2081 unmap_object(HKEY(cmd_buffer_data->pipeline_query_pool)); 2082 device_data->vtable.DestroyQueryPool(device_data->device, 2083 cmd_buffer_data->pipeline_query_pool, NULL); 2084 } else if (count != 0) { 2085 map_object(HKEY(cmd_buffer_data->pipeline_query_pool), (void *)(uintptr_t)(count - 1)); 2086 } 2087 count = (uintptr_t)find_object_data(HKEY(cmd_buffer_data->timestamp_query_pool)); 2088 if (count == 1) { 2089 unmap_object(HKEY(cmd_buffer_data->timestamp_query_pool)); 2090 device_data->vtable.DestroyQueryPool(device_data->device, 2091 cmd_buffer_data->timestamp_query_pool, NULL); 2092 } else if (count != 0) { 2093 map_object(HKEY(cmd_buffer_data->timestamp_query_pool), (void *)(uintptr_t)(count - 1)); 2094 } 2095 destroy_command_buffer_data(cmd_buffer_data); 2096 } 2097 2098 device_data->vtable.FreeCommandBuffers(device, commandPool, 2099 commandBufferCount, pCommandBuffers); 2100} 2101 2102static VkResult overlay_QueueSubmit( 2103 VkQueue queue, 2104 uint32_t submitCount, 2105 const VkSubmitInfo* pSubmits, 2106 VkFence fence) 2107{ 2108 struct queue_data *queue_data = FIND_QUEUE_DATA(queue); 2109 struct device_data *device_data = queue_data->device; 2110 2111 device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_submit]++; 2112 2113 for (uint32_t s = 0; s < submitCount; s++) { 2114 for (uint32_t c = 0; c < pSubmits[s].commandBufferCount; c++) { 2115 struct command_buffer_data *cmd_buffer_data = 2116 FIND_CMD_BUFFER_DATA(pSubmits[s].pCommandBuffers[c]); 2117 2118 /* Merge the submitted command buffer stats into the device. */ 2119 for (uint32_t st = 0; st < OVERLAY_PARAM_ENABLED_MAX; st++) 2120 device_data->frame_stats.stats[st] += cmd_buffer_data->stats.stats[st]; 2121 2122 /* Attach the command buffer to the queue so we remember to read its 2123 * pipeline statistics & timestamps at QueuePresent(). 2124 */ 2125 if (!cmd_buffer_data->pipeline_query_pool && 2126 !cmd_buffer_data->timestamp_query_pool) 2127 continue; 2128 2129 if (list_empty(&cmd_buffer_data->link)) { 2130 list_addtail(&cmd_buffer_data->link, 2131 &queue_data->running_command_buffer); 2132 } else { 2133 fprintf(stderr, "Command buffer submitted multiple times before present.\n" 2134 "This could lead to invalid data.\n"); 2135 } 2136 } 2137 } 2138 2139 return device_data->vtable.QueueSubmit(queue, submitCount, pSubmits, fence); 2140} 2141 2142static VkResult overlay_CreateDevice( 2143 VkPhysicalDevice physicalDevice, 2144 const VkDeviceCreateInfo* pCreateInfo, 2145 const VkAllocationCallbacks* pAllocator, 2146 VkDevice* pDevice) 2147{ 2148 struct instance_data *instance_data = FIND_PHYSICAL_DEVICE_DATA(physicalDevice); 2149 VkLayerDeviceCreateInfo *chain_info = 2150 get_device_chain_info(pCreateInfo, VK_LAYER_LINK_INFO); 2151 2152 assert(chain_info->u.pLayerInfo); 2153 PFN_vkGetInstanceProcAddr fpGetInstanceProcAddr = chain_info->u.pLayerInfo->pfnNextGetInstanceProcAddr; 2154 PFN_vkGetDeviceProcAddr fpGetDeviceProcAddr = chain_info->u.pLayerInfo->pfnNextGetDeviceProcAddr; 2155 PFN_vkCreateDevice fpCreateDevice = (PFN_vkCreateDevice)fpGetInstanceProcAddr(NULL, "vkCreateDevice"); 2156 if (fpCreateDevice == NULL) { 2157 return VK_ERROR_INITIALIZATION_FAILED; 2158 } 2159 2160 // Advance the link info for the next element on the chain 2161 chain_info->u.pLayerInfo = chain_info->u.pLayerInfo->pNext; 2162 2163 VkPhysicalDeviceFeatures device_features = {}; 2164 VkDeviceCreateInfo device_info = *pCreateInfo; 2165 2166 if (pCreateInfo->pEnabledFeatures) 2167 device_features = *(pCreateInfo->pEnabledFeatures); 2168 if (instance_data->pipeline_statistics_enabled) { 2169 device_features.inheritedQueries = true; 2170 device_features.pipelineStatisticsQuery = true; 2171 } 2172 device_info.pEnabledFeatures = &device_features; 2173 2174 2175 VkResult result = fpCreateDevice(physicalDevice, &device_info, pAllocator, pDevice); 2176 if (result != VK_SUCCESS) return result; 2177 2178 struct device_data *device_data = new_device_data(*pDevice, instance_data); 2179 device_data->physical_device = physicalDevice; 2180 vk_load_device_commands(*pDevice, fpGetDeviceProcAddr, &device_data->vtable); 2181 2182 instance_data->vtable.GetPhysicalDeviceProperties(device_data->physical_device, 2183 &device_data->properties); 2184 2185 VkLayerDeviceCreateInfo *load_data_info = 2186 get_device_chain_info(pCreateInfo, VK_LOADER_DATA_CALLBACK); 2187 device_data->set_device_loader_data = load_data_info->u.pfnSetDeviceLoaderData; 2188 2189 device_map_queues(device_data, pCreateInfo); 2190 2191 return result; 2192} 2193 2194static void overlay_DestroyDevice( 2195 VkDevice device, 2196 const VkAllocationCallbacks* pAllocator) 2197{ 2198 struct device_data *device_data = FIND_DEVICE_DATA(device); 2199 device_unmap_queues(device_data); 2200 device_data->vtable.DestroyDevice(device, pAllocator); 2201 destroy_device_data(device_data); 2202} 2203 2204static VkResult overlay_CreateInstance( 2205 const VkInstanceCreateInfo* pCreateInfo, 2206 const VkAllocationCallbacks* pAllocator, 2207 VkInstance* pInstance) 2208{ 2209 VkLayerInstanceCreateInfo *chain_info = 2210 get_instance_chain_info(pCreateInfo, VK_LAYER_LINK_INFO); 2211 2212 assert(chain_info->u.pLayerInfo); 2213 PFN_vkGetInstanceProcAddr fpGetInstanceProcAddr = 2214 chain_info->u.pLayerInfo->pfnNextGetInstanceProcAddr; 2215 PFN_vkCreateInstance fpCreateInstance = 2216 (PFN_vkCreateInstance)fpGetInstanceProcAddr(NULL, "vkCreateInstance"); 2217 if (fpCreateInstance == NULL) { 2218 return VK_ERROR_INITIALIZATION_FAILED; 2219 } 2220 2221 // Advance the link info for the next element on the chain 2222 chain_info->u.pLayerInfo = chain_info->u.pLayerInfo->pNext; 2223 2224 VkResult result = fpCreateInstance(pCreateInfo, pAllocator, pInstance); 2225 if (result != VK_SUCCESS) return result; 2226 2227 struct instance_data *instance_data = new_instance_data(*pInstance); 2228 vk_load_instance_commands(instance_data->instance, 2229 fpGetInstanceProcAddr, 2230 &instance_data->vtable); 2231 instance_data_map_physical_devices(instance_data, true); 2232 2233 parse_overlay_env(&instance_data->params, getenv("VK_LAYER_MESA_OVERLAY_CONFIG")); 2234 2235 for (int i = OVERLAY_PARAM_ENABLED_vertices; 2236 i <= OVERLAY_PARAM_ENABLED_compute_invocations; i++) { 2237 if (instance_data->params.enabled[i]) { 2238 instance_data->pipeline_statistics_enabled = true; 2239 break; 2240 } 2241 } 2242 2243 return result; 2244} 2245 2246static void overlay_DestroyInstance( 2247 VkInstance instance, 2248 const VkAllocationCallbacks* pAllocator) 2249{ 2250 struct instance_data *instance_data = FIND_INSTANCE_DATA(instance); 2251 instance_data_map_physical_devices(instance_data, false); 2252 instance_data->vtable.DestroyInstance(instance, pAllocator); 2253 destroy_instance_data(instance_data); 2254} 2255 2256static const struct { 2257 const char *name; 2258 void *ptr; 2259} name_to_funcptr_map[] = { 2260 { "vkGetDeviceProcAddr", (void *) vkGetDeviceProcAddr }, 2261#define ADD_HOOK(fn) { "vk" # fn, (void *) overlay_ ## fn } 2262 ADD_HOOK(AllocateCommandBuffers), 2263 ADD_HOOK(FreeCommandBuffers), 2264 ADD_HOOK(ResetCommandBuffer), 2265 ADD_HOOK(BeginCommandBuffer), 2266 ADD_HOOK(EndCommandBuffer), 2267 ADD_HOOK(CmdExecuteCommands), 2268 2269 ADD_HOOK(CmdDraw), 2270 ADD_HOOK(CmdDrawIndexed), 2271 ADD_HOOK(CmdDrawIndirect), 2272 ADD_HOOK(CmdDrawIndexedIndirect), 2273 ADD_HOOK(CmdDispatch), 2274 ADD_HOOK(CmdDispatchIndirect), 2275 ADD_HOOK(CmdDrawIndirectCountKHR), 2276 ADD_HOOK(CmdDrawIndexedIndirectCountKHR), 2277 2278 ADD_HOOK(CmdBindPipeline), 2279 2280 ADD_HOOK(CreateSwapchainKHR), 2281 ADD_HOOK(QueuePresentKHR), 2282 ADD_HOOK(DestroySwapchainKHR), 2283 ADD_HOOK(AcquireNextImageKHR), 2284 ADD_HOOK(AcquireNextImage2KHR), 2285 2286 ADD_HOOK(QueueSubmit), 2287 2288 ADD_HOOK(CreateDevice), 2289 ADD_HOOK(DestroyDevice), 2290 2291 ADD_HOOK(CreateInstance), 2292 ADD_HOOK(DestroyInstance), 2293#undef ADD_HOOK 2294}; 2295 2296static void *find_ptr(const char *name) 2297{ 2298 for (uint32_t i = 0; i < ARRAY_SIZE(name_to_funcptr_map); i++) { 2299 if (strcmp(name, name_to_funcptr_map[i].name) == 0) 2300 return name_to_funcptr_map[i].ptr; 2301 } 2302 2303 return NULL; 2304} 2305 2306VK_LAYER_EXPORT VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetDeviceProcAddr(VkDevice dev, 2307 const char *funcName) 2308{ 2309 void *ptr = find_ptr(funcName); 2310 if (ptr) return reinterpret_cast<PFN_vkVoidFunction>(ptr); 2311 2312 if (dev == NULL) return NULL; 2313 2314 struct device_data *device_data = FIND_DEVICE_DATA(dev); 2315 if (device_data->vtable.GetDeviceProcAddr == NULL) return NULL; 2316 return device_data->vtable.GetDeviceProcAddr(dev, funcName); 2317} 2318 2319VK_LAYER_EXPORT VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetInstanceProcAddr(VkInstance instance, 2320 const char *funcName) 2321{ 2322 void *ptr = find_ptr(funcName); 2323 if (ptr) return reinterpret_cast<PFN_vkVoidFunction>(ptr); 2324 2325 if (instance == NULL) return NULL; 2326 2327 struct instance_data *instance_data = FIND_INSTANCE_DATA(instance); 2328 if (instance_data->vtable.GetInstanceProcAddr == NULL) return NULL; 2329 return instance_data->vtable.GetInstanceProcAddr(instance, funcName); 2330} 2331