1/* 2 * Copyright © 2018 Google, Inc. 3 * Copyright © 2015 Intel Corporation 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 * DEALINGS IN THE SOFTWARE. 
23 */ 24 25#include <errno.h> 26#include <fcntl.h> 27#include <stdint.h> 28#include <sys/ioctl.h> 29#include <sys/mman.h> 30#include <xf86drm.h> 31 32#include "vk_util.h" 33 34#include "drm-uapi/msm_drm.h" 35#include "util/timespec.h" 36#include "util/os_time.h" 37#include "util/perf/u_trace.h" 38 39#include "tu_private.h" 40 41#include "tu_cs.h" 42 43struct tu_binary_syncobj { 44 uint32_t permanent, temporary; 45}; 46 47struct tu_timeline_point { 48 struct list_head link; 49 50 uint64_t value; 51 uint32_t syncobj; 52 uint32_t wait_count; 53}; 54 55struct tu_timeline { 56 uint64_t highest_submitted; 57 uint64_t highest_signaled; 58 59 /* A timeline can have multiple timeline points */ 60 struct list_head points; 61 62 /* A list containing points that has been already submited. 63 * A point will be moved to 'points' when new point is required 64 * at submit time. 65 */ 66 struct list_head free_points; 67}; 68 69typedef enum { 70 TU_SEMAPHORE_BINARY, 71 TU_SEMAPHORE_TIMELINE, 72} tu_semaphore_type; 73 74 75struct tu_syncobj { 76 struct vk_object_base base; 77 78 tu_semaphore_type type; 79 union { 80 struct tu_binary_syncobj binary; 81 struct tu_timeline timeline; 82 }; 83}; 84 85struct tu_queue_submit 86{ 87 struct list_head link; 88 89 VkCommandBuffer *cmd_buffers; 90 struct tu_u_trace_cmd_data *cmd_buffer_trace_data; 91 uint32_t cmd_buffer_count; 92 93 struct tu_syncobj **wait_semaphores; 94 uint32_t wait_semaphore_count; 95 struct tu_syncobj **signal_semaphores; 96 uint32_t signal_semaphore_count; 97 98 struct tu_syncobj **wait_timelines; 99 uint64_t *wait_timeline_values; 100 uint32_t wait_timeline_count; 101 uint32_t wait_timeline_array_length; 102 103 struct tu_syncobj **signal_timelines; 104 uint64_t *signal_timeline_values; 105 uint32_t signal_timeline_count; 106 uint32_t signal_timeline_array_length; 107 108 struct drm_msm_gem_submit_cmd *cmds; 109 struct drm_msm_gem_submit_syncobj *in_syncobjs; 110 uint32_t nr_in_syncobjs; 111 struct 
drm_msm_gem_submit_syncobj *out_syncobjs; 112 uint32_t nr_out_syncobjs; 113 114 bool last_submit; 115 uint32_t entry_count; 116 uint32_t counter_pass_index; 117}; 118 119struct tu_u_trace_syncobj 120{ 121 uint32_t msm_queue_id; 122 uint32_t fence; 123}; 124 125static int 126tu_drm_get_param(const struct tu_physical_device *dev, 127 uint32_t param, 128 uint64_t *value) 129{ 130 /* Technically this requires a pipe, but the kernel only supports one pipe 131 * anyway at the time of writing and most of these are clearly pipe 132 * independent. */ 133 struct drm_msm_param req = { 134 .pipe = MSM_PIPE_3D0, 135 .param = param, 136 }; 137 138 int ret = drmCommandWriteRead(dev->local_fd, DRM_MSM_GET_PARAM, &req, 139 sizeof(req)); 140 if (ret) 141 return ret; 142 143 *value = req.value; 144 145 return 0; 146} 147 148static int 149tu_drm_get_gpu_id(const struct tu_physical_device *dev, uint32_t *id) 150{ 151 uint64_t value; 152 int ret = tu_drm_get_param(dev, MSM_PARAM_GPU_ID, &value); 153 if (ret) 154 return ret; 155 156 *id = value; 157 return 0; 158} 159 160static int 161tu_drm_get_gmem_size(const struct tu_physical_device *dev, uint32_t *size) 162{ 163 uint64_t value; 164 int ret = tu_drm_get_param(dev, MSM_PARAM_GMEM_SIZE, &value); 165 if (ret) 166 return ret; 167 168 *size = value; 169 return 0; 170} 171 172static int 173tu_drm_get_gmem_base(const struct tu_physical_device *dev, uint64_t *base) 174{ 175 return tu_drm_get_param(dev, MSM_PARAM_GMEM_BASE, base); 176} 177 178int 179tu_drm_get_timestamp(struct tu_physical_device *device, uint64_t *ts) 180{ 181 return tu_drm_get_param(device, MSM_PARAM_TIMESTAMP, ts); 182} 183 184int 185tu_drm_submitqueue_new(const struct tu_device *dev, 186 int priority, 187 uint32_t *queue_id) 188{ 189 struct drm_msm_submitqueue req = { 190 .flags = 0, 191 .prio = priority, 192 }; 193 194 int ret = drmCommandWriteRead(dev->fd, 195 DRM_MSM_SUBMITQUEUE_NEW, &req, sizeof(req)); 196 if (ret) 197 return ret; 198 199 *queue_id = req.id; 200 return 
0; 201} 202 203void 204tu_drm_submitqueue_close(const struct tu_device *dev, uint32_t queue_id) 205{ 206 drmCommandWrite(dev->fd, DRM_MSM_SUBMITQUEUE_CLOSE, 207 &queue_id, sizeof(uint32_t)); 208} 209 210static void 211tu_gem_close(const struct tu_device *dev, uint32_t gem_handle) 212{ 213 struct drm_gem_close req = { 214 .handle = gem_handle, 215 }; 216 217 drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req); 218} 219 220/** Helper for DRM_MSM_GEM_INFO, returns 0 on error. */ 221static uint64_t 222tu_gem_info(const struct tu_device *dev, uint32_t gem_handle, uint32_t info) 223{ 224 struct drm_msm_gem_info req = { 225 .handle = gem_handle, 226 .info = info, 227 }; 228 229 int ret = drmCommandWriteRead(dev->fd, 230 DRM_MSM_GEM_INFO, &req, sizeof(req)); 231 if (ret < 0) 232 return 0; 233 234 return req.value; 235} 236 237static VkResult 238tu_bo_init(struct tu_device *dev, 239 struct tu_bo *bo, 240 uint32_t gem_handle, 241 uint64_t size, 242 bool dump) 243{ 244 uint64_t iova = tu_gem_info(dev, gem_handle, MSM_INFO_GET_IOVA); 245 if (!iova) { 246 tu_gem_close(dev, gem_handle); 247 return VK_ERROR_OUT_OF_DEVICE_MEMORY; 248 } 249 250 *bo = (struct tu_bo) { 251 .gem_handle = gem_handle, 252 .size = size, 253 .iova = iova, 254 }; 255 256 mtx_lock(&dev->bo_mutex); 257 uint32_t idx = dev->bo_count++; 258 259 /* grow the bo list if needed */ 260 if (idx >= dev->bo_list_size) { 261 uint32_t new_len = idx + 64; 262 struct drm_msm_gem_submit_bo *new_ptr = 263 vk_realloc(&dev->vk.alloc, dev->bo_list, new_len * sizeof(*dev->bo_list), 264 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 265 if (!new_ptr) 266 goto fail_bo_list; 267 268 dev->bo_list = new_ptr; 269 dev->bo_list_size = new_len; 270 } 271 272 /* grow the "bo idx" list (maps gem handles to index in the bo list) */ 273 if (bo->gem_handle >= dev->bo_idx_size) { 274 uint32_t new_len = bo->gem_handle + 256; 275 uint32_t *new_ptr = 276 vk_realloc(&dev->vk.alloc, dev->bo_idx, new_len * sizeof(*dev->bo_idx), 277 8, 
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 278 if (!new_ptr) 279 goto fail_bo_idx; 280 281 dev->bo_idx = new_ptr; 282 dev->bo_idx_size = new_len; 283 } 284 285 dev->bo_idx[bo->gem_handle] = idx; 286 dev->bo_list[idx] = (struct drm_msm_gem_submit_bo) { 287 .flags = MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE | 288 COND(dump, MSM_SUBMIT_BO_DUMP), 289 .handle = gem_handle, 290 .presumed = iova, 291 }; 292 mtx_unlock(&dev->bo_mutex); 293 294 return VK_SUCCESS; 295 296fail_bo_idx: 297 vk_free(&dev->vk.alloc, dev->bo_list); 298fail_bo_list: 299 tu_gem_close(dev, gem_handle); 300 return VK_ERROR_OUT_OF_HOST_MEMORY; 301} 302 303VkResult 304tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size, 305 enum tu_bo_alloc_flags flags) 306{ 307 /* TODO: Choose better flags. As of 2018-11-12, freedreno/drm/msm_bo.c 308 * always sets `flags = MSM_BO_WC`, and we copy that behavior here. 309 */ 310 struct drm_msm_gem_new req = { 311 .size = size, 312 .flags = MSM_BO_WC 313 }; 314 315 if (flags & TU_BO_ALLOC_GPU_READ_ONLY) 316 req.flags |= MSM_BO_GPU_READONLY; 317 318 int ret = drmCommandWriteRead(dev->fd, 319 DRM_MSM_GEM_NEW, &req, sizeof(req)); 320 if (ret) 321 return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); 322 323 return tu_bo_init(dev, bo, req.handle, size, flags & TU_BO_ALLOC_ALLOW_DUMP); 324} 325 326VkResult 327tu_bo_init_dmabuf(struct tu_device *dev, 328 struct tu_bo *bo, 329 uint64_t size, 330 int prime_fd) 331{ 332 /* lseek() to get the real size */ 333 off_t real_size = lseek(prime_fd, 0, SEEK_END); 334 lseek(prime_fd, 0, SEEK_SET); 335 if (real_size < 0 || (uint64_t) real_size < size) 336 return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE); 337 338 uint32_t gem_handle; 339 int ret = drmPrimeFDToHandle(dev->fd, prime_fd, 340 &gem_handle); 341 if (ret) 342 return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE); 343 344 return tu_bo_init(dev, bo, gem_handle, size, false); 345} 346 347int 348tu_bo_export_dmabuf(struct tu_device *dev, struct tu_bo *bo) 349{ 350 int 
prime_fd; 351 int ret = drmPrimeHandleToFD(dev->fd, bo->gem_handle, 352 DRM_CLOEXEC, &prime_fd); 353 354 return ret == 0 ? prime_fd : -1; 355} 356 357VkResult 358tu_bo_map(struct tu_device *dev, struct tu_bo *bo) 359{ 360 if (bo->map) 361 return VK_SUCCESS; 362 363 uint64_t offset = tu_gem_info(dev, bo->gem_handle, MSM_INFO_GET_OFFSET); 364 if (!offset) 365 return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); 366 367 /* TODO: Should we use the wrapper os_mmap() like Freedreno does? */ 368 void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, 369 dev->fd, offset); 370 if (map == MAP_FAILED) 371 return vk_error(dev, VK_ERROR_MEMORY_MAP_FAILED); 372 373 bo->map = map; 374 return VK_SUCCESS; 375} 376 377void 378tu_bo_finish(struct tu_device *dev, struct tu_bo *bo) 379{ 380 assert(bo->gem_handle); 381 382 if (bo->map) 383 munmap(bo->map, bo->size); 384 385 mtx_lock(&dev->bo_mutex); 386 uint32_t idx = dev->bo_idx[bo->gem_handle]; 387 dev->bo_count--; 388 dev->bo_list[idx] = dev->bo_list[dev->bo_count]; 389 dev->bo_idx[dev->bo_list[idx].handle] = idx; 390 mtx_unlock(&dev->bo_mutex); 391 392 tu_gem_close(dev, bo->gem_handle); 393} 394 395static VkResult 396tu_drm_device_init(struct tu_physical_device *device, 397 struct tu_instance *instance, 398 drmDevicePtr drm_device) 399{ 400 const char *path = drm_device->nodes[DRM_NODE_RENDER]; 401 VkResult result = VK_SUCCESS; 402 drmVersionPtr version; 403 int fd; 404 int master_fd = -1; 405 406 fd = open(path, O_RDWR | O_CLOEXEC); 407 if (fd < 0) { 408 return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, 409 "failed to open device %s", path); 410 } 411 412 /* Version 1.6 added SYNCOBJ support. 
*/ 413 const int min_version_major = 1; 414 const int min_version_minor = 6; 415 416 version = drmGetVersion(fd); 417 if (!version) { 418 close(fd); 419 return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, 420 "failed to query kernel driver version for device %s", 421 path); 422 } 423 424 if (strcmp(version->name, "msm")) { 425 drmFreeVersion(version); 426 close(fd); 427 return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, 428 "device %s does not use the msm kernel driver", 429 path); 430 } 431 432 if (version->version_major != min_version_major || 433 version->version_minor < min_version_minor) { 434 result = vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, 435 "kernel driver for device %s has version %d.%d, " 436 "but Vulkan requires version >= %d.%d", 437 path, 438 version->version_major, version->version_minor, 439 min_version_major, min_version_minor); 440 drmFreeVersion(version); 441 close(fd); 442 return result; 443 } 444 445 device->msm_major_version = version->version_major; 446 device->msm_minor_version = version->version_minor; 447 448 drmFreeVersion(version); 449 450 if (instance->debug_flags & TU_DEBUG_STARTUP) 451 mesa_logi("Found compatible device '%s'.", path); 452 453 device->instance = instance; 454 455 if (instance->vk.enabled_extensions.KHR_display) { 456 master_fd = 457 open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC); 458 if (master_fd >= 0) { 459 /* TODO: free master_fd is accel is not working? 
*/ 460 } 461 } 462 463 device->master_fd = master_fd; 464 device->local_fd = fd; 465 466 if (tu_drm_get_gpu_id(device, &device->dev_id.gpu_id)) { 467 result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, 468 "could not get GPU ID"); 469 goto fail; 470 } 471 472 if (tu_drm_get_param(device, MSM_PARAM_CHIP_ID, &device->dev_id.chip_id)) { 473 result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, 474 "could not get CHIP ID"); 475 goto fail; 476 } 477 478 if (tu_drm_get_gmem_size(device, &device->gmem_size)) { 479 result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, 480 "could not get GMEM size"); 481 goto fail; 482 } 483 484 if (tu_drm_get_gmem_base(device, &device->gmem_base)) { 485 result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, 486 "could not get GMEM size"); 487 goto fail; 488 } 489 490 device->heap.size = tu_get_system_heap_size(); 491 device->heap.used = 0u; 492 device->heap.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT; 493 494 result = tu_physical_device_init(device, instance); 495 if (result == VK_SUCCESS) 496 return result; 497 498fail: 499 close(fd); 500 if (master_fd != -1) 501 close(master_fd); 502 return result; 503} 504 505VkResult 506tu_enumerate_devices(struct tu_instance *instance) 507{ 508 /* TODO: Check for more devices ? 
*/ 509 drmDevicePtr devices[8]; 510 VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER; 511 int max_devices; 512 513 instance->physical_device_count = 0; 514 515 max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices)); 516 517 if (instance->debug_flags & TU_DEBUG_STARTUP) { 518 if (max_devices < 0) 519 mesa_logi("drmGetDevices2 returned error: %s\n", strerror(max_devices)); 520 else 521 mesa_logi("Found %d drm nodes", max_devices); 522 } 523 524 if (max_devices < 1) 525 return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, 526 "No DRM devices found"); 527 528 for (unsigned i = 0; i < (unsigned) max_devices; i++) { 529 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER && 530 devices[i]->bustype == DRM_BUS_PLATFORM) { 531 532 result = tu_drm_device_init( 533 instance->physical_devices + instance->physical_device_count, 534 instance, devices[i]); 535 if (result == VK_SUCCESS) 536 ++instance->physical_device_count; 537 else if (result != VK_ERROR_INCOMPATIBLE_DRIVER) 538 break; 539 } 540 } 541 drmFreeDevices(devices, max_devices); 542 543 return result; 544} 545 546static void 547tu_timeline_finish(struct tu_device *device, 548 struct tu_timeline *timeline) 549{ 550 list_for_each_entry_safe(struct tu_timeline_point, point, 551 &timeline->free_points, link) { 552 list_del(&point->link); 553 drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY, 554 &(struct drm_syncobj_destroy) { .handle = point->syncobj }); 555 556 vk_free(&device->vk.alloc, point); 557 } 558 list_for_each_entry_safe(struct tu_timeline_point, point, 559 &timeline->points, link) { 560 list_del(&point->link); 561 drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY, 562 &(struct drm_syncobj_destroy) { .handle = point->syncobj }); 563 vk_free(&device->vk.alloc, point); 564 } 565} 566 567static VkResult 568sync_create(VkDevice _device, 569 bool signaled, 570 bool fence, 571 bool binary, 572 uint64_t timeline_value, 573 const VkAllocationCallbacks *pAllocator, 574 void **p_sync) 575{ 576 
   TU_FROM_HANDLE(tu_device, device, _device);

   struct tu_syncobj *sync =
         vk_object_alloc(&device->vk, pAllocator, sizeof(*sync),
                         fence ? VK_OBJECT_TYPE_FENCE : VK_OBJECT_TYPE_SEMAPHORE);
   if (!sync)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   if (binary) {
      /* Binary payload is a kernel syncobj, optionally created signaled. */
      struct drm_syncobj_create create = {};
      if (signaled)
         create.flags |= DRM_SYNCOBJ_CREATE_SIGNALED;

      int ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_CREATE, &create);
      if (ret) {
         /* NOTE(review): object was allocated with vk_object_alloc but is
          * released here with vk_free2, and the error is returned without
          * the vk_error wrapper used elsewhere — confirm whether
          * vk_object_free/vk_error should be used for consistency.
          */
         vk_free2(&device->vk.alloc, pAllocator, sync);
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      }

      sync->binary.permanent = create.handle;
      sync->binary.temporary = 0;
      sync->type = TU_SEMAPHORE_BINARY;
   } else {
      /* Timeline payload starts with no points; both counters begin at the
       * requested initial value. */
      sync->type = TU_SEMAPHORE_TIMELINE;
      sync->timeline.highest_signaled = sync->timeline.highest_submitted =
            timeline_value;
      list_inithead(&sync->timeline.points);
      list_inithead(&sync->timeline.free_points);
   }

   *p_sync = sync;

   return VK_SUCCESS;
}

/* Replace the temporary binary payload, destroying any previous one.
 * Passing 0 restores the permanent payload (binary semaphores only).
 */
static void
sync_set_temporary(struct tu_device *device, struct tu_syncobj *sync, uint32_t syncobj)
{
   if (sync->binary.temporary) {
      drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY,
            &(struct drm_syncobj_destroy) { .handle = sync->binary.temporary });
   }
   sync->binary.temporary = syncobj;
}

/* Destroy a fence/semaphore and its kernel-side state.  NULL is a no-op. */
static void
sync_destroy(VkDevice _device, struct tu_syncobj *sync, const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   if (!sync)
      return;

   if (sync->type == TU_SEMAPHORE_BINARY) {
      sync_set_temporary(device, sync, 0);
      drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY,
            &(struct drm_syncobj_destroy) { .handle = sync->binary.permanent });
   } else {
      tu_timeline_finish(device, &sync->timeline);
   }

   vk_object_free(&device->vk, pAllocator, sync);
}

/* Import an opaque-fd (whole syncobj) or sync-fd (fence payload) into a
 * binary semaphore.  Takes ownership of fd on success; on failure the
 * caller keeps it.  fd == -1 with sync_fd means "already signaled".
 */
static VkResult
sync_import(VkDevice _device, struct tu_syncobj *sync, bool temporary, bool sync_fd, int fd)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   int ret;

   if (!sync_fd) {
      /* Opaque fd: import a whole syncobj, replacing the chosen payload. */
      uint32_t *dst = temporary ? &sync->binary.temporary : &sync->binary.permanent;

      struct drm_syncobj_handle handle = { .fd = fd };
      ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &handle);
      if (ret)
         return VK_ERROR_INVALID_EXTERNAL_HANDLE;

      if (*dst) {
         drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY,
               &(struct drm_syncobj_destroy) { .handle = *dst });
      }
      *dst = handle.handle;
      close(fd);
   } else {
      /* Sync-fd import is always temporary per the Vulkan spec. */
      assert(temporary);

      struct drm_syncobj_create create = {};

      if (fd == -1)
         create.flags |= DRM_SYNCOBJ_CREATE_SIGNALED;

      ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_CREATE, &create);
      if (ret)
         return VK_ERROR_INVALID_EXTERNAL_HANDLE;

      if (fd != -1) {
         ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &(struct drm_syncobj_handle) {
            .fd = fd,
            .handle = create.handle,
            .flags = DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE,
         });
         if (ret) {
            drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY,
                  &(struct drm_syncobj_destroy) { .handle = create.handle });
            return VK_ERROR_INVALID_EXTERNAL_HANDLE;
         }
         close(fd);
      }

      sync_set_temporary(device, sync, create.handle);
   }

   return VK_SUCCESS;
}

/* Export the active (temporary-over-permanent) payload as an fd.  Per the
 * Vulkan spec, exporting resets a temporary payload back to permanent.
 */
static VkResult
sync_export(VkDevice _device, struct tu_syncobj *sync, bool sync_fd, int *p_fd)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   struct drm_syncobj_handle handle = {
      .handle = sync->binary.temporary ?: sync->binary.permanent,
      .flags = COND(sync_fd, DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE),
      .fd = -1,
   };
   int ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &handle);
   if (ret)
      return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);

   /* restore permanent payload on export */
   sync_set_temporary(device, sync, 0);

   *p_fd = handle.fd;
   return VK_SUCCESS;
}

/* Extract the semaphore type from a pNext chain; defaults to binary.
 * *initial_value is written only when a VkSemaphoreTypeCreateInfo is
 * present, so callers must pre-initialize it.
 */
static VkSemaphoreTypeKHR
get_semaphore_type(const void *pNext, uint64_t *initial_value)
{
   const VkSemaphoreTypeCreateInfoKHR *type_info =
      vk_find_struct_const(pNext, SEMAPHORE_TYPE_CREATE_INFO_KHR);

   if (!type_info)
      return VK_SEMAPHORE_TYPE_BINARY_KHR;

   if (initial_value)
      *initial_value = type_info->initialValue;
   return type_info->semaphoreType;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateSemaphore(VkDevice device,
                   const VkSemaphoreCreateInfo *pCreateInfo,
                   const VkAllocationCallbacks *pAllocator,
                   VkSemaphore *pSemaphore)
{
   uint64_t timeline_value = 0;
   VkSemaphoreTypeKHR sem_type = get_semaphore_type(pCreateInfo->pNext, &timeline_value);

   /* binary == true selects the syncobj-backed path in sync_create. */
   return sync_create(device, false, false, (sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR),
                      timeline_value, pAllocator, (void**) pSemaphore);
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroySemaphore(VkDevice device, VkSemaphore sem, const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_syncobj, sync, sem);
   sync_destroy(device, sync, pAllocator);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_ImportSemaphoreFdKHR(VkDevice device, const VkImportSemaphoreFdInfoKHR *info)
{
   TU_FROM_HANDLE(tu_syncobj, sync, info->semaphore);
   return sync_import(device, sync, info->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
         info->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, info->fd);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_GetSemaphoreFdKHR(VkDevice device, const VkSemaphoreGetFdInfoKHR *info, int *pFd)
{
   TU_FROM_HANDLE(tu_syncobj, sync, info->semaphore);
   return sync_export(device, sync,
         info->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, pFd);
}

/* Only binary semaphores are exportable; timelines report no support. */
VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceExternalSemaphoreProperties(
   VkPhysicalDevice physicalDevice,
   const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
   VkExternalSemaphoreProperties *pExternalSemaphoreProperties)
{
   VkSemaphoreTypeKHR type = get_semaphore_type(pExternalSemaphoreInfo->pNext, NULL);

   if (type != VK_SEMAPHORE_TYPE_TIMELINE &&
       (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT ||
        pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT )) {
      pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
      pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
      pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
                                                               VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
   } else {
      pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
      pExternalSemaphoreProperties->compatibleHandleTypes = 0;
      pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
   }
}

/* Append (timeline, value) to the submit's wait list, growing the arrays
 * on demand.  Caller holds the queue lock.
 */
static VkResult
tu_queue_submit_add_timeline_wait_locked(struct tu_queue_submit* submit,
                                         struct tu_device *device,
                                         struct tu_syncobj *timeline,
                                         uint64_t value)
{
   if (submit->wait_timeline_count >= submit->wait_timeline_array_length) {
      uint32_t new_len = MAX2(submit->wait_timeline_array_length * 2, 64);

      /* NOTE(review): the vk_realloc result overwrites the field directly,
       * so the old array leaks if the realloc fails — consider the
       * temp-pointer pattern. */
      submit->wait_timelines = vk_realloc(&device->vk.alloc,
            submit->wait_timelines,
            new_len * sizeof(*submit->wait_timelines),
            8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

      if (submit->wait_timelines == NULL)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      submit->wait_timeline_values = vk_realloc(&device->vk.alloc,
            submit->wait_timeline_values,
            new_len * sizeof(*submit->wait_timeline_values),
            8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

      if (submit->wait_timeline_values == NULL) {
         /* NOTE(review): leaves submit->wait_timelines dangling — TODO
          * confirm no later free of the stale pointer. */
         vk_free(&device->vk.alloc, submit->wait_timelines);
return VK_ERROR_OUT_OF_HOST_MEMORY; 810 } 811 812 submit->wait_timeline_array_length = new_len; 813 } 814 815 submit->wait_timelines[submit->wait_timeline_count] = timeline; 816 submit->wait_timeline_values[submit->wait_timeline_count] = value; 817 818 submit->wait_timeline_count++; 819 820 return VK_SUCCESS; 821} 822 823static VkResult 824tu_queue_submit_add_timeline_signal_locked(struct tu_queue_submit* submit, 825 struct tu_device *device, 826 struct tu_syncobj *timeline, 827 uint64_t value) 828{ 829 if (submit->signal_timeline_count >= submit->signal_timeline_array_length) { 830 uint32_t new_len = MAX2(submit->signal_timeline_array_length * 2, 32); 831 832 submit->signal_timelines = vk_realloc(&device->vk.alloc, 833 submit->signal_timelines, 834 new_len * sizeof(*submit->signal_timelines), 835 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 836 837 if (submit->signal_timelines == NULL) 838 return VK_ERROR_OUT_OF_HOST_MEMORY; 839 840 submit->signal_timeline_values = vk_realloc(&device->vk.alloc, 841 submit->signal_timeline_values, 842 new_len * sizeof(*submit->signal_timeline_values), 843 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 844 845 if (submit->signal_timeline_values == NULL) { 846 vk_free(&device->vk.alloc, submit->signal_timelines); 847 return VK_ERROR_OUT_OF_HOST_MEMORY; 848 } 849 850 submit->signal_timeline_array_length = new_len; 851 } 852 853 submit->signal_timelines[submit->signal_timeline_count] = timeline; 854 submit->signal_timeline_values[submit->signal_timeline_count] = value; 855 856 submit->signal_timeline_count++; 857 858 return VK_SUCCESS; 859} 860 861static VkResult 862tu_queue_submit_create_locked(struct tu_queue *queue, 863 const VkSubmitInfo *submit_info, 864 const uint32_t nr_in_syncobjs, 865 const uint32_t nr_out_syncobjs, 866 const bool last_submit, 867 const VkPerformanceQuerySubmitInfoKHR *perf_info, 868 struct tu_queue_submit **submit) 869{ 870 VkResult result; 871 872 const VkTimelineSemaphoreSubmitInfoKHR *timeline_info = 873 
vk_find_struct_const(submit_info->pNext, 874 TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR); 875 876 const uint32_t wait_values_count = 877 timeline_info ? timeline_info->waitSemaphoreValueCount : 0; 878 const uint32_t signal_values_count = 879 timeline_info ? timeline_info->signalSemaphoreValueCount : 0; 880 881 const uint64_t *wait_values = 882 wait_values_count ? timeline_info->pWaitSemaphoreValues : NULL; 883 const uint64_t *signal_values = 884 signal_values_count ? timeline_info->pSignalSemaphoreValues : NULL; 885 886 struct tu_queue_submit *new_submit = vk_zalloc(&queue->device->vk.alloc, 887 sizeof(*new_submit), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 888 889 new_submit->cmd_buffer_count = submit_info->commandBufferCount; 890 new_submit->cmd_buffers = vk_zalloc(&queue->device->vk.alloc, 891 new_submit->cmd_buffer_count * sizeof(*new_submit->cmd_buffers), 8, 892 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 893 894 if (new_submit->cmd_buffers == NULL) { 895 result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); 896 goto fail_cmd_buffers; 897 } 898 899 memcpy(new_submit->cmd_buffers, submit_info->pCommandBuffers, 900 new_submit->cmd_buffer_count * sizeof(*new_submit->cmd_buffers)); 901 902 new_submit->wait_semaphores = vk_zalloc(&queue->device->vk.alloc, 903 submit_info->waitSemaphoreCount * sizeof(*new_submit->wait_semaphores), 904 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 905 if (new_submit->wait_semaphores == NULL) { 906 result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); 907 goto fail_wait_semaphores; 908 } 909 new_submit->wait_semaphore_count = submit_info->waitSemaphoreCount; 910 911 new_submit->signal_semaphores = vk_zalloc(&queue->device->vk.alloc, 912 submit_info->signalSemaphoreCount *sizeof(*new_submit->signal_semaphores), 913 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 914 if (new_submit->signal_semaphores == NULL) { 915 result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); 916 goto fail_signal_semaphores; 917 } 918 new_submit->signal_semaphore_count = 
submit_info->signalSemaphoreCount; 919 920 for (uint32_t i = 0; i < submit_info->waitSemaphoreCount; i++) { 921 TU_FROM_HANDLE(tu_syncobj, sem, submit_info->pWaitSemaphores[i]); 922 new_submit->wait_semaphores[i] = sem; 923 924 if (sem->type == TU_SEMAPHORE_TIMELINE) { 925 result = tu_queue_submit_add_timeline_wait_locked(new_submit, 926 queue->device, sem, wait_values[i]); 927 if (result != VK_SUCCESS) 928 goto fail_wait_timelines; 929 } 930 } 931 932 for (uint32_t i = 0; i < submit_info->signalSemaphoreCount; i++) { 933 TU_FROM_HANDLE(tu_syncobj, sem, submit_info->pSignalSemaphores[i]); 934 new_submit->signal_semaphores[i] = sem; 935 936 if (sem->type == TU_SEMAPHORE_TIMELINE) { 937 result = tu_queue_submit_add_timeline_signal_locked(new_submit, 938 queue->device, sem, signal_values[i]); 939 if (result != VK_SUCCESS) 940 goto fail_signal_timelines; 941 } 942 } 943 944 bool u_trace_enabled = u_trace_context_tracing(&queue->device->trace_context); 945 bool has_trace_points = false; 946 947 uint32_t entry_count = 0; 948 for (uint32_t j = 0; j < new_submit->cmd_buffer_count; ++j) { 949 TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, new_submit->cmd_buffers[j]); 950 951 if (perf_info) 952 entry_count++; 953 954 entry_count += cmdbuf->cs.entry_count; 955 956 if (u_trace_enabled && u_trace_has_points(&cmdbuf->trace)) { 957 if (!(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) 958 entry_count++; 959 960 has_trace_points = true; 961 } 962 } 963 964 new_submit->cmds = vk_zalloc(&queue->device->vk.alloc, 965 entry_count * sizeof(*new_submit->cmds), 8, 966 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 967 968 if (new_submit->cmds == NULL) { 969 result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); 970 goto fail_cmds; 971 } 972 973 if (has_trace_points) { 974 new_submit->cmd_buffer_trace_data = vk_zalloc(&queue->device->vk.alloc, 975 new_submit->cmd_buffer_count * sizeof(struct tu_u_trace_cmd_data), 8, 976 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 977 978 if 
(new_submit->cmd_buffer_trace_data == NULL) { 979 result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); 980 goto fail_cmd_trace_data; 981 } 982 983 for (uint32_t i = 0; i < new_submit->cmd_buffer_count; ++i) { 984 TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, new_submit->cmd_buffers[i]); 985 986 if (!(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) && 987 u_trace_has_points(&cmdbuf->trace)) { 988 /* A single command buffer could be submitted several times, but we 989 * already backed timestamp iova addresses and trace points are 990 * single-use. Therefor we have to copy trace points and create 991 * a new timestamp buffer on every submit of reusable command buffer. 992 */ 993 if (tu_create_copy_timestamp_cs(cmdbuf, 994 &new_submit->cmd_buffer_trace_data[i].timestamp_copy_cs, 995 &new_submit->cmd_buffer_trace_data[i].trace) != VK_SUCCESS) { 996 result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); 997 goto fail_copy_timestamp_cs; 998 } 999 assert(new_submit->cmd_buffer_trace_data[i].timestamp_copy_cs->entry_count == 1); 1000 } else { 1001 new_submit->cmd_buffer_trace_data[i].trace = &cmdbuf->trace; 1002 } 1003 } 1004 } 1005 1006 /* Allocate without wait timeline semaphores */ 1007 new_submit->in_syncobjs = vk_zalloc(&queue->device->vk.alloc, 1008 (nr_in_syncobjs - new_submit->wait_timeline_count) * 1009 sizeof(*new_submit->in_syncobjs), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 1010 1011 if (new_submit->in_syncobjs == NULL) { 1012 result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); 1013 goto fail_in_syncobjs; 1014 } 1015 1016 /* Allocate with signal timeline semaphores considered */ 1017 new_submit->out_syncobjs = vk_zalloc(&queue->device->vk.alloc, 1018 nr_out_syncobjs * sizeof(*new_submit->out_syncobjs), 8, 1019 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 1020 1021 if (new_submit->out_syncobjs == NULL) { 1022 result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); 1023 goto fail_out_syncobjs; 1024 } 1025 1026 new_submit->entry_count = entry_count; 1027 
new_submit->nr_in_syncobjs = nr_in_syncobjs; 1028 new_submit->nr_out_syncobjs = nr_out_syncobjs; 1029 new_submit->last_submit = last_submit; 1030 new_submit->counter_pass_index = perf_info ? perf_info->counterPassIndex : ~0; 1031 1032 list_inithead(&new_submit->link); 1033 1034 *submit = new_submit; 1035 1036 return VK_SUCCESS; 1037 1038fail_out_syncobjs: 1039 vk_free(&queue->device->vk.alloc, new_submit->in_syncobjs); 1040fail_in_syncobjs: 1041 if (new_submit->cmd_buffer_trace_data) 1042 tu_u_trace_cmd_data_finish(queue->device, new_submit->cmd_buffer_trace_data, 1043 new_submit->cmd_buffer_count); 1044fail_copy_timestamp_cs: 1045 vk_free(&queue->device->vk.alloc, new_submit->cmd_buffer_trace_data); 1046fail_cmd_trace_data: 1047 vk_free(&queue->device->vk.alloc, new_submit->cmds); 1048fail_cmds: 1049fail_signal_timelines: 1050fail_wait_timelines: 1051 vk_free(&queue->device->vk.alloc, new_submit->signal_semaphores); 1052fail_signal_semaphores: 1053 vk_free(&queue->device->vk.alloc, new_submit->wait_semaphores); 1054fail_wait_semaphores: 1055 vk_free(&queue->device->vk.alloc, new_submit->cmd_buffers); 1056fail_cmd_buffers: 1057 return result; 1058} 1059 1060static void 1061tu_queue_submit_free(struct tu_queue *queue, struct tu_queue_submit *submit) 1062{ 1063 vk_free(&queue->device->vk.alloc, submit->wait_semaphores); 1064 vk_free(&queue->device->vk.alloc, submit->signal_semaphores); 1065 1066 vk_free(&queue->device->vk.alloc, submit->wait_timelines); 1067 vk_free(&queue->device->vk.alloc, submit->wait_timeline_values); 1068 vk_free(&queue->device->vk.alloc, submit->signal_timelines); 1069 vk_free(&queue->device->vk.alloc, submit->signal_timeline_values); 1070 1071 vk_free(&queue->device->vk.alloc, submit->cmds); 1072 vk_free(&queue->device->vk.alloc, submit->in_syncobjs); 1073 vk_free(&queue->device->vk.alloc, submit->out_syncobjs); 1074 vk_free(&queue->device->vk.alloc, submit->cmd_buffers); 1075 vk_free(&queue->device->vk.alloc, submit); 1076} 1077 1078static 
void 1079tu_queue_build_msm_gem_submit_cmds(struct tu_queue *queue, 1080 struct tu_queue_submit *submit) 1081{ 1082 struct drm_msm_gem_submit_cmd *cmds = submit->cmds; 1083 1084 uint32_t entry_idx = 0; 1085 for (uint32_t j = 0; j < submit->cmd_buffer_count; ++j) { 1086 TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, submit->cmd_buffers[j]); 1087 struct tu_cs *cs = &cmdbuf->cs; 1088 struct tu_device *dev = queue->device; 1089 1090 if (submit->counter_pass_index != ~0) { 1091 struct tu_cs_entry *perf_cs_entry = 1092 &dev->perfcntrs_pass_cs_entries[submit->counter_pass_index]; 1093 1094 cmds[entry_idx].type = MSM_SUBMIT_CMD_BUF; 1095 cmds[entry_idx].submit_idx = 1096 dev->bo_idx[perf_cs_entry->bo->gem_handle]; 1097 cmds[entry_idx].submit_offset = perf_cs_entry->offset; 1098 cmds[entry_idx].size = perf_cs_entry->size; 1099 cmds[entry_idx].pad = 0; 1100 cmds[entry_idx].nr_relocs = 0; 1101 cmds[entry_idx++].relocs = 0; 1102 } 1103 1104 for (unsigned i = 0; i < cs->entry_count; ++i, ++entry_idx) { 1105 cmds[entry_idx].type = MSM_SUBMIT_CMD_BUF; 1106 cmds[entry_idx].submit_idx = 1107 dev->bo_idx[cs->entries[i].bo->gem_handle]; 1108 cmds[entry_idx].submit_offset = cs->entries[i].offset; 1109 cmds[entry_idx].size = cs->entries[i].size; 1110 cmds[entry_idx].pad = 0; 1111 cmds[entry_idx].nr_relocs = 0; 1112 cmds[entry_idx].relocs = 0; 1113 } 1114 1115 if (submit->cmd_buffer_trace_data) { 1116 struct tu_cs *ts_cs = submit->cmd_buffer_trace_data[j].timestamp_copy_cs; 1117 if (ts_cs) { 1118 cmds[entry_idx].type = MSM_SUBMIT_CMD_BUF; 1119 cmds[entry_idx].submit_idx = 1120 queue->device->bo_idx[ts_cs->entries[0].bo->gem_handle]; 1121 1122 assert(cmds[entry_idx].submit_idx < queue->device->bo_count); 1123 1124 cmds[entry_idx].submit_offset = ts_cs->entries[0].offset; 1125 cmds[entry_idx].size = ts_cs->entries[0].size; 1126 cmds[entry_idx].pad = 0; 1127 cmds[entry_idx].nr_relocs = 0; 1128 cmds[entry_idx++].relocs = 0; 1129 } 1130 } 1131 } 1132} 1133 1134static VkResult 
1135tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit) 1136{ 1137 queue->device->submit_count++; 1138 1139#if HAVE_PERFETTO 1140 tu_perfetto_submit(queue->device, queue->device->submit_count); 1141#endif 1142 1143 uint32_t flags = MSM_PIPE_3D0; 1144 1145 if (submit->nr_in_syncobjs) 1146 flags |= MSM_SUBMIT_SYNCOBJ_IN; 1147 1148 if (submit->nr_out_syncobjs) 1149 flags |= MSM_SUBMIT_SYNCOBJ_OUT; 1150 1151 if (submit->last_submit) 1152 flags |= MSM_SUBMIT_FENCE_FD_OUT; 1153 1154 mtx_lock(&queue->device->bo_mutex); 1155 1156 /* drm_msm_gem_submit_cmd requires index of bo which could change at any 1157 * time when bo_mutex is not locked. So we build submit cmds here the real 1158 * place to submit. 1159 */ 1160 tu_queue_build_msm_gem_submit_cmds(queue, submit); 1161 1162 struct drm_msm_gem_submit req = { 1163 .flags = flags, 1164 .queueid = queue->msm_queue_id, 1165 .bos = (uint64_t)(uintptr_t) queue->device->bo_list, 1166 .nr_bos = queue->device->bo_count, 1167 .cmds = (uint64_t)(uintptr_t)submit->cmds, 1168 .nr_cmds = submit->entry_count, 1169 .in_syncobjs = (uint64_t)(uintptr_t)submit->in_syncobjs, 1170 .out_syncobjs = (uint64_t)(uintptr_t)submit->out_syncobjs, 1171 .nr_in_syncobjs = submit->nr_in_syncobjs - submit->wait_timeline_count, 1172 .nr_out_syncobjs = submit->nr_out_syncobjs, 1173 .syncobj_stride = sizeof(struct drm_msm_gem_submit_syncobj), 1174 }; 1175 1176 int ret = drmCommandWriteRead(queue->device->fd, 1177 DRM_MSM_GEM_SUBMIT, 1178 &req, sizeof(req)); 1179 1180 mtx_unlock(&queue->device->bo_mutex); 1181 1182 if (ret) 1183 return tu_device_set_lost(queue->device, "submit failed: %s\n", 1184 strerror(errno)); 1185 1186 /* restore permanent payload on wait */ 1187 for (uint32_t i = 0; i < submit->wait_semaphore_count; i++) { 1188 TU_FROM_HANDLE(tu_syncobj, sem, submit->wait_semaphores[i]); 1189 if(sem->type == TU_SEMAPHORE_BINARY) 1190 sync_set_temporary(queue->device, sem, 0); 1191 } 1192 1193 if (submit->last_submit) { 1194 if 
(queue->fence >= 0) 1195 close(queue->fence); 1196 queue->fence = req.fence_fd; 1197 } 1198 1199 /* Update highest_submitted values in the timeline. */ 1200 for (uint32_t i = 0; i < submit->signal_timeline_count; i++) { 1201 struct tu_syncobj *sem = submit->signal_timelines[i]; 1202 uint64_t signal_value = submit->signal_timeline_values[i]; 1203 1204 assert(signal_value > sem->timeline.highest_submitted); 1205 1206 sem->timeline.highest_submitted = signal_value; 1207 } 1208 1209 if (submit->cmd_buffer_trace_data) { 1210 struct tu_u_trace_flush_data *flush_data = 1211 vk_alloc(&queue->device->vk.alloc, sizeof(struct tu_u_trace_flush_data), 1212 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 1213 flush_data->submission_id = queue->device->submit_count; 1214 flush_data->syncobj = 1215 vk_alloc(&queue->device->vk.alloc, sizeof(struct tu_u_trace_syncobj), 1216 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 1217 flush_data->syncobj->fence = req.fence; 1218 flush_data->syncobj->msm_queue_id = queue->msm_queue_id; 1219 1220 flush_data->cmd_trace_data = submit->cmd_buffer_trace_data; 1221 flush_data->trace_count = submit->cmd_buffer_count; 1222 submit->cmd_buffer_trace_data = NULL; 1223 1224 for (uint32_t i = 0; i < submit->cmd_buffer_count; i++) { 1225 bool free_data = i == (submit->cmd_buffer_count - 1); 1226 u_trace_flush(flush_data->cmd_trace_data[i].trace, flush_data, free_data); 1227 } 1228 } 1229 1230 pthread_cond_broadcast(&queue->device->timeline_cond); 1231 1232 return VK_SUCCESS; 1233} 1234 1235 1236static bool 1237tu_queue_submit_ready_locked(struct tu_queue_submit *submit) 1238{ 1239 for (uint32_t i = 0; i < submit->wait_timeline_count; i++) { 1240 if (submit->wait_timeline_values[i] > 1241 submit->wait_timelines[i]->timeline.highest_submitted) { 1242 return false; 1243 } 1244 } 1245 1246 return true; 1247} 1248 1249static VkResult 1250tu_timeline_add_point_locked(struct tu_device *device, 1251 struct tu_timeline *timeline, 1252 uint64_t value, 1253 struct tu_timeline_point 
**point) 1254{ 1255 1256 if (list_is_empty(&timeline->free_points)) { 1257 *point = vk_zalloc(&device->vk.alloc, sizeof(**point), 8, 1258 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 1259 1260 if (!(*point)) 1261 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 1262 1263 struct drm_syncobj_create create = {}; 1264 1265 int ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_CREATE, &create); 1266 if (ret) { 1267 vk_free(&device->vk.alloc, *point); 1268 return vk_error(device, VK_ERROR_DEVICE_LOST); 1269 } 1270 1271 (*point)->syncobj = create.handle; 1272 1273 } else { 1274 *point = list_first_entry(&timeline->free_points, 1275 struct tu_timeline_point, link); 1276 list_del(&(*point)->link); 1277 } 1278 1279 (*point)->value = value; 1280 list_addtail(&(*point)->link, &timeline->points); 1281 1282 return VK_SUCCESS; 1283} 1284 1285static VkResult 1286tu_queue_submit_timeline_locked(struct tu_queue *queue, 1287 struct tu_queue_submit *submit) 1288{ 1289 VkResult result; 1290 uint32_t timeline_idx = 1291 submit->nr_out_syncobjs - submit->signal_timeline_count; 1292 1293 for (uint32_t i = 0; i < submit->signal_timeline_count; i++) { 1294 struct tu_timeline *timeline = &submit->signal_timelines[i]->timeline; 1295 uint64_t signal_value = submit->signal_timeline_values[i]; 1296 struct tu_timeline_point *point; 1297 1298 result = tu_timeline_add_point_locked(queue->device, timeline, 1299 signal_value, &point); 1300 if (result != VK_SUCCESS) 1301 return result; 1302 1303 submit->out_syncobjs[timeline_idx + i] = 1304 (struct drm_msm_gem_submit_syncobj) { 1305 .handle = point->syncobj, 1306 .flags = 0, 1307 }; 1308 } 1309 1310 return tu_queue_submit_locked(queue, submit); 1311} 1312 1313static VkResult 1314tu_queue_submit_deferred_locked(struct tu_queue *queue, uint32_t *advance) 1315{ 1316 VkResult result = VK_SUCCESS; 1317 1318 list_for_each_entry_safe(struct tu_queue_submit, submit, 1319 &queue->queued_submits, link) { 1320 if (!tu_queue_submit_ready_locked(submit)) 1321 break; 1322 
1323 (*advance)++; 1324 1325 result = tu_queue_submit_timeline_locked(queue, submit); 1326 1327 list_del(&submit->link); 1328 tu_queue_submit_free(queue, submit); 1329 1330 if (result != VK_SUCCESS) 1331 break; 1332 } 1333 1334 return result; 1335} 1336 1337VkResult 1338tu_device_submit_deferred_locked(struct tu_device *dev) 1339{ 1340 VkResult result = VK_SUCCESS; 1341 1342 uint32_t advance = 0; 1343 do { 1344 advance = 0; 1345 for (uint32_t i = 0; i < dev->queue_count[0]; i++) { 1346 /* Try again if there's signaled submission. */ 1347 result = tu_queue_submit_deferred_locked(&dev->queues[0][i], 1348 &advance); 1349 if (result != VK_SUCCESS) 1350 return result; 1351 } 1352 1353 } while(advance); 1354 1355 return result; 1356} 1357 1358static inline void 1359get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns) 1360{ 1361 struct timespec t; 1362 clock_gettime(CLOCK_MONOTONIC, &t); 1363 tv->tv_sec = t.tv_sec + ns / 1000000000; 1364 tv->tv_nsec = t.tv_nsec + ns % 1000000000; 1365} 1366 1367VkResult 1368tu_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj) 1369{ 1370 struct drm_msm_wait_fence req = { 1371 .fence = syncobj->fence, 1372 .queueid = syncobj->msm_queue_id, 1373 }; 1374 int ret; 1375 1376 get_abs_timeout(&req.timeout, 1000000000); 1377 1378 ret = drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &req, sizeof(req)); 1379 if (ret && (ret != -ETIMEDOUT)) { 1380 fprintf(stderr, "wait-fence failed! 
%d (%s)", ret, strerror(errno)); 1381 return VK_TIMEOUT; 1382 } 1383 1384 return VK_SUCCESS; 1385} 1386 1387VKAPI_ATTR VkResult VKAPI_CALL 1388tu_QueueSubmit(VkQueue _queue, 1389 uint32_t submitCount, 1390 const VkSubmitInfo *pSubmits, 1391 VkFence _fence) 1392{ 1393 TU_FROM_HANDLE(tu_queue, queue, _queue); 1394 TU_FROM_HANDLE(tu_syncobj, fence, _fence); 1395 1396 for (uint32_t i = 0; i < submitCount; ++i) { 1397 const VkSubmitInfo *submit = pSubmits + i; 1398 const bool last_submit = (i == submitCount - 1); 1399 uint32_t out_syncobjs_size = submit->signalSemaphoreCount; 1400 1401 const VkPerformanceQuerySubmitInfoKHR *perf_info = 1402 vk_find_struct_const(pSubmits[i].pNext, 1403 PERFORMANCE_QUERY_SUBMIT_INFO_KHR); 1404 1405 if (last_submit && fence) 1406 out_syncobjs_size += 1; 1407 1408 pthread_mutex_lock(&queue->device->submit_mutex); 1409 struct tu_queue_submit *submit_req = NULL; 1410 1411 VkResult ret = tu_queue_submit_create_locked(queue, submit, 1412 submit->waitSemaphoreCount, out_syncobjs_size, 1413 last_submit, perf_info, &submit_req); 1414 1415 if (ret != VK_SUCCESS) { 1416 pthread_mutex_unlock(&queue->device->submit_mutex); 1417 return ret; 1418 } 1419 1420 /* note: assuming there won't be any very large semaphore counts */ 1421 struct drm_msm_gem_submit_syncobj *in_syncobjs = submit_req->in_syncobjs; 1422 struct drm_msm_gem_submit_syncobj *out_syncobjs = submit_req->out_syncobjs; 1423 uint32_t nr_in_syncobjs = 0, nr_out_syncobjs = 0; 1424 1425 for (uint32_t i = 0; i < submit->waitSemaphoreCount; i++) { 1426 TU_FROM_HANDLE(tu_syncobj, sem, submit->pWaitSemaphores[i]); 1427 if (sem->type == TU_SEMAPHORE_TIMELINE) 1428 continue; 1429 1430 in_syncobjs[nr_in_syncobjs++] = (struct drm_msm_gem_submit_syncobj) { 1431 .handle = sem->binary.temporary ?: sem->binary.permanent, 1432 .flags = MSM_SUBMIT_SYNCOBJ_RESET, 1433 }; 1434 } 1435 1436 for (uint32_t i = 0; i < submit->signalSemaphoreCount; i++) { 1437 TU_FROM_HANDLE(tu_syncobj, sem, 
submit->pSignalSemaphores[i]); 1438 1439 /* In case of timeline semaphores, we can defer the creation of syncobj 1440 * and adding it at real submit time. 1441 */ 1442 if (sem->type == TU_SEMAPHORE_TIMELINE) 1443 continue; 1444 1445 out_syncobjs[nr_out_syncobjs++] = (struct drm_msm_gem_submit_syncobj) { 1446 .handle = sem->binary.temporary ?: sem->binary.permanent, 1447 .flags = 0, 1448 }; 1449 } 1450 1451 if (last_submit && fence) { 1452 out_syncobjs[nr_out_syncobjs++] = (struct drm_msm_gem_submit_syncobj) { 1453 .handle = fence->binary.temporary ?: fence->binary.permanent, 1454 .flags = 0, 1455 }; 1456 } 1457 1458 /* Queue the current submit */ 1459 list_addtail(&submit_req->link, &queue->queued_submits); 1460 ret = tu_device_submit_deferred_locked(queue->device); 1461 1462 pthread_mutex_unlock(&queue->device->submit_mutex); 1463 if (ret != VK_SUCCESS) 1464 return ret; 1465 } 1466 1467 if (!submitCount && fence) { 1468 /* signal fence imemediately since we don't have a submit to do it */ 1469 drmIoctl(queue->device->fd, DRM_IOCTL_SYNCOBJ_SIGNAL, &(struct drm_syncobj_array) { 1470 .handles = (uintptr_t) (uint32_t[]) { fence->binary.temporary ?: fence->binary.permanent }, 1471 .count_handles = 1, 1472 }); 1473 } 1474 1475 return VK_SUCCESS; 1476} 1477 1478VKAPI_ATTR VkResult VKAPI_CALL 1479tu_CreateFence(VkDevice device, 1480 const VkFenceCreateInfo *info, 1481 const VkAllocationCallbacks *pAllocator, 1482 VkFence *pFence) 1483{ 1484 return sync_create(device, info->flags & VK_FENCE_CREATE_SIGNALED_BIT, true, true, 0, 1485 pAllocator, (void**) pFence); 1486} 1487 1488VKAPI_ATTR void VKAPI_CALL 1489tu_DestroyFence(VkDevice device, VkFence fence, const VkAllocationCallbacks *pAllocator) 1490{ 1491 TU_FROM_HANDLE(tu_syncobj, sync, fence); 1492 sync_destroy(device, sync, pAllocator); 1493} 1494 1495VKAPI_ATTR VkResult VKAPI_CALL 1496tu_ImportFenceFdKHR(VkDevice device, const VkImportFenceFdInfoKHR *info) 1497{ 1498 TU_FROM_HANDLE(tu_syncobj, sync, info->fence); 1499 
return sync_import(device, sync, info->flags & VK_FENCE_IMPORT_TEMPORARY_BIT, 1500 info->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT, info->fd); 1501} 1502 1503VKAPI_ATTR VkResult VKAPI_CALL 1504tu_GetFenceFdKHR(VkDevice device, const VkFenceGetFdInfoKHR *info, int *pFd) 1505{ 1506 TU_FROM_HANDLE(tu_syncobj, sync, info->fence); 1507 return sync_export(device, sync, 1508 info->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT, pFd); 1509} 1510 1511static VkResult 1512drm_syncobj_wait(struct tu_device *device, 1513 const uint32_t *handles, uint32_t count_handles, 1514 int64_t timeout_nsec, bool wait_all) 1515{ 1516 int ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_WAIT, &(struct drm_syncobj_wait) { 1517 .handles = (uint64_t) (uintptr_t) handles, 1518 .count_handles = count_handles, 1519 .timeout_nsec = timeout_nsec, 1520 .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | 1521 COND(wait_all, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) 1522 }); 1523 if (ret) { 1524 if (errno == ETIME) 1525 return VK_TIMEOUT; 1526 1527 assert(0); 1528 return VK_ERROR_DEVICE_LOST; /* TODO */ 1529 } 1530 return VK_SUCCESS; 1531} 1532 1533static uint64_t 1534gettime_ns(void) 1535{ 1536 struct timespec current; 1537 clock_gettime(CLOCK_MONOTONIC, ¤t); 1538 return (uint64_t)current.tv_sec * 1000000000 + current.tv_nsec; 1539} 1540 1541/* and the kernel converts it right back to relative timeout - very smart UAPI */ 1542static uint64_t 1543absolute_timeout(uint64_t timeout) 1544{ 1545 if (timeout == 0) 1546 return 0; 1547 uint64_t current_time = gettime_ns(); 1548 uint64_t max_timeout = (uint64_t) INT64_MAX - current_time; 1549 1550 timeout = MIN2(max_timeout, timeout); 1551 1552 return (current_time + timeout); 1553} 1554 1555VKAPI_ATTR VkResult VKAPI_CALL 1556tu_WaitForFences(VkDevice _device, 1557 uint32_t fenceCount, 1558 const VkFence *pFences, 1559 VkBool32 waitAll, 1560 uint64_t timeout) 1561{ 1562 TU_FROM_HANDLE(tu_device, device, _device); 1563 1564 if 
(tu_device_is_lost(device)) 1565 return VK_ERROR_DEVICE_LOST; 1566 1567 uint32_t handles[fenceCount]; 1568 for (unsigned i = 0; i < fenceCount; ++i) { 1569 TU_FROM_HANDLE(tu_syncobj, fence, pFences[i]); 1570 handles[i] = fence->binary.temporary ?: fence->binary.permanent; 1571 } 1572 1573 return drm_syncobj_wait(device, handles, fenceCount, absolute_timeout(timeout), waitAll); 1574} 1575 1576VKAPI_ATTR VkResult VKAPI_CALL 1577tu_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences) 1578{ 1579 TU_FROM_HANDLE(tu_device, device, _device); 1580 int ret; 1581 1582 uint32_t handles[fenceCount]; 1583 for (unsigned i = 0; i < fenceCount; ++i) { 1584 TU_FROM_HANDLE(tu_syncobj, fence, pFences[i]); 1585 sync_set_temporary(device, fence, 0); 1586 handles[i] = fence->binary.permanent; 1587 } 1588 1589 ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_RESET, &(struct drm_syncobj_array) { 1590 .handles = (uint64_t) (uintptr_t) handles, 1591 .count_handles = fenceCount, 1592 }); 1593 if (ret) { 1594 tu_device_set_lost(device, "DRM_IOCTL_SYNCOBJ_RESET failure: %s", 1595 strerror(errno)); 1596 } 1597 1598 return VK_SUCCESS; 1599} 1600 1601VKAPI_ATTR VkResult VKAPI_CALL 1602tu_GetFenceStatus(VkDevice _device, VkFence _fence) 1603{ 1604 TU_FROM_HANDLE(tu_device, device, _device); 1605 TU_FROM_HANDLE(tu_syncobj, fence, _fence); 1606 VkResult result; 1607 1608 result = drm_syncobj_wait(device, (uint32_t[]){fence->binary.temporary ?: fence->binary.permanent}, 1, 0, false); 1609 if (result == VK_TIMEOUT) 1610 result = VK_NOT_READY; 1611 return result; 1612} 1613 1614int 1615tu_signal_fences(struct tu_device *device, struct tu_syncobj *fence1, struct tu_syncobj *fence2) 1616{ 1617 uint32_t handles[2], count = 0; 1618 if (fence1) 1619 handles[count++] = fence1->binary.temporary ?: fence1->binary.permanent; 1620 1621 if (fence2) 1622 handles[count++] = fence2->binary.temporary ?: fence2->binary.permanent; 1623 1624 if (!count) 1625 return 0; 1626 1627 return 
drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_SIGNAL, &(struct drm_syncobj_array) { 1628 .handles = (uintptr_t) handles, 1629 .count_handles = count 1630 }); 1631} 1632 1633int 1634tu_syncobj_to_fd(struct tu_device *device, struct tu_syncobj *sync) 1635{ 1636 struct drm_syncobj_handle handle = { .handle = sync->binary.permanent }; 1637 int ret; 1638 1639 ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &handle); 1640 1641 return ret ? -1 : handle.fd; 1642} 1643 1644static VkResult 1645tu_timeline_gc_locked(struct tu_device *dev, struct tu_timeline *timeline) 1646{ 1647 VkResult result = VK_SUCCESS; 1648 1649 /* Go through every point in the timeline and check if any signaled point */ 1650 list_for_each_entry_safe(struct tu_timeline_point, point, 1651 &timeline->points, link) { 1652 1653 /* If the value of the point is higher than highest_submitted, 1654 * the point has not been submited yet. 1655 */ 1656 if (point->wait_count || point->value > timeline->highest_submitted) 1657 return VK_SUCCESS; 1658 1659 result = drm_syncobj_wait(dev, (uint32_t[]){point->syncobj}, 1, 0, true); 1660 1661 if (result == VK_TIMEOUT) { 1662 /* This means the syncobj is still busy and it should wait 1663 * with timeout specified by users via vkWaitSemaphores. 
1664 */ 1665 result = VK_SUCCESS; 1666 } else { 1667 timeline->highest_signaled = 1668 MAX2(timeline->highest_signaled, point->value); 1669 list_del(&point->link); 1670 list_add(&point->link, &timeline->free_points); 1671 } 1672 } 1673 1674 return result; 1675} 1676 1677 1678static VkResult 1679tu_timeline_wait_locked(struct tu_device *device, 1680 struct tu_timeline *timeline, 1681 uint64_t value, 1682 uint64_t abs_timeout) 1683{ 1684 VkResult result; 1685 1686 while(timeline->highest_submitted < value) { 1687 struct timespec abstime; 1688 timespec_from_nsec(&abstime, abs_timeout); 1689 1690 pthread_cond_timedwait(&device->timeline_cond, &device->submit_mutex, 1691 &abstime); 1692 1693 if (os_time_get_nano() >= abs_timeout && 1694 timeline->highest_submitted < value) 1695 return VK_TIMEOUT; 1696 } 1697 1698 /* Visit every point in the timeline and wait until 1699 * the highest_signaled reaches the value. 1700 */ 1701 while (1) { 1702 result = tu_timeline_gc_locked(device, timeline); 1703 if (result != VK_SUCCESS) 1704 return result; 1705 1706 if (timeline->highest_signaled >= value) 1707 return VK_SUCCESS; 1708 1709 struct tu_timeline_point *point = 1710 list_first_entry(&timeline->points, 1711 struct tu_timeline_point, link); 1712 1713 point->wait_count++; 1714 pthread_mutex_unlock(&device->submit_mutex); 1715 result = drm_syncobj_wait(device, (uint32_t[]){point->syncobj}, 1, 1716 abs_timeout, true); 1717 1718 pthread_mutex_lock(&device->submit_mutex); 1719 point->wait_count--; 1720 1721 if (result != VK_SUCCESS) 1722 return result; 1723 } 1724 1725 return result; 1726} 1727 1728static VkResult 1729tu_wait_timelines(struct tu_device *device, 1730 const VkSemaphoreWaitInfoKHR* pWaitInfo, 1731 uint64_t abs_timeout) 1732{ 1733 if ((pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR) && 1734 pWaitInfo->semaphoreCount > 1) { 1735 pthread_mutex_lock(&device->submit_mutex); 1736 1737 /* Visit every timline semaphore in the queue until timeout */ 1738 while (1) { 1739 
for(uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) { 1740 TU_FROM_HANDLE(tu_syncobj, semaphore, pWaitInfo->pSemaphores[i]); 1741 VkResult result = tu_timeline_wait_locked(device, 1742 &semaphore->timeline, pWaitInfo->pValues[i], 0); 1743 1744 /* Returns result values including VK_SUCCESS except for VK_TIMEOUT */ 1745 if (result != VK_TIMEOUT) { 1746 pthread_mutex_unlock(&device->submit_mutex); 1747 return result; 1748 } 1749 } 1750 1751 if (os_time_get_nano() > abs_timeout) { 1752 pthread_mutex_unlock(&device->submit_mutex); 1753 return VK_TIMEOUT; 1754 } 1755 } 1756 } else { 1757 VkResult result = VK_SUCCESS; 1758 1759 pthread_mutex_lock(&device->submit_mutex); 1760 for(uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) { 1761 TU_FROM_HANDLE(tu_syncobj, semaphore, pWaitInfo->pSemaphores[i]); 1762 assert(semaphore->type == TU_SEMAPHORE_TIMELINE); 1763 1764 result = tu_timeline_wait_locked(device, &semaphore->timeline, 1765 pWaitInfo->pValues[i], abs_timeout); 1766 if (result != VK_SUCCESS) 1767 break; 1768 } 1769 pthread_mutex_unlock(&device->submit_mutex); 1770 1771 return result; 1772 } 1773} 1774 1775 1776VKAPI_ATTR VkResult VKAPI_CALL 1777tu_GetSemaphoreCounterValue(VkDevice _device, 1778 VkSemaphore _semaphore, 1779 uint64_t* pValue) 1780{ 1781 TU_FROM_HANDLE(tu_device, device, _device); 1782 TU_FROM_HANDLE(tu_syncobj, semaphore, _semaphore); 1783 1784 assert(semaphore->type == TU_SEMAPHORE_TIMELINE); 1785 1786 VkResult result; 1787 1788 pthread_mutex_lock(&device->submit_mutex); 1789 1790 result = tu_timeline_gc_locked(device, &semaphore->timeline); 1791 *pValue = semaphore->timeline.highest_signaled; 1792 1793 pthread_mutex_unlock(&device->submit_mutex); 1794 1795 return result; 1796} 1797 1798 1799VKAPI_ATTR VkResult VKAPI_CALL 1800tu_WaitSemaphores(VkDevice _device, 1801 const VkSemaphoreWaitInfoKHR* pWaitInfo, 1802 uint64_t timeout) 1803{ 1804 TU_FROM_HANDLE(tu_device, device, _device); 1805 1806 return tu_wait_timelines(device, pWaitInfo, 
absolute_timeout(timeout)); 1807} 1808 1809VKAPI_ATTR VkResult VKAPI_CALL 1810tu_SignalSemaphore(VkDevice _device, 1811 const VkSemaphoreSignalInfoKHR* pSignalInfo) 1812{ 1813 TU_FROM_HANDLE(tu_device, device, _device); 1814 TU_FROM_HANDLE(tu_syncobj, semaphore, pSignalInfo->semaphore); 1815 VkResult result; 1816 1817 assert(semaphore->type == TU_SEMAPHORE_TIMELINE); 1818 1819 pthread_mutex_lock(&device->submit_mutex); 1820 1821 result = tu_timeline_gc_locked(device, &semaphore->timeline); 1822 if (result != VK_SUCCESS) { 1823 pthread_mutex_unlock(&device->submit_mutex); 1824 return result; 1825 } 1826 1827 semaphore->timeline.highest_submitted = pSignalInfo->value; 1828 semaphore->timeline.highest_signaled = pSignalInfo->value; 1829 1830 result = tu_device_submit_deferred_locked(device); 1831 1832 pthread_cond_broadcast(&device->timeline_cond); 1833 pthread_mutex_unlock(&device->submit_mutex); 1834 1835 return result; 1836} 1837 1838#ifdef ANDROID 1839#include <libsync.h> 1840 1841VKAPI_ATTR VkResult VKAPI_CALL 1842tu_QueueSignalReleaseImageANDROID(VkQueue _queue, 1843 uint32_t waitSemaphoreCount, 1844 const VkSemaphore *pWaitSemaphores, 1845 VkImage image, 1846 int *pNativeFenceFd) 1847{ 1848 TU_FROM_HANDLE(tu_queue, queue, _queue); 1849 VkResult result = VK_SUCCESS; 1850 1851 if (waitSemaphoreCount == 0) { 1852 if (pNativeFenceFd) 1853 *pNativeFenceFd = -1; 1854 return VK_SUCCESS; 1855 } 1856 1857 int fd = -1; 1858 1859 for (uint32_t i = 0; i < waitSemaphoreCount; ++i) { 1860 int tmp_fd; 1861 result = tu_GetSemaphoreFdKHR( 1862 tu_device_to_handle(queue->device), 1863 &(VkSemaphoreGetFdInfoKHR) { 1864 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, 1865 .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, 1866 .semaphore = pWaitSemaphores[i], 1867 }, 1868 &tmp_fd); 1869 if (result != VK_SUCCESS) { 1870 if (fd >= 0) 1871 close(fd); 1872 return result; 1873 } 1874 1875 if (fd < 0) 1876 fd = tmp_fd; 1877 else if (tmp_fd >= 0) { 1878 
sync_accumulate("tu", &fd, tmp_fd); 1879 close(tmp_fd); 1880 } 1881 } 1882 1883 if (pNativeFenceFd) { 1884 *pNativeFenceFd = fd; 1885 } else if (fd >= 0) { 1886 close(fd); 1887 /* We still need to do the exports, to reset the semaphores, but 1888 * otherwise we don't wait on them. */ 1889 } 1890 return VK_SUCCESS; 1891} 1892#endif 1893