1/*
2 * Copyright © 2018 Google, Inc.
3 * Copyright © 2015 Intel Corporation
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <errno.h>
26#include <fcntl.h>
27#include <stdint.h>
28#include <sys/ioctl.h>
29#include <sys/mman.h>
30#include <xf86drm.h>
31
32#include "vk_util.h"
33
34#include "drm-uapi/msm_drm.h"
35#include "util/timespec.h"
36#include "util/os_time.h"
37#include "util/perf/u_trace.h"
38
39#include "tu_private.h"
40
41#include "tu_cs.h"
42
/* Payload of a binary semaphore/fence: two DRM syncobj handles. A non-zero
 * temporary payload takes precedence over the permanent one (see
 * sync_export()), matching Vulkan's external-semaphore import semantics. */
struct tu_binary_syncobj {
   uint32_t permanent, temporary;
};
46
/* One point on an emulated timeline semaphore, backed by its own DRM
 * syncobj. */
struct tu_timeline_point {
   struct list_head link; /* node in tu_timeline::points or ::free_points */

   uint64_t value;        /* timeline value associated with this point */
   uint32_t syncobj;      /* DRM syncobj handle backing this point */
   uint32_t wait_count;   /* presumably outstanding waiters on this point —
                           * confirm against the submit/wait paths */
};
54
/* State of an emulated timeline semaphore (one DRM syncobj per point). */
struct tu_timeline {
   uint64_t highest_submitted;
   uint64_t highest_signaled;

   /* A timeline can have multiple timeline points */
   struct list_head points;

   /* A list containing points that have already been submitted and can be
    * recycled. A point will be moved to 'points' when a new point is
    * required at submit time.
    */
   struct list_head free_points;
};
68
/* Which flavor of Vulkan semaphore a tu_syncobj represents. */
typedef enum {
   TU_SEMAPHORE_BINARY,   /* plain binary semaphore (DRM syncobj) */
   TU_SEMAPHORE_TIMELINE, /* VK_KHR_timeline_semaphore, emulated */
} tu_semaphore_type;
73
74
/* Backing object for both VkSemaphore and VkFence (see sync_create()).
 * Exactly one union member is valid, selected by 'type'. */
struct tu_syncobj {
   struct vk_object_base base;

   tu_semaphore_type type;
   union {
      struct tu_binary_syncobj binary;   /* TU_SEMAPHORE_BINARY */
      struct tu_timeline timeline;       /* TU_SEMAPHORE_TIMELINE */
   };
};
84
/* Everything needed for one queue submission, gathered up front so the
 * kernel submit ioctl can run later without further allocation. */
struct tu_queue_submit
{
   struct   list_head link;

   /* Command buffers in this submission, plus optional per-buffer u_trace
    * data (allocated only when trace points are present). */
   VkCommandBuffer *cmd_buffers;
   struct tu_u_trace_cmd_data *cmd_buffer_trace_data;
   uint32_t cmd_buffer_count;

   /* All wait/signal semaphores, binary and timeline alike. */
   struct   tu_syncobj **wait_semaphores;
   uint32_t wait_semaphore_count;
   struct   tu_syncobj **signal_semaphores;
   uint32_t signal_semaphore_count;

   /* Timeline semaphores only, with their wait values. The arrays grow on
    * demand; *_array_length tracks the allocated capacity. */
   struct   tu_syncobj **wait_timelines;
   uint64_t *wait_timeline_values;
   uint32_t wait_timeline_count;
   uint32_t wait_timeline_array_length;

   struct   tu_syncobj **signal_timelines;
   uint64_t *signal_timeline_values;
   uint32_t signal_timeline_count;
   uint32_t signal_timeline_array_length;

   /* Kernel-facing arrays handed to DRM_MSM_GEM_SUBMIT. */
   struct   drm_msm_gem_submit_cmd *cmds;
   struct   drm_msm_gem_submit_syncobj *in_syncobjs;
   uint32_t nr_in_syncobjs;
   struct   drm_msm_gem_submit_syncobj *out_syncobjs;
   uint32_t nr_out_syncobjs;

   bool     last_submit;
   uint32_t entry_count;
   uint32_t counter_pass_index;
};
118
/* Identifies a kernel submission for u_trace: the MSM submit queue id plus
 * the fence number of the submit on that queue — presumably the seqno
 * returned by the kernel; confirm against the submit path. */
struct tu_u_trace_syncobj
{
   uint32_t msm_queue_id;
   uint32_t fence;
};
124
125static int
126tu_drm_get_param(const struct tu_physical_device *dev,
127                 uint32_t param,
128                 uint64_t *value)
129{
130   /* Technically this requires a pipe, but the kernel only supports one pipe
131    * anyway at the time of writing and most of these are clearly pipe
132    * independent. */
133   struct drm_msm_param req = {
134      .pipe = MSM_PIPE_3D0,
135      .param = param,
136   };
137
138   int ret = drmCommandWriteRead(dev->local_fd, DRM_MSM_GET_PARAM, &req,
139                                 sizeof(req));
140   if (ret)
141      return ret;
142
143   *value = req.value;
144
145   return 0;
146}
147
148static int
149tu_drm_get_gpu_id(const struct tu_physical_device *dev, uint32_t *id)
150{
151   uint64_t value;
152   int ret = tu_drm_get_param(dev, MSM_PARAM_GPU_ID, &value);
153   if (ret)
154      return ret;
155
156   *id = value;
157   return 0;
158}
159
160static int
161tu_drm_get_gmem_size(const struct tu_physical_device *dev, uint32_t *size)
162{
163   uint64_t value;
164   int ret = tu_drm_get_param(dev, MSM_PARAM_GMEM_SIZE, &value);
165   if (ret)
166      return ret;
167
168   *size = value;
169   return 0;
170}
171
/* Query the GMEM base iova; thin wrapper over tu_drm_get_param(). */
static int
tu_drm_get_gmem_base(const struct tu_physical_device *dev, uint64_t *base)
{
   return tu_drm_get_param(dev, MSM_PARAM_GMEM_BASE, base);
}
177
/* Read the GPU timestamp counter; thin wrapper over tu_drm_get_param(). */
int
tu_drm_get_timestamp(struct tu_physical_device *device, uint64_t *ts)
{
   return tu_drm_get_param(device, MSM_PARAM_TIMESTAMP, ts);
}
183
184int
185tu_drm_submitqueue_new(const struct tu_device *dev,
186                       int priority,
187                       uint32_t *queue_id)
188{
189   struct drm_msm_submitqueue req = {
190      .flags = 0,
191      .prio = priority,
192   };
193
194   int ret = drmCommandWriteRead(dev->fd,
195                                 DRM_MSM_SUBMITQUEUE_NEW, &req, sizeof(req));
196   if (ret)
197      return ret;
198
199   *queue_id = req.id;
200   return 0;
201}
202
/* Destroy an MSM submit queue. Best-effort: the ioctl's return value is
 * deliberately ignored since there is nothing useful to do on failure. */
void
tu_drm_submitqueue_close(const struct tu_device *dev, uint32_t queue_id)
{
   drmCommandWrite(dev->fd, DRM_MSM_SUBMITQUEUE_CLOSE,
                   &queue_id, sizeof(uint32_t));
}
209
210static void
211tu_gem_close(const struct tu_device *dev, uint32_t gem_handle)
212{
213   struct drm_gem_close req = {
214      .handle = gem_handle,
215   };
216
217   drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
218}
219
220/** Helper for DRM_MSM_GEM_INFO, returns 0 on error. */
221static uint64_t
222tu_gem_info(const struct tu_device *dev, uint32_t gem_handle, uint32_t info)
223{
224   struct drm_msm_gem_info req = {
225      .handle = gem_handle,
226      .info = info,
227   };
228
229   int ret = drmCommandWriteRead(dev->fd,
230                                 DRM_MSM_GEM_INFO, &req, sizeof(req));
231   if (ret < 0)
232      return 0;
233
234   return req.value;
235}
236
237static VkResult
238tu_bo_init(struct tu_device *dev,
239           struct tu_bo *bo,
240           uint32_t gem_handle,
241           uint64_t size,
242           bool dump)
243{
244   uint64_t iova = tu_gem_info(dev, gem_handle, MSM_INFO_GET_IOVA);
245   if (!iova) {
246      tu_gem_close(dev, gem_handle);
247      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
248   }
249
250   *bo = (struct tu_bo) {
251      .gem_handle = gem_handle,
252      .size = size,
253      .iova = iova,
254   };
255
256   mtx_lock(&dev->bo_mutex);
257   uint32_t idx = dev->bo_count++;
258
259   /* grow the bo list if needed */
260   if (idx >= dev->bo_list_size) {
261      uint32_t new_len = idx + 64;
262      struct drm_msm_gem_submit_bo *new_ptr =
263         vk_realloc(&dev->vk.alloc, dev->bo_list, new_len * sizeof(*dev->bo_list),
264                    8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
265      if (!new_ptr)
266         goto fail_bo_list;
267
268      dev->bo_list = new_ptr;
269      dev->bo_list_size = new_len;
270   }
271
272   /* grow the "bo idx" list (maps gem handles to index in the bo list) */
273   if (bo->gem_handle >= dev->bo_idx_size) {
274      uint32_t new_len = bo->gem_handle + 256;
275      uint32_t *new_ptr =
276         vk_realloc(&dev->vk.alloc, dev->bo_idx, new_len * sizeof(*dev->bo_idx),
277                    8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
278      if (!new_ptr)
279         goto fail_bo_idx;
280
281      dev->bo_idx = new_ptr;
282      dev->bo_idx_size = new_len;
283   }
284
285   dev->bo_idx[bo->gem_handle] = idx;
286   dev->bo_list[idx] = (struct drm_msm_gem_submit_bo) {
287      .flags = MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE |
288               COND(dump, MSM_SUBMIT_BO_DUMP),
289      .handle = gem_handle,
290      .presumed = iova,
291   };
292   mtx_unlock(&dev->bo_mutex);
293
294   return VK_SUCCESS;
295
296fail_bo_idx:
297   vk_free(&dev->vk.alloc, dev->bo_list);
298fail_bo_list:
299   tu_gem_close(dev, gem_handle);
300   return VK_ERROR_OUT_OF_HOST_MEMORY;
301}
302
303VkResult
304tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size,
305               enum tu_bo_alloc_flags flags)
306{
307   /* TODO: Choose better flags. As of 2018-11-12, freedreno/drm/msm_bo.c
308    * always sets `flags = MSM_BO_WC`, and we copy that behavior here.
309    */
310   struct drm_msm_gem_new req = {
311      .size = size,
312      .flags = MSM_BO_WC
313   };
314
315   if (flags & TU_BO_ALLOC_GPU_READ_ONLY)
316      req.flags |= MSM_BO_GPU_READONLY;
317
318   int ret = drmCommandWriteRead(dev->fd,
319                                 DRM_MSM_GEM_NEW, &req, sizeof(req));
320   if (ret)
321      return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
322
323   return tu_bo_init(dev, bo, req.handle, size, flags & TU_BO_ALLOC_ALLOW_DUMP);
324}
325
326VkResult
327tu_bo_init_dmabuf(struct tu_device *dev,
328                  struct tu_bo *bo,
329                  uint64_t size,
330                  int prime_fd)
331{
332   /* lseek() to get the real size */
333   off_t real_size = lseek(prime_fd, 0, SEEK_END);
334   lseek(prime_fd, 0, SEEK_SET);
335   if (real_size < 0 || (uint64_t) real_size < size)
336      return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
337
338   uint32_t gem_handle;
339   int ret = drmPrimeFDToHandle(dev->fd, prime_fd,
340                                &gem_handle);
341   if (ret)
342      return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
343
344   return tu_bo_init(dev, bo, gem_handle, size, false);
345}
346
/* Export a BO as a dma-buf fd via PRIME. Returns the new fd (caller owns
 * it) or -1 on failure. */
int
tu_bo_export_dmabuf(struct tu_device *dev, struct tu_bo *bo)
{
   int prime_fd;
   int ret = drmPrimeHandleToFD(dev->fd, bo->gem_handle,
                                DRM_CLOEXEC, &prime_fd);

   return ret == 0 ? prime_fd : -1;
}
356
357VkResult
358tu_bo_map(struct tu_device *dev, struct tu_bo *bo)
359{
360   if (bo->map)
361      return VK_SUCCESS;
362
363   uint64_t offset = tu_gem_info(dev, bo->gem_handle, MSM_INFO_GET_OFFSET);
364   if (!offset)
365      return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
366
367   /* TODO: Should we use the wrapper os_mmap() like Freedreno does? */
368   void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
369                    dev->fd, offset);
370   if (map == MAP_FAILED)
371      return vk_error(dev, VK_ERROR_MEMORY_MAP_FAILED);
372
373   bo->map = map;
374   return VK_SUCCESS;
375}
376
/*
 * Tear down a BO: unmap it, remove it from the device-global BO table,
 * and close the GEM handle.
 */
void
tu_bo_finish(struct tu_device *dev, struct tu_bo *bo)
{
   assert(bo->gem_handle);

   if (bo->map)
      munmap(bo->map, bo->size);

   mtx_lock(&dev->bo_mutex);
   uint32_t idx = dev->bo_idx[bo->gem_handle];
   dev->bo_count--;
   /* Swap-remove: move the last table entry into the freed slot and fix up
    * its handle->index mapping so the table stays dense. */
   dev->bo_list[idx] = dev->bo_list[dev->bo_count];
   dev->bo_idx[dev->bo_list[idx].handle] = idx;
   mtx_unlock(&dev->bo_mutex);

   tu_gem_close(dev, bo->gem_handle);
}
394
395static VkResult
396tu_drm_device_init(struct tu_physical_device *device,
397                   struct tu_instance *instance,
398                   drmDevicePtr drm_device)
399{
400   const char *path = drm_device->nodes[DRM_NODE_RENDER];
401   VkResult result = VK_SUCCESS;
402   drmVersionPtr version;
403   int fd;
404   int master_fd = -1;
405
406   fd = open(path, O_RDWR | O_CLOEXEC);
407   if (fd < 0) {
408      return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
409                               "failed to open device %s", path);
410   }
411
412   /* Version 1.6 added SYNCOBJ support. */
413   const int min_version_major = 1;
414   const int min_version_minor = 6;
415
416   version = drmGetVersion(fd);
417   if (!version) {
418      close(fd);
419      return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
420                               "failed to query kernel driver version for device %s",
421                               path);
422   }
423
424   if (strcmp(version->name, "msm")) {
425      drmFreeVersion(version);
426      close(fd);
427      return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
428                               "device %s does not use the msm kernel driver",
429                               path);
430   }
431
432   if (version->version_major != min_version_major ||
433       version->version_minor < min_version_minor) {
434      result = vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
435                                 "kernel driver for device %s has version %d.%d, "
436                                 "but Vulkan requires version >= %d.%d",
437                                 path,
438                                 version->version_major, version->version_minor,
439                                 min_version_major, min_version_minor);
440      drmFreeVersion(version);
441      close(fd);
442      return result;
443   }
444
445   device->msm_major_version = version->version_major;
446   device->msm_minor_version = version->version_minor;
447
448   drmFreeVersion(version);
449
450   if (instance->debug_flags & TU_DEBUG_STARTUP)
451      mesa_logi("Found compatible device '%s'.", path);
452
453   device->instance = instance;
454
455   if (instance->vk.enabled_extensions.KHR_display) {
456      master_fd =
457         open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
458      if (master_fd >= 0) {
459         /* TODO: free master_fd is accel is not working? */
460      }
461   }
462
463   device->master_fd = master_fd;
464   device->local_fd = fd;
465
466   if (tu_drm_get_gpu_id(device, &device->dev_id.gpu_id)) {
467      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
468                                 "could not get GPU ID");
469      goto fail;
470   }
471
472   if (tu_drm_get_param(device, MSM_PARAM_CHIP_ID, &device->dev_id.chip_id)) {
473      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
474                                 "could not get CHIP ID");
475      goto fail;
476   }
477
478   if (tu_drm_get_gmem_size(device, &device->gmem_size)) {
479      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
480                                "could not get GMEM size");
481      goto fail;
482   }
483
484   if (tu_drm_get_gmem_base(device, &device->gmem_base)) {
485      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
486                                 "could not get GMEM size");
487      goto fail;
488   }
489
490   device->heap.size = tu_get_system_heap_size();
491   device->heap.used = 0u;
492   device->heap.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
493
494   result = tu_physical_device_init(device, instance);
495   if (result == VK_SUCCESS)
496       return result;
497
498fail:
499   close(fd);
500   if (master_fd != -1)
501      close(master_fd);
502   return result;
503}
504
505VkResult
506tu_enumerate_devices(struct tu_instance *instance)
507{
508   /* TODO: Check for more devices ? */
509   drmDevicePtr devices[8];
510   VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
511   int max_devices;
512
513   instance->physical_device_count = 0;
514
515   max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
516
517   if (instance->debug_flags & TU_DEBUG_STARTUP) {
518      if (max_devices < 0)
519         mesa_logi("drmGetDevices2 returned error: %s\n", strerror(max_devices));
520      else
521         mesa_logi("Found %d drm nodes", max_devices);
522   }
523
524   if (max_devices < 1)
525      return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
526                               "No DRM devices found");
527
528   for (unsigned i = 0; i < (unsigned) max_devices; i++) {
529      if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
530          devices[i]->bustype == DRM_BUS_PLATFORM) {
531
532         result = tu_drm_device_init(
533            instance->physical_devices + instance->physical_device_count,
534            instance, devices[i]);
535         if (result == VK_SUCCESS)
536            ++instance->physical_device_count;
537         else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
538            break;
539      }
540   }
541   drmFreeDevices(devices, max_devices);
542
543   return result;
544}
545
/*
 * Destroy all timeline state: every point on both the free list and the
 * live list gets its backing DRM syncobj destroyed and its struct freed.
 */
static void
tu_timeline_finish(struct tu_device *device,
                    struct tu_timeline *timeline)
{
   list_for_each_entry_safe(struct tu_timeline_point, point,
                            &timeline->free_points, link) {
      list_del(&point->link);
      drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY,
            &(struct drm_syncobj_destroy) { .handle = point->syncobj });

      vk_free(&device->vk.alloc, point);
   }
   list_for_each_entry_safe(struct tu_timeline_point, point,
                            &timeline->points, link) {
      list_del(&point->link);
      drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY,
            &(struct drm_syncobj_destroy) { .handle = point->syncobj });
      vk_free(&device->vk.alloc, point);
   }
}
566
567static VkResult
568sync_create(VkDevice _device,
569            bool signaled,
570            bool fence,
571            bool binary,
572            uint64_t timeline_value,
573            const VkAllocationCallbacks *pAllocator,
574            void **p_sync)
575{
576   TU_FROM_HANDLE(tu_device, device, _device);
577
578   struct tu_syncobj *sync =
579         vk_object_alloc(&device->vk, pAllocator, sizeof(*sync),
580                         fence ? VK_OBJECT_TYPE_FENCE : VK_OBJECT_TYPE_SEMAPHORE);
581   if (!sync)
582      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
583
584   if (binary) {
585      struct drm_syncobj_create create = {};
586      if (signaled)
587         create.flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
588
589      int ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_CREATE, &create);
590      if (ret) {
591         vk_free2(&device->vk.alloc, pAllocator, sync);
592         return VK_ERROR_OUT_OF_HOST_MEMORY;
593      }
594
595      sync->binary.permanent = create.handle;
596      sync->binary.temporary = 0;
597      sync->type = TU_SEMAPHORE_BINARY;
598   } else {
599      sync->type = TU_SEMAPHORE_TIMELINE;
600      sync->timeline.highest_signaled = sync->timeline.highest_submitted =
601             timeline_value;
602      list_inithead(&sync->timeline.points);
603      list_inithead(&sync->timeline.free_points);
604   }
605
606   *p_sync = sync;
607
608   return VK_SUCCESS;
609}
610
/* Replace a binary sync's temporary payload, destroying the old one if
 * present. Pass syncobj == 0 to simply drop the temporary payload. */
static void
sync_set_temporary(struct tu_device *device, struct tu_syncobj *sync, uint32_t syncobj)
{
   if (sync->binary.temporary) {
      drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY,
            &(struct drm_syncobj_destroy) { .handle = sync->binary.temporary });
   }
   sync->binary.temporary = syncobj;
}
620
/* Destroy a tu_syncobj created by sync_create(). NULL is a no-op (Vulkan
 * destroy semantics). */
static void
sync_destroy(VkDevice _device, struct tu_syncobj *sync, const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   if (!sync)
      return;

   if (sync->type == TU_SEMAPHORE_BINARY) {
      /* Dropping the temporary payload first destroys its syncobj too. */
      sync_set_temporary(device, sync, 0);
      drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY,
            &(struct drm_syncobj_destroy) { .handle = sync->binary.permanent });
   } else {
      tu_timeline_finish(device, &sync->timeline);
   }

   vk_object_free(&device->vk, pAllocator, sync);
}
639
/*
 * Import an fd into a binary sync object.
 *
 * sync_fd == false: fd is an opaque syncobj fd; it becomes the temporary
 * or permanent payload (destroying any previous one).
 * sync_fd == true: fd is a sync-file (or -1 meaning "already signaled");
 * its fence is imported into a fresh syncobj installed as the temporary
 * payload (sync-file import is temporary-only per the Vulkan spec).
 *
 * On success the fd is consumed (closed); on failure it is left open so
 * ownership stays with the caller.
 */
static VkResult
sync_import(VkDevice _device, struct tu_syncobj *sync, bool temporary, bool sync_fd, int fd)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   int ret;

   if (!sync_fd) {
      uint32_t *dst = temporary ? &sync->binary.temporary : &sync->binary.permanent;

      struct drm_syncobj_handle handle = { .fd = fd };
      ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &handle);
      if (ret)
         return VK_ERROR_INVALID_EXTERNAL_HANDLE;

      /* Destroy whatever payload the import replaces. */
      if (*dst) {
         drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY,
               &(struct drm_syncobj_destroy) { .handle = *dst });
      }
      *dst = handle.handle;
      close(fd);
   } else {
      assert(temporary);

      struct drm_syncobj_create create = {};

      /* fd == -1 means "import an already-signaled payload". */
      if (fd == -1)
         create.flags |= DRM_SYNCOBJ_CREATE_SIGNALED;

      ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_CREATE, &create);
      if (ret)
         return VK_ERROR_INVALID_EXTERNAL_HANDLE;

      if (fd != -1) {
         ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &(struct drm_syncobj_handle) {
            .fd = fd,
            .handle = create.handle,
            .flags = DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE,
         });
         if (ret) {
            drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY,
                  &(struct drm_syncobj_destroy) { .handle = create.handle });
            return VK_ERROR_INVALID_EXTERNAL_HANDLE;
         }
         close(fd);
      }

      sync_set_temporary(device, sync, create.handle);
   }

   return VK_SUCCESS;
}
691
/*
 * Export a binary sync object as an fd: a sync-file when sync_fd is set,
 * otherwise an opaque syncobj fd. Exports the temporary payload when one
 * is present, else the permanent one, then drops the temporary payload
 * (the Vulkan export-resets-temporary rule).
 */
static VkResult
sync_export(VkDevice _device, struct tu_syncobj *sync, bool sync_fd, int *p_fd)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   struct drm_syncobj_handle handle = {
      .handle = sync->binary.temporary ?: sync->binary.permanent,
      .flags = COND(sync_fd, DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE),
      .fd = -1,
   };
   int ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &handle);
   if (ret)
      return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);

   /* restore permanent payload on export */
   sync_set_temporary(device, sync, 0);

   *p_fd = handle.fd;
   return VK_SUCCESS;
}
712
713static VkSemaphoreTypeKHR
714get_semaphore_type(const void *pNext, uint64_t *initial_value)
715{
716   const VkSemaphoreTypeCreateInfoKHR *type_info =
717      vk_find_struct_const(pNext, SEMAPHORE_TYPE_CREATE_INFO_KHR);
718
719   if (!type_info)
720      return VK_SEMAPHORE_TYPE_BINARY_KHR;
721
722   if (initial_value)
723      *initial_value = type_info->initialValue;
724   return type_info->semaphoreType;
725}
726
/* vkCreateSemaphore: creates a binary or timeline semaphore depending on
 * the chained VkSemaphoreTypeCreateInfoKHR (binary by default). */
VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateSemaphore(VkDevice device,
                   const VkSemaphoreCreateInfo *pCreateInfo,
                   const VkAllocationCallbacks *pAllocator,
                   VkSemaphore *pSemaphore)
{
   uint64_t timeline_value = 0;
   VkSemaphoreTypeKHR sem_type = get_semaphore_type(pCreateInfo->pNext, &timeline_value);

   return sync_create(device, false, false, (sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR),
                      timeline_value, pAllocator, (void**) pSemaphore);
}
739
/* vkDestroySemaphore: thin wrapper over sync_destroy(). */
VKAPI_ATTR void VKAPI_CALL
tu_DestroySemaphore(VkDevice device, VkSemaphore sem, const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_syncobj, sync, sem);
   sync_destroy(device, sync, pAllocator);
}
746
/* vkImportSemaphoreFdKHR: thin wrapper over sync_import(). */
VKAPI_ATTR VkResult VKAPI_CALL
tu_ImportSemaphoreFdKHR(VkDevice device, const VkImportSemaphoreFdInfoKHR *info)
{
   TU_FROM_HANDLE(tu_syncobj, sync, info->semaphore);
   return sync_import(device, sync, info->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
         info->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, info->fd);
}
754
/* vkGetSemaphoreFdKHR: thin wrapper over sync_export(). */
VKAPI_ATTR VkResult VKAPI_CALL
tu_GetSemaphoreFdKHR(VkDevice device, const VkSemaphoreGetFdInfoKHR *info, int *pFd)
{
   TU_FROM_HANDLE(tu_syncobj, sync, info->semaphore);
   return sync_export(device, sync,
         info->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, pFd);
}
762
/* vkGetPhysicalDeviceExternalSemaphoreProperties: opaque-fd and sync-fd
 * export/import are advertised for binary semaphores only; timeline
 * semaphores (emulated here) report no external-handle support. */
VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceExternalSemaphoreProperties(
   VkPhysicalDevice physicalDevice,
   const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
   VkExternalSemaphoreProperties *pExternalSemaphoreProperties)
{
   VkSemaphoreTypeKHR type = get_semaphore_type(pExternalSemaphoreInfo->pNext, NULL);

   if (type != VK_SEMAPHORE_TYPE_TIMELINE &&
       (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT ||
       pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT )) {
      pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
      pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
      pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
         VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
   } else {
      pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
      pExternalSemaphoreProperties->compatibleHandleTypes = 0;
      pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
   }
}
784
785static VkResult
786tu_queue_submit_add_timeline_wait_locked(struct tu_queue_submit* submit,
787                                         struct tu_device *device,
788                                         struct tu_syncobj *timeline,
789                                         uint64_t value)
790{
791   if (submit->wait_timeline_count >= submit->wait_timeline_array_length) {
792      uint32_t new_len = MAX2(submit->wait_timeline_array_length * 2, 64);
793
794      submit->wait_timelines = vk_realloc(&device->vk.alloc,
795            submit->wait_timelines,
796            new_len * sizeof(*submit->wait_timelines),
797            8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
798
799      if (submit->wait_timelines == NULL)
800         return VK_ERROR_OUT_OF_HOST_MEMORY;
801
802      submit->wait_timeline_values = vk_realloc(&device->vk.alloc,
803            submit->wait_timeline_values,
804            new_len * sizeof(*submit->wait_timeline_values),
805            8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
806
807      if (submit->wait_timeline_values == NULL) {
808         vk_free(&device->vk.alloc, submit->wait_timelines);
809         return VK_ERROR_OUT_OF_HOST_MEMORY;
810      }
811
812      submit->wait_timeline_array_length = new_len;
813   }
814
815   submit->wait_timelines[submit->wait_timeline_count] = timeline;
816   submit->wait_timeline_values[submit->wait_timeline_count] = value;
817
818   submit->wait_timeline_count++;
819
820   return VK_SUCCESS;
821}
822
823static VkResult
824tu_queue_submit_add_timeline_signal_locked(struct tu_queue_submit* submit,
825                                           struct tu_device *device,
826                                           struct tu_syncobj *timeline,
827                                           uint64_t value)
828{
829   if (submit->signal_timeline_count >= submit->signal_timeline_array_length) {
830      uint32_t new_len = MAX2(submit->signal_timeline_array_length * 2, 32);
831
832      submit->signal_timelines = vk_realloc(&device->vk.alloc,
833            submit->signal_timelines,
834            new_len * sizeof(*submit->signal_timelines),
835            8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
836
837      if (submit->signal_timelines == NULL)
838         return VK_ERROR_OUT_OF_HOST_MEMORY;
839
840      submit->signal_timeline_values = vk_realloc(&device->vk.alloc,
841            submit->signal_timeline_values,
842            new_len * sizeof(*submit->signal_timeline_values),
843            8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
844
845      if (submit->signal_timeline_values == NULL) {
846         vk_free(&device->vk.alloc, submit->signal_timelines);
847         return VK_ERROR_OUT_OF_HOST_MEMORY;
848      }
849
850      submit->signal_timeline_array_length = new_len;
851   }
852
853   submit->signal_timelines[submit->signal_timeline_count] = timeline;
854   submit->signal_timeline_values[submit->signal_timeline_count] = value;
855
856   submit->signal_timeline_count++;
857
858   return VK_SUCCESS;
859}
860
861static VkResult
862tu_queue_submit_create_locked(struct tu_queue *queue,
863                              const VkSubmitInfo *submit_info,
864                              const uint32_t nr_in_syncobjs,
865                              const uint32_t nr_out_syncobjs,
866                              const bool last_submit,
867                              const VkPerformanceQuerySubmitInfoKHR *perf_info,
868                              struct tu_queue_submit **submit)
869{
870   VkResult result;
871
872   const VkTimelineSemaphoreSubmitInfoKHR *timeline_info =
873         vk_find_struct_const(submit_info->pNext,
874                              TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR);
875
876   const uint32_t wait_values_count =
877         timeline_info ? timeline_info->waitSemaphoreValueCount : 0;
878   const uint32_t signal_values_count =
879         timeline_info ? timeline_info->signalSemaphoreValueCount : 0;
880
881   const uint64_t *wait_values =
882         wait_values_count ? timeline_info->pWaitSemaphoreValues : NULL;
883   const uint64_t *signal_values =
884         signal_values_count ?  timeline_info->pSignalSemaphoreValues : NULL;
885
886   struct tu_queue_submit *new_submit = vk_zalloc(&queue->device->vk.alloc,
887               sizeof(*new_submit), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
888
889   new_submit->cmd_buffer_count = submit_info->commandBufferCount;
890   new_submit->cmd_buffers = vk_zalloc(&queue->device->vk.alloc,
891         new_submit->cmd_buffer_count * sizeof(*new_submit->cmd_buffers), 8,
892         VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
893
894   if (new_submit->cmd_buffers == NULL) {
895      result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
896      goto fail_cmd_buffers;
897   }
898
899   memcpy(new_submit->cmd_buffers, submit_info->pCommandBuffers,
900          new_submit->cmd_buffer_count * sizeof(*new_submit->cmd_buffers));
901
902   new_submit->wait_semaphores = vk_zalloc(&queue->device->vk.alloc,
903         submit_info->waitSemaphoreCount * sizeof(*new_submit->wait_semaphores),
904         8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
905   if (new_submit->wait_semaphores == NULL) {
906      result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
907      goto fail_wait_semaphores;
908   }
909   new_submit->wait_semaphore_count = submit_info->waitSemaphoreCount;
910
911   new_submit->signal_semaphores = vk_zalloc(&queue->device->vk.alloc,
912         submit_info->signalSemaphoreCount *sizeof(*new_submit->signal_semaphores),
913         8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
914   if (new_submit->signal_semaphores == NULL) {
915      result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
916      goto fail_signal_semaphores;
917   }
918   new_submit->signal_semaphore_count = submit_info->signalSemaphoreCount;
919
920   for (uint32_t i = 0; i < submit_info->waitSemaphoreCount; i++) {
921      TU_FROM_HANDLE(tu_syncobj, sem, submit_info->pWaitSemaphores[i]);
922      new_submit->wait_semaphores[i] = sem;
923
924      if (sem->type == TU_SEMAPHORE_TIMELINE) {
925         result = tu_queue_submit_add_timeline_wait_locked(new_submit,
926               queue->device, sem, wait_values[i]);
927         if (result != VK_SUCCESS)
928            goto fail_wait_timelines;
929      }
930   }
931
932   for (uint32_t i = 0; i < submit_info->signalSemaphoreCount; i++) {
933      TU_FROM_HANDLE(tu_syncobj, sem, submit_info->pSignalSemaphores[i]);
934      new_submit->signal_semaphores[i] = sem;
935
936      if (sem->type == TU_SEMAPHORE_TIMELINE) {
937         result = tu_queue_submit_add_timeline_signal_locked(new_submit,
938               queue->device, sem, signal_values[i]);
939         if (result != VK_SUCCESS)
940            goto fail_signal_timelines;
941      }
942   }
943
944   bool u_trace_enabled = u_trace_context_tracing(&queue->device->trace_context);
945   bool has_trace_points = false;
946
947   uint32_t entry_count = 0;
948   for (uint32_t j = 0; j < new_submit->cmd_buffer_count; ++j) {
949      TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, new_submit->cmd_buffers[j]);
950
951      if (perf_info)
952         entry_count++;
953
954      entry_count += cmdbuf->cs.entry_count;
955
956      if (u_trace_enabled && u_trace_has_points(&cmdbuf->trace)) {
957         if (!(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT))
958            entry_count++;
959
960         has_trace_points = true;
961      }
962   }
963
964   new_submit->cmds = vk_zalloc(&queue->device->vk.alloc,
965         entry_count * sizeof(*new_submit->cmds), 8,
966         VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
967
968   if (new_submit->cmds == NULL) {
969      result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
970      goto fail_cmds;
971   }
972
973   if (has_trace_points) {
974      new_submit->cmd_buffer_trace_data = vk_zalloc(&queue->device->vk.alloc,
975            new_submit->cmd_buffer_count * sizeof(struct tu_u_trace_cmd_data), 8,
976            VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
977
978      if (new_submit->cmd_buffer_trace_data == NULL) {
979         result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
980         goto fail_cmd_trace_data;
981      }
982
983      for (uint32_t i = 0; i < new_submit->cmd_buffer_count; ++i) {
984         TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, new_submit->cmd_buffers[i]);
985
986         if (!(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) &&
987             u_trace_has_points(&cmdbuf->trace)) {
988            /* A single command buffer could be submitted several times, but we
989             * already backed timestamp iova addresses and trace points are
990             * single-use. Therefor we have to copy trace points and create
991             * a new timestamp buffer on every submit of reusable command buffer.
992             */
993            if (tu_create_copy_timestamp_cs(cmdbuf,
994                  &new_submit->cmd_buffer_trace_data[i].timestamp_copy_cs,
995                  &new_submit->cmd_buffer_trace_data[i].trace) != VK_SUCCESS) {
996               result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
997               goto fail_copy_timestamp_cs;
998            }
999            assert(new_submit->cmd_buffer_trace_data[i].timestamp_copy_cs->entry_count == 1);
1000         } else {
1001            new_submit->cmd_buffer_trace_data[i].trace = &cmdbuf->trace;
1002         }
1003      }
1004   }
1005
1006   /* Allocate without wait timeline semaphores */
1007   new_submit->in_syncobjs = vk_zalloc(&queue->device->vk.alloc,
1008         (nr_in_syncobjs - new_submit->wait_timeline_count) *
1009         sizeof(*new_submit->in_syncobjs), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1010
1011   if (new_submit->in_syncobjs == NULL) {
1012      result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
1013      goto fail_in_syncobjs;
1014   }
1015
1016   /* Allocate with signal timeline semaphores considered */
1017   new_submit->out_syncobjs = vk_zalloc(&queue->device->vk.alloc,
1018         nr_out_syncobjs * sizeof(*new_submit->out_syncobjs), 8,
1019         VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1020
1021   if (new_submit->out_syncobjs == NULL) {
1022      result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
1023      goto fail_out_syncobjs;
1024   }
1025
1026   new_submit->entry_count = entry_count;
1027   new_submit->nr_in_syncobjs = nr_in_syncobjs;
1028   new_submit->nr_out_syncobjs = nr_out_syncobjs;
1029   new_submit->last_submit = last_submit;
1030   new_submit->counter_pass_index = perf_info ? perf_info->counterPassIndex : ~0;
1031
1032   list_inithead(&new_submit->link);
1033
1034   *submit = new_submit;
1035
1036   return VK_SUCCESS;
1037
1038fail_out_syncobjs:
1039   vk_free(&queue->device->vk.alloc, new_submit->in_syncobjs);
1040fail_in_syncobjs:
1041   if (new_submit->cmd_buffer_trace_data)
1042      tu_u_trace_cmd_data_finish(queue->device, new_submit->cmd_buffer_trace_data,
1043                                 new_submit->cmd_buffer_count);
1044fail_copy_timestamp_cs:
1045   vk_free(&queue->device->vk.alloc, new_submit->cmd_buffer_trace_data);
1046fail_cmd_trace_data:
1047   vk_free(&queue->device->vk.alloc, new_submit->cmds);
1048fail_cmds:
1049fail_signal_timelines:
1050fail_wait_timelines:
1051   vk_free(&queue->device->vk.alloc, new_submit->signal_semaphores);
1052fail_signal_semaphores:
1053   vk_free(&queue->device->vk.alloc, new_submit->wait_semaphores);
1054fail_wait_semaphores:
1055   vk_free(&queue->device->vk.alloc, new_submit->cmd_buffers);
1056fail_cmd_buffers:
1057   return result;
1058}
1059
1060static void
1061tu_queue_submit_free(struct tu_queue *queue, struct tu_queue_submit *submit)
1062{
1063   vk_free(&queue->device->vk.alloc, submit->wait_semaphores);
1064   vk_free(&queue->device->vk.alloc, submit->signal_semaphores);
1065
1066   vk_free(&queue->device->vk.alloc, submit->wait_timelines);
1067   vk_free(&queue->device->vk.alloc, submit->wait_timeline_values);
1068   vk_free(&queue->device->vk.alloc, submit->signal_timelines);
1069   vk_free(&queue->device->vk.alloc, submit->signal_timeline_values);
1070
1071   vk_free(&queue->device->vk.alloc, submit->cmds);
1072   vk_free(&queue->device->vk.alloc, submit->in_syncobjs);
1073   vk_free(&queue->device->vk.alloc, submit->out_syncobjs);
1074   vk_free(&queue->device->vk.alloc, submit->cmd_buffers);
1075   vk_free(&queue->device->vk.alloc, submit);
1076}
1077
static void
tu_queue_build_msm_gem_submit_cmds(struct tu_queue *queue,
                                   struct tu_queue_submit *submit)
{
   /* Fill submit->cmds with one drm_msm_gem_submit_cmd per IB: an optional
    * perf-counter-pass IB, each cmd-stream entry of every command buffer,
    * and an optional timestamp-copy IB for u_trace.
    *
    * Must run with bo_mutex held (see tu_queue_submit_locked): the bo_idx[]
    * lookup of a GEM handle is only stable while the BO list cannot change.
    */
   struct drm_msm_gem_submit_cmd *cmds = submit->cmds;

   uint32_t entry_idx = 0;
   for (uint32_t j = 0; j < submit->cmd_buffer_count; ++j) {
      TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, submit->cmd_buffers[j]);
      struct tu_cs *cs = &cmdbuf->cs;
      struct tu_device *dev = queue->device;

      /* ~0 means no performance-query pass was requested for this submit. */
      if (submit->counter_pass_index != ~0) {
         struct tu_cs_entry *perf_cs_entry =
            &dev->perfcntrs_pass_cs_entries[submit->counter_pass_index];

         cmds[entry_idx].type = MSM_SUBMIT_CMD_BUF;
         cmds[entry_idx].submit_idx =
            dev->bo_idx[perf_cs_entry->bo->gem_handle];
         cmds[entry_idx].submit_offset = perf_cs_entry->offset;
         cmds[entry_idx].size = perf_cs_entry->size;
         cmds[entry_idx].pad = 0;
         cmds[entry_idx].nr_relocs = 0;
         cmds[entry_idx++].relocs = 0;
      }

      /* One kernel cmd per cmd-stream entry of the command buffer. */
      for (unsigned i = 0; i < cs->entry_count; ++i, ++entry_idx) {
         cmds[entry_idx].type = MSM_SUBMIT_CMD_BUF;
         cmds[entry_idx].submit_idx =
            dev->bo_idx[cs->entries[i].bo->gem_handle];
         cmds[entry_idx].submit_offset = cs->entries[i].offset;
         cmds[entry_idx].size = cs->entries[i].size;
         cmds[entry_idx].pad = 0;
         cmds[entry_idx].nr_relocs = 0;
         cmds[entry_idx].relocs = 0;
      }

      /* Append the timestamp-copy IB when one was created for this command
       * buffer (only reusable, traced command buffers get one — see
       * tu_queue_submit_create_locked); timestamp_copy_cs is NULL otherwise.
       */
      if (submit->cmd_buffer_trace_data) {
         struct tu_cs *ts_cs = submit->cmd_buffer_trace_data[j].timestamp_copy_cs;
         if (ts_cs) {
            cmds[entry_idx].type = MSM_SUBMIT_CMD_BUF;
            cmds[entry_idx].submit_idx =
               queue->device->bo_idx[ts_cs->entries[0].bo->gem_handle];

            assert(cmds[entry_idx].submit_idx < queue->device->bo_count);

            cmds[entry_idx].submit_offset = ts_cs->entries[0].offset;
            cmds[entry_idx].size = ts_cs->entries[0].size;
            cmds[entry_idx].pad = 0;
            cmds[entry_idx].nr_relocs = 0;
            cmds[entry_idx++].relocs = 0;
         }
      }
   }
}
1133
1134static VkResult
1135tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit)
1136{
1137   queue->device->submit_count++;
1138
1139#if HAVE_PERFETTO
1140   tu_perfetto_submit(queue->device, queue->device->submit_count);
1141#endif
1142
1143   uint32_t flags = MSM_PIPE_3D0;
1144
1145   if (submit->nr_in_syncobjs)
1146      flags |= MSM_SUBMIT_SYNCOBJ_IN;
1147
1148   if (submit->nr_out_syncobjs)
1149      flags |= MSM_SUBMIT_SYNCOBJ_OUT;
1150
1151   if (submit->last_submit)
1152      flags |= MSM_SUBMIT_FENCE_FD_OUT;
1153
1154   mtx_lock(&queue->device->bo_mutex);
1155
1156   /* drm_msm_gem_submit_cmd requires index of bo which could change at any
1157    * time when bo_mutex is not locked. So we build submit cmds here the real
1158    * place to submit.
1159    */
1160   tu_queue_build_msm_gem_submit_cmds(queue, submit);
1161
1162   struct drm_msm_gem_submit req = {
1163      .flags = flags,
1164      .queueid = queue->msm_queue_id,
1165      .bos = (uint64_t)(uintptr_t) queue->device->bo_list,
1166      .nr_bos = queue->device->bo_count,
1167      .cmds = (uint64_t)(uintptr_t)submit->cmds,
1168      .nr_cmds = submit->entry_count,
1169      .in_syncobjs = (uint64_t)(uintptr_t)submit->in_syncobjs,
1170      .out_syncobjs = (uint64_t)(uintptr_t)submit->out_syncobjs,
1171      .nr_in_syncobjs = submit->nr_in_syncobjs - submit->wait_timeline_count,
1172      .nr_out_syncobjs = submit->nr_out_syncobjs,
1173      .syncobj_stride = sizeof(struct drm_msm_gem_submit_syncobj),
1174   };
1175
1176   int ret = drmCommandWriteRead(queue->device->fd,
1177                                 DRM_MSM_GEM_SUBMIT,
1178                                 &req, sizeof(req));
1179
1180   mtx_unlock(&queue->device->bo_mutex);
1181
1182   if (ret)
1183      return tu_device_set_lost(queue->device, "submit failed: %s\n",
1184                                strerror(errno));
1185
1186   /* restore permanent payload on wait */
1187   for (uint32_t i = 0; i < submit->wait_semaphore_count; i++) {
1188      TU_FROM_HANDLE(tu_syncobj, sem, submit->wait_semaphores[i]);
1189      if(sem->type == TU_SEMAPHORE_BINARY)
1190         sync_set_temporary(queue->device, sem, 0);
1191   }
1192
1193   if (submit->last_submit) {
1194      if (queue->fence >= 0)
1195         close(queue->fence);
1196      queue->fence = req.fence_fd;
1197   }
1198
1199   /* Update highest_submitted values in the timeline. */
1200   for (uint32_t i = 0; i < submit->signal_timeline_count; i++) {
1201      struct tu_syncobj *sem = submit->signal_timelines[i];
1202      uint64_t signal_value = submit->signal_timeline_values[i];
1203
1204      assert(signal_value > sem->timeline.highest_submitted);
1205
1206      sem->timeline.highest_submitted = signal_value;
1207   }
1208
1209   if (submit->cmd_buffer_trace_data) {
1210      struct tu_u_trace_flush_data *flush_data =
1211         vk_alloc(&queue->device->vk.alloc, sizeof(struct tu_u_trace_flush_data),
1212               8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1213      flush_data->submission_id = queue->device->submit_count;
1214      flush_data->syncobj =
1215         vk_alloc(&queue->device->vk.alloc, sizeof(struct tu_u_trace_syncobj),
1216               8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1217      flush_data->syncobj->fence = req.fence;
1218      flush_data->syncobj->msm_queue_id = queue->msm_queue_id;
1219
1220      flush_data->cmd_trace_data = submit->cmd_buffer_trace_data;
1221      flush_data->trace_count = submit->cmd_buffer_count;
1222      submit->cmd_buffer_trace_data = NULL;
1223
1224      for (uint32_t i = 0; i < submit->cmd_buffer_count; i++) {
1225         bool free_data = i == (submit->cmd_buffer_count - 1);
1226         u_trace_flush(flush_data->cmd_trace_data[i].trace, flush_data, free_data);
1227      }
1228   }
1229
1230   pthread_cond_broadcast(&queue->device->timeline_cond);
1231
1232   return VK_SUCCESS;
1233}
1234
1235
1236static bool
1237tu_queue_submit_ready_locked(struct tu_queue_submit *submit)
1238{
1239   for (uint32_t i = 0; i < submit->wait_timeline_count; i++) {
1240      if (submit->wait_timeline_values[i] >
1241            submit->wait_timelines[i]->timeline.highest_submitted) {
1242         return false;
1243      }
1244   }
1245
1246   return true;
1247}
1248
1249static VkResult
1250tu_timeline_add_point_locked(struct tu_device *device,
1251                             struct tu_timeline *timeline,
1252                             uint64_t value,
1253                             struct tu_timeline_point **point)
1254{
1255
1256   if (list_is_empty(&timeline->free_points)) {
1257      *point = vk_zalloc(&device->vk.alloc, sizeof(**point), 8,
1258            VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1259
1260      if (!(*point))
1261         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1262
1263      struct drm_syncobj_create create = {};
1264
1265      int ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_CREATE, &create);
1266      if (ret) {
1267         vk_free(&device->vk.alloc, *point);
1268         return vk_error(device, VK_ERROR_DEVICE_LOST);
1269      }
1270
1271      (*point)->syncobj = create.handle;
1272
1273   } else {
1274      *point = list_first_entry(&timeline->free_points,
1275                                struct tu_timeline_point, link);
1276      list_del(&(*point)->link);
1277   }
1278
1279   (*point)->value = value;
1280   list_addtail(&(*point)->link, &timeline->points);
1281
1282   return VK_SUCCESS;
1283}
1284
static VkResult
tu_queue_submit_timeline_locked(struct tu_queue *queue,
                                struct tu_queue_submit *submit)
{
   VkResult result;
   /* Binary signal syncobjs occupy the front of out_syncobjs (filled in
    * tu_QueueSubmit); timeline points are appended at the tail from here.
    */
   uint32_t timeline_idx =
         submit->nr_out_syncobjs - submit->signal_timeline_count;

   for (uint32_t i = 0; i < submit->signal_timeline_count; i++) {
      struct tu_timeline *timeline = &submit->signal_timelines[i]->timeline;
      uint64_t signal_value = submit->signal_timeline_values[i];
      struct tu_timeline_point *point;

      /* Create (or recycle) a syncobj-backed point for this signal value. */
      result = tu_timeline_add_point_locked(queue->device, timeline,
            signal_value, &point);
      if (result != VK_SUCCESS)
         return result;

      submit->out_syncobjs[timeline_idx + i] =
         (struct drm_msm_gem_submit_syncobj) {
            .handle = point->syncobj,
            .flags = 0,
         };
   }

   return tu_queue_submit_locked(queue, submit);
}
1312
1313static VkResult
1314tu_queue_submit_deferred_locked(struct tu_queue *queue, uint32_t *advance)
1315{
1316   VkResult result = VK_SUCCESS;
1317
1318   list_for_each_entry_safe(struct tu_queue_submit, submit,
1319                            &queue->queued_submits, link) {
1320      if (!tu_queue_submit_ready_locked(submit))
1321         break;
1322
1323      (*advance)++;
1324
1325      result = tu_queue_submit_timeline_locked(queue, submit);
1326
1327      list_del(&submit->link);
1328      tu_queue_submit_free(queue, submit);
1329
1330      if (result != VK_SUCCESS)
1331         break;
1332   }
1333
1334   return result;
1335}
1336
1337VkResult
1338tu_device_submit_deferred_locked(struct tu_device *dev)
1339{
1340    VkResult result = VK_SUCCESS;
1341
1342    uint32_t advance = 0;
1343    do {
1344       advance = 0;
1345       for (uint32_t i = 0; i < dev->queue_count[0]; i++) {
1346          /* Try again if there's signaled submission. */
1347          result = tu_queue_submit_deferred_locked(&dev->queues[0][i],
1348                &advance);
1349          if (result != VK_SUCCESS)
1350             return result;
1351       }
1352
1353    } while(advance);
1354
1355    return result;
1356}
1357
1358static inline void
1359get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns)
1360{
1361   struct timespec t;
1362   clock_gettime(CLOCK_MONOTONIC, &t);
1363   tv->tv_sec = t.tv_sec + ns / 1000000000;
1364   tv->tv_nsec = t.tv_nsec + ns % 1000000000;
1365}
1366
VkResult
tu_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj)
{
   /* Wait up to one second for the kernel fence that a traced submission
    * produced, so its GPU timestamps can be read back.
    */
   struct drm_msm_wait_fence req = {
      .fence = syncobj->fence,
      .queueid = syncobj->msm_queue_id,
   };
   int ret;

   /* 1 second relative timeout, converted to absolute CLOCK_MONOTONIC. */
   get_abs_timeout(&req.timeout, 1000000000);

   ret = drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &req, sizeof(req));
   /* NOTE(review): -ETIMEDOUT falls through to VK_SUCCESS while any other
    * error returns VK_TIMEOUT — this inversion looks deliberate (best-effort
    * trace readback) but should be confirmed against the callers.
    */
   if (ret && (ret != -ETIMEDOUT)) {
      fprintf(stderr, "wait-fence failed! %d (%s)", ret, strerror(errno));
      return VK_TIMEOUT;
   }

   return VK_SUCCESS;
}
1386
VKAPI_ATTR VkResult VKAPI_CALL
tu_QueueSubmit(VkQueue _queue,
               uint32_t submitCount,
               const VkSubmitInfo *pSubmits,
               VkFence _fence)
{
   /* Queue each VkSubmitInfo as a deferred tu_queue_submit: binary-semaphore
    * syncobjs are recorded here, while timeline semaphores are resolved
    * later, when the submit becomes ready in the deferred-flush path.
    */
   TU_FROM_HANDLE(tu_queue, queue, _queue);
   TU_FROM_HANDLE(tu_syncobj, fence, _fence);

   for (uint32_t i = 0; i < submitCount; ++i) {
      const VkSubmitInfo *submit = pSubmits + i;
      const bool last_submit = (i == submitCount - 1);
      uint32_t out_syncobjs_size = submit->signalSemaphoreCount;

      const VkPerformanceQuerySubmitInfoKHR *perf_info =
         vk_find_struct_const(pSubmits[i].pNext,
                              PERFORMANCE_QUERY_SUBMIT_INFO_KHR);

      /* The fence is signaled via one extra out syncobj, attached only to
       * the batch's last submit.
       */
      if (last_submit && fence)
         out_syncobjs_size += 1;

      pthread_mutex_lock(&queue->device->submit_mutex);
      struct tu_queue_submit *submit_req = NULL;

      VkResult ret = tu_queue_submit_create_locked(queue, submit,
            submit->waitSemaphoreCount, out_syncobjs_size,
            last_submit, perf_info, &submit_req);

      if (ret != VK_SUCCESS) {
         pthread_mutex_unlock(&queue->device->submit_mutex);
         return ret;
      }

      /* note: assuming there won't be any very large semaphore counts */
      struct drm_msm_gem_submit_syncobj *in_syncobjs = submit_req->in_syncobjs;
      struct drm_msm_gem_submit_syncobj *out_syncobjs = submit_req->out_syncobjs;
      uint32_t nr_in_syncobjs = 0, nr_out_syncobjs = 0;

      /* Binary waits are consumed (RESET) by the kernel; a temporary payload,
       * when installed, takes precedence over the permanent one.
       * (The inner `i` intentionally shadows the submit index above.)
       */
      for (uint32_t i = 0; i < submit->waitSemaphoreCount; i++) {
         TU_FROM_HANDLE(tu_syncobj, sem, submit->pWaitSemaphores[i]);
         if (sem->type == TU_SEMAPHORE_TIMELINE)
            continue;

         in_syncobjs[nr_in_syncobjs++] = (struct drm_msm_gem_submit_syncobj) {
            .handle = sem->binary.temporary ?: sem->binary.permanent,
            .flags = MSM_SUBMIT_SYNCOBJ_RESET,
         };
      }

      for (uint32_t i = 0; i < submit->signalSemaphoreCount; i++) {
         TU_FROM_HANDLE(tu_syncobj, sem, submit->pSignalSemaphores[i]);

         /* In case of timeline semaphores, we can defer the creation of syncobj
          * and adding it at real submit time.
          */
         if (sem->type == TU_SEMAPHORE_TIMELINE)
            continue;

         out_syncobjs[nr_out_syncobjs++] = (struct drm_msm_gem_submit_syncobj) {
            .handle = sem->binary.temporary ?: sem->binary.permanent,
            .flags = 0,
         };
      }

      if (last_submit && fence) {
         out_syncobjs[nr_out_syncobjs++] = (struct drm_msm_gem_submit_syncobj) {
            .handle = fence->binary.temporary ?: fence->binary.permanent,
            .flags = 0,
         };
      }

      /* Queue the current submit */
      list_addtail(&submit_req->link, &queue->queued_submits);
      ret = tu_device_submit_deferred_locked(queue->device);

      pthread_mutex_unlock(&queue->device->submit_mutex);
      if (ret != VK_SUCCESS)
          return ret;
   }

   if (!submitCount && fence) {
      /* signal fence immediately since we don't have a submit to do it */
      drmIoctl(queue->device->fd, DRM_IOCTL_SYNCOBJ_SIGNAL, &(struct drm_syncobj_array) {
         .handles = (uintptr_t) (uint32_t[]) { fence->binary.temporary ?: fence->binary.permanent },
         .count_handles = 1,
      });
   }

   return VK_SUCCESS;
}
1477
1478VKAPI_ATTR VkResult VKAPI_CALL
1479tu_CreateFence(VkDevice device,
1480               const VkFenceCreateInfo *info,
1481               const VkAllocationCallbacks *pAllocator,
1482               VkFence *pFence)
1483{
1484   return sync_create(device, info->flags & VK_FENCE_CREATE_SIGNALED_BIT, true, true, 0,
1485                      pAllocator, (void**) pFence);
1486}
1487
1488VKAPI_ATTR void VKAPI_CALL
1489tu_DestroyFence(VkDevice device, VkFence fence, const VkAllocationCallbacks *pAllocator)
1490{
1491   TU_FROM_HANDLE(tu_syncobj, sync, fence);
1492   sync_destroy(device, sync, pAllocator);
1493}
1494
1495VKAPI_ATTR VkResult VKAPI_CALL
1496tu_ImportFenceFdKHR(VkDevice device, const VkImportFenceFdInfoKHR *info)
1497{
1498   TU_FROM_HANDLE(tu_syncobj, sync, info->fence);
1499   return sync_import(device, sync, info->flags & VK_FENCE_IMPORT_TEMPORARY_BIT,
1500         info->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT, info->fd);
1501}
1502
1503VKAPI_ATTR VkResult VKAPI_CALL
1504tu_GetFenceFdKHR(VkDevice device, const VkFenceGetFdInfoKHR *info, int *pFd)
1505{
1506   TU_FROM_HANDLE(tu_syncobj, sync, info->fence);
1507   return sync_export(device, sync,
1508         info->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT, pFd);
1509}
1510
1511static VkResult
1512drm_syncobj_wait(struct tu_device *device,
1513                 const uint32_t *handles, uint32_t count_handles,
1514                 int64_t timeout_nsec, bool wait_all)
1515{
1516   int ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_WAIT, &(struct drm_syncobj_wait) {
1517      .handles = (uint64_t) (uintptr_t) handles,
1518      .count_handles = count_handles,
1519      .timeout_nsec = timeout_nsec,
1520      .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT |
1521               COND(wait_all, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
1522   });
1523   if (ret) {
1524      if (errno == ETIME)
1525         return VK_TIMEOUT;
1526
1527      assert(0);
1528      return VK_ERROR_DEVICE_LOST; /* TODO */
1529   }
1530   return VK_SUCCESS;
1531}
1532
1533static uint64_t
1534gettime_ns(void)
1535{
1536   struct timespec current;
1537   clock_gettime(CLOCK_MONOTONIC, &current);
1538   return (uint64_t)current.tv_sec * 1000000000 + current.tv_nsec;
1539}
1540
/* Turn a relative timeout into an absolute CLOCK_MONOTONIC deadline —
 * the kernel then converts it right back to a relative timeout.
 */
static uint64_t
absolute_timeout(uint64_t timeout)
{
   /* 0 means "poll"; pass it straight through. */
   if (timeout == 0)
      return 0;

   const uint64_t now = gettime_ns();

   /* Clamp so now + timeout cannot overflow past INT64_MAX. */
   const uint64_t headroom = (uint64_t) INT64_MAX - now;
   if (timeout > headroom)
      timeout = headroom;

   return now + timeout;
}
1554
1555VKAPI_ATTR VkResult VKAPI_CALL
1556tu_WaitForFences(VkDevice _device,
1557                 uint32_t fenceCount,
1558                 const VkFence *pFences,
1559                 VkBool32 waitAll,
1560                 uint64_t timeout)
1561{
1562   TU_FROM_HANDLE(tu_device, device, _device);
1563
1564   if (tu_device_is_lost(device))
1565      return VK_ERROR_DEVICE_LOST;
1566
1567   uint32_t handles[fenceCount];
1568   for (unsigned i = 0; i < fenceCount; ++i) {
1569      TU_FROM_HANDLE(tu_syncobj, fence, pFences[i]);
1570      handles[i] = fence->binary.temporary ?: fence->binary.permanent;
1571   }
1572
1573   return drm_syncobj_wait(device, handles, fenceCount, absolute_timeout(timeout), waitAll);
1574}
1575
1576VKAPI_ATTR VkResult VKAPI_CALL
1577tu_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences)
1578{
1579   TU_FROM_HANDLE(tu_device, device, _device);
1580   int ret;
1581
1582   uint32_t handles[fenceCount];
1583   for (unsigned i = 0; i < fenceCount; ++i) {
1584      TU_FROM_HANDLE(tu_syncobj, fence, pFences[i]);
1585      sync_set_temporary(device, fence, 0);
1586      handles[i] = fence->binary.permanent;
1587   }
1588
1589   ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_RESET, &(struct drm_syncobj_array) {
1590      .handles = (uint64_t) (uintptr_t) handles,
1591      .count_handles = fenceCount,
1592   });
1593   if (ret) {
1594      tu_device_set_lost(device, "DRM_IOCTL_SYNCOBJ_RESET failure: %s",
1595                         strerror(errno));
1596   }
1597
1598   return VK_SUCCESS;
1599}
1600
1601VKAPI_ATTR VkResult VKAPI_CALL
1602tu_GetFenceStatus(VkDevice _device, VkFence _fence)
1603{
1604   TU_FROM_HANDLE(tu_device, device, _device);
1605   TU_FROM_HANDLE(tu_syncobj, fence, _fence);
1606   VkResult result;
1607
1608   result = drm_syncobj_wait(device, (uint32_t[]){fence->binary.temporary ?: fence->binary.permanent}, 1, 0, false);
1609   if (result == VK_TIMEOUT)
1610      result = VK_NOT_READY;
1611   return result;
1612}
1613
1614int
1615tu_signal_fences(struct tu_device *device, struct tu_syncobj *fence1, struct tu_syncobj *fence2)
1616{
1617   uint32_t handles[2], count = 0;
1618   if (fence1)
1619      handles[count++] = fence1->binary.temporary ?: fence1->binary.permanent;
1620
1621   if (fence2)
1622      handles[count++] = fence2->binary.temporary ?: fence2->binary.permanent;
1623
1624   if (!count)
1625      return 0;
1626
1627   return drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_SIGNAL, &(struct drm_syncobj_array) {
1628      .handles = (uintptr_t) handles,
1629      .count_handles = count
1630   });
1631}
1632
1633int
1634tu_syncobj_to_fd(struct tu_device *device, struct tu_syncobj *sync)
1635{
1636   struct drm_syncobj_handle handle = { .handle = sync->binary.permanent };
1637   int ret;
1638
1639   ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &handle);
1640
1641   return ret ? -1 : handle.fd;
1642}
1643
static VkResult
tu_timeline_gc_locked(struct tu_device *dev, struct tu_timeline *timeline)
{
   /* Move every already-signaled point from timeline->points to
    * timeline->free_points for reuse, raising highest_signaled along the
    * way. Called with submit_mutex held.
    */
   VkResult result = VK_SUCCESS;

   /* Go through every point in the timeline and check if any signaled point */
   list_for_each_entry_safe(struct tu_timeline_point, point,
                            &timeline->points, link) {

      /* If the value of the point is higher than highest_submitted,
       * the point has not been submited yet.  Points with a non-zero
       * wait_count are pinned by a concurrent waiter and must not be
       * recycled either.
       */
      if (point->wait_count || point->value > timeline->highest_submitted)
         return VK_SUCCESS;

      /* Zero timeout: poll the syncobj's current state. */
      result = drm_syncobj_wait(dev, (uint32_t[]){point->syncobj}, 1, 0, true);

      if (result == VK_TIMEOUT) {
         /* This means the syncobj is still busy and it should wait
          * with timeout specified by users via vkWaitSemaphores.
          */
         result = VK_SUCCESS;
      } else {
         /* NOTE(review): any non-timeout result — including errors — is
          * treated as "signaled" here; confirm error results cannot reach
          * this path.
          */
         timeline->highest_signaled =
               MAX2(timeline->highest_signaled, point->value);
         list_del(&point->link);
         list_add(&point->link, &timeline->free_points);
      }
   }

   return result;
}
1676
1677
static VkResult
tu_timeline_wait_locked(struct tu_device *device,
                        struct tu_timeline *timeline,
                        uint64_t value,
                        uint64_t abs_timeout)
{
   /* Block until the timeline reaches `value` or abs_timeout (absolute
    * nanoseconds, monotonic clock) expires. Called with submit_mutex held;
    * the mutex is dropped while waiting on a syncobj and re-acquired after.
    */
   VkResult result;

   /* First wait until a submission that can signal `value` has actually
    * reached the kernel; deferred submits only bump highest_submitted once
    * they are flushed.
    */
   while(timeline->highest_submitted < value) {
      struct timespec abstime;
      timespec_from_nsec(&abstime, abs_timeout);

      pthread_cond_timedwait(&device->timeline_cond, &device->submit_mutex,
            &abstime);

      /* Re-check after a (possibly spurious or timed-out) wakeup. */
      if (os_time_get_nano() >= abs_timeout &&
            timeline->highest_submitted < value)
         return VK_TIMEOUT;
   }

   /* Visit every point in the timeline and wait until
    * the highest_signaled reaches the value.
    */
   while (1) {
      result = tu_timeline_gc_locked(device, timeline);
      if (result != VK_SUCCESS)
         return result;

      if (timeline->highest_signaled >= value)
          return VK_SUCCESS;

      /* Wait on the oldest outstanding point; wait_count pins it so the GC
       * above cannot recycle its syncobj while we are outside the lock.
       */
      struct tu_timeline_point *point =
            list_first_entry(&timeline->points,
                             struct tu_timeline_point, link);

      point->wait_count++;
      pthread_mutex_unlock(&device->submit_mutex);
      result = drm_syncobj_wait(device, (uint32_t[]){point->syncobj}, 1,
                                abs_timeout, true);

      pthread_mutex_lock(&device->submit_mutex);
      point->wait_count--;

      if (result != VK_SUCCESS)
         return result;
   }

   /* Unreachable: the loop above always returns. */
   return result;
}
1727
static VkResult
tu_wait_timelines(struct tu_device *device,
                  const VkSemaphoreWaitInfoKHR* pWaitInfo,
                  uint64_t abs_timeout)
{
   /* WAIT_ANY over several semaphores: poll each one (zero timeout) in a
    * busy loop until one succeeds or abs_timeout passes.
    */
   if ((pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR) &&
         pWaitInfo->semaphoreCount > 1) {
      pthread_mutex_lock(&device->submit_mutex);

      /* Visit every timeline semaphore in the queue until timeout */
      while (1) {
         for(uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
            TU_FROM_HANDLE(tu_syncobj, semaphore, pWaitInfo->pSemaphores[i]);
            VkResult result = tu_timeline_wait_locked(device,
                  &semaphore->timeline, pWaitInfo->pValues[i], 0);

            /* Returns result values including VK_SUCCESS except for VK_TIMEOUT */
            if (result != VK_TIMEOUT) {
               pthread_mutex_unlock(&device->submit_mutex);
               return result;
            }
         }

         if (os_time_get_nano() > abs_timeout) {
            pthread_mutex_unlock(&device->submit_mutex);
            return VK_TIMEOUT;
         }
      }
   } else {
      /* WAIT_ALL (or a single semaphore): wait on each in turn with the
       * full absolute timeout.
       */
      VkResult result = VK_SUCCESS;

      pthread_mutex_lock(&device->submit_mutex);
      for(uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
         TU_FROM_HANDLE(tu_syncobj, semaphore, pWaitInfo->pSemaphores[i]);
         assert(semaphore->type == TU_SEMAPHORE_TIMELINE);

         result = tu_timeline_wait_locked(device, &semaphore->timeline,
               pWaitInfo->pValues[i], abs_timeout);
         if (result != VK_SUCCESS)
            break;
      }
      pthread_mutex_unlock(&device->submit_mutex);

      return result;
   }
}
1774
1775
1776VKAPI_ATTR VkResult VKAPI_CALL
1777tu_GetSemaphoreCounterValue(VkDevice _device,
1778                            VkSemaphore _semaphore,
1779                            uint64_t* pValue)
1780{
1781   TU_FROM_HANDLE(tu_device, device, _device);
1782   TU_FROM_HANDLE(tu_syncobj, semaphore, _semaphore);
1783
1784   assert(semaphore->type == TU_SEMAPHORE_TIMELINE);
1785
1786   VkResult result;
1787
1788   pthread_mutex_lock(&device->submit_mutex);
1789
1790   result = tu_timeline_gc_locked(device, &semaphore->timeline);
1791   *pValue = semaphore->timeline.highest_signaled;
1792
1793   pthread_mutex_unlock(&device->submit_mutex);
1794
1795   return result;
1796}
1797
1798
1799VKAPI_ATTR VkResult VKAPI_CALL
1800tu_WaitSemaphores(VkDevice _device,
1801                  const VkSemaphoreWaitInfoKHR* pWaitInfo,
1802                  uint64_t timeout)
1803{
1804   TU_FROM_HANDLE(tu_device, device, _device);
1805
1806   return tu_wait_timelines(device, pWaitInfo, absolute_timeout(timeout));
1807}
1808
VKAPI_ATTR VkResult VKAPI_CALL
tu_SignalSemaphore(VkDevice _device,
                   const VkSemaphoreSignalInfoKHR* pSignalInfo)
{
   /* Host-side signal of a timeline semaphore: advance the timeline to
    * pSignalInfo->value and flush any deferred submits that were waiting.
    */
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_syncobj, semaphore, pSignalInfo->semaphore);
   VkResult result;

   assert(semaphore->type == TU_SEMAPHORE_TIMELINE);

   pthread_mutex_lock(&device->submit_mutex);

   /* Recycle finished points before moving the timeline forward. */
   result = tu_timeline_gc_locked(device, &semaphore->timeline);
   if (result != VK_SUCCESS) {
      pthread_mutex_unlock(&device->submit_mutex);
      return result;
   }

   /* NOTE(review): assumes pSignalInfo->value is strictly greater than the
    * current value (as the Vulkan spec requires of the app) — no check here.
    */
   semaphore->timeline.highest_submitted = pSignalInfo->value;
   semaphore->timeline.highest_signaled = pSignalInfo->value;

   /* The new value may unblock queued submissions on any queue. */
   result = tu_device_submit_deferred_locked(device);

   pthread_cond_broadcast(&device->timeline_cond);
   pthread_mutex_unlock(&device->submit_mutex);

   return result;
}
1837
#ifdef ANDROID
#include <libsync.h>

/* Combine the wait semaphores into a single sync-fd that the Android
 * window system can wait on before consuming the released image.
 *
 * Ownership of the merged fd transfers to the caller through
 * *pNativeFenceFd; -1 means there is nothing to wait on.  If the caller
 * did not ask for a native fence, the semaphores are still exported (to
 * reset them, per the comment below) and the merged fd is closed here.
 */
VKAPI_ATTR VkResult VKAPI_CALL
tu_QueueSignalReleaseImageANDROID(VkQueue _queue,
                                  uint32_t waitSemaphoreCount,
                                  const VkSemaphore *pWaitSemaphores,
                                  VkImage image,
                                  int *pNativeFenceFd)
{
   TU_FROM_HANDLE(tu_queue, queue, _queue);
   VkResult result = VK_SUCCESS;

   /* Nothing to wait on: report "no fence" and succeed. */
   if (waitSemaphoreCount == 0) {
      if (pNativeFenceFd)
         *pNativeFenceFd = -1;
      return VK_SUCCESS;
   }

   /* Accumulator for the merged sync fd; -1 until the first export. */
   int fd = -1;

   for (uint32_t i = 0; i < waitSemaphoreCount; ++i) {
      int tmp_fd;
      /* Export each semaphore as a SYNC_FD-type fd. */
      result = tu_GetSemaphoreFdKHR(
         tu_device_to_handle(queue->device),
         &(VkSemaphoreGetFdInfoKHR) {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
            .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
            .semaphore = pWaitSemaphores[i],
         },
         &tmp_fd);
      if (result != VK_SUCCESS) {
         /* Don't leak the partially-merged fd on error. */
         if (fd >= 0)
            close(fd);
         return result;
      }

      if (fd < 0)
         fd = tmp_fd;  /* First exported fd becomes the accumulator. */
      else if (tmp_fd >= 0) {
         /* Merge into the accumulator; sync_accumulate replaces fd with
          * the merged fence, so the input tmp_fd is ours to close.
          * NOTE(review): presumably a negative tmp_fd means "already
          * signaled, no fence needed" — confirm against
          * tu_GetSemaphoreFdKHR. */
         sync_accumulate("tu", &fd, tmp_fd);
         close(tmp_fd);
      }
   }

   if (pNativeFenceFd) {
      /* Caller takes ownership of the merged fd (or -1). */
      *pNativeFenceFd = fd;
   } else if (fd >= 0) {
      close(fd);
      /* We still need to do the exports, to reset the semaphores, but
       * otherwise we don't wait on them. */
   }
   return VK_SUCCESS;
}
#endif
1893