1/*
2 * Copyright © 2008 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 *    Eric Anholt <eric@anholt.net>
25 *
26 */
27
28/**
29 * \file
30 * \brief Support for GL_ARB_sync and EGL_KHR_fence_sync.
31 *
32 * GL_ARB_sync is implemented by flushing the current batchbuffer and keeping a
33 * reference on it.  We can then check for completion or wait for completion
34 * using the normal buffer object mechanisms.  This does mean that if an
35 * application is using many sync objects, it will emit small batchbuffers
36 * which may end up being a significant overhead.  In other tests of removing
37 * gratuitous batchbuffer syncs in Mesa, it hasn't appeared to be a significant
38 * performance bottleneck, though.
39 */
40
41#include <libsync.h> /* Requires Android or libdrm-2.4.72 */
42
43#include "util/os_file.h"
44#include "util/u_memory.h"
45#include <xf86drm.h>
46
47#include "brw_context.h"
48#include "brw_batch.h"
49#include "mesa/main/externalobjects.h"
50
/* A fence is either a reference to a flushed batchbuffer BO (completion of
 * the BO signals the fence) or an owned sync file descriptor.  `mutex`
 * guards all mutable state; `signalled` latches true once completion has
 * been observed and is never cleared.
 */
struct brw_fence {
   struct brw_context *brw;

   enum brw_fence_type {
      /** The fence waits for completion of brw_fence::batch_bo. */
      BRW_FENCE_TYPE_BO_WAIT,

      /** The fence waits for brw_fence::sync_fd to signal. */
      BRW_FENCE_TYPE_SYNC_FD,
   } type;

   /* Active member is selected by `type` above. */
   union {
      struct brw_bo *batch_bo;

      /* This struct owns the fd. */
      int sync_fd;
   };

   mtx_t mutex;
   bool signalled;
};
72
/* GL_ARB_sync object.  The core gl_sync_object must remain the first member
 * so that a gl_sync_object pointer can be downcast to brw_gl_sync.
 */
struct brw_gl_sync {
   struct gl_sync_object gl;
   struct brw_fence fence;
};
77
/* EXT_semaphore object wrapping a DRM syncobj.  Base must remain the first
 * member so that a gl_semaphore_object pointer can be downcast.  `syncobj`
 * is heap-allocated by intel_semaphoreobj_import() (NULL until then).
 */
struct intel_semaphore_object {
   struct gl_semaphore_object Base;
   struct drm_syncobj_handle *syncobj;
};
82
/* Downcast a core gl_semaphore_object to the driver wrapper.  Safe because
 * Base is the first member of struct intel_semaphore_object.
 */
static inline struct intel_semaphore_object *
intel_semaphore_object(struct gl_semaphore_object *base)
{
   return (struct intel_semaphore_object *) base;
}
87
88static struct gl_semaphore_object *
89intel_semaphoreobj_alloc(struct gl_context *ctx, GLuint name)
90{
91   struct intel_semaphore_object *is_obj = CALLOC_STRUCT(intel_semaphore_object);
92   if (!is_obj)
93      return NULL;
94
95   _mesa_initialize_semaphore_object(ctx, &is_obj->Base, name);
96   return &is_obj->Base;
97}
98
99static void
100intel_semaphoreobj_free(struct gl_context *ctx,
101                     struct gl_semaphore_object *semObj)
102{
103   _mesa_delete_semaphore_object(ctx, semObj);
104}
105
106static void
107intel_semaphoreobj_import(struct gl_context *ctx,
108                                struct gl_semaphore_object *semObj,
109                                int fd)
110{
111   struct brw_context *brw = brw_context(ctx);
112   struct brw_screen *screen = brw->screen;
113   struct intel_semaphore_object *iSemObj = intel_semaphore_object(semObj);
114   iSemObj->syncobj = CALLOC_STRUCT(drm_syncobj_handle);
115   iSemObj->syncobj->fd = fd;
116
117   if (drmIoctl(screen->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, iSemObj->syncobj) < 0) {
118      fprintf(stderr, "DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE failed: %s\n",
119              strerror(errno));
120      free(iSemObj->syncobj);
121   }
122}
123
124static void
125intel_semaphoreobj_signal(struct gl_context *ctx,
126                                       struct gl_semaphore_object *semObj,
127                                       GLuint numBufferBarriers,
128                                       struct gl_buffer_object **bufObjs,
129                                       GLuint numTextureBarriers,
130                                       struct gl_texture_object **texObjs,
131                                       const GLenum *dstLayouts)
132{
133   struct brw_context *brw = brw_context(ctx);
134   struct intel_semaphore_object *iSemObj = intel_semaphore_object(semObj);
135   struct drm_i915_gem_exec_fence *fence =
136      util_dynarray_grow(&brw->batch.exec_fences, struct drm_i915_gem_exec_fence *, 1);
137   fence->flags = I915_EXEC_FENCE_SIGNAL;
138   fence->handle = iSemObj->syncobj->handle;
139   brw->batch.contains_fence_signal = true;
140}
141
142static void
143intel_semaphoreobj_wait(struct gl_context *ctx,
144                                     struct gl_semaphore_object *semObj,
145                                     GLuint numBufferBarriers,
146                                     struct gl_buffer_object **bufObjs,
147                                     GLuint numTextureBarriers,
148                                     struct gl_texture_object **texObjs,
149                                     const GLenum *srcLayouts)
150{
151   struct brw_context *brw = brw_context(ctx);
152   struct brw_screen *screen = brw->screen;
153   struct intel_semaphore_object *iSemObj = intel_semaphore_object(semObj);
154   struct drm_syncobj_wait args = {
155      .handles = (uintptr_t)&iSemObj->syncobj->handle,
156      .count_handles = 1,
157   };
158
159   drmIoctl(screen->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args);
160}
161
162static void
163brw_fence_init(struct brw_context *brw, struct brw_fence *fence,
164               enum brw_fence_type type)
165{
166   fence->brw = brw;
167   fence->type = type;
168   mtx_init(&fence->mutex, mtx_plain);
169
170   switch (type) {
171   case BRW_FENCE_TYPE_BO_WAIT:
172      fence->batch_bo = NULL;
173      break;
174    case BRW_FENCE_TYPE_SYNC_FD:
175      fence->sync_fd = -1;
176      break;
177   }
178}
179
180static void
181brw_fence_finish(struct brw_fence *fence)
182{
183   switch (fence->type) {
184   case BRW_FENCE_TYPE_BO_WAIT:
185      if (fence->batch_bo)
186         brw_bo_unreference(fence->batch_bo);
187      break;
188   case BRW_FENCE_TYPE_SYNC_FD:
189      if (fence->sync_fd != -1)
190         close(fence->sync_fd);
191      break;
192   }
193
194   mtx_destroy(&fence->mutex);
195}
196
/* Arm the fence: resolve the drawable, flush, and attach the fence to the
 * just-flushed work.  Caller must hold fence->mutex.  Returns false if the
 * batch could not be flushed (the fence is left unarmed).
 */
static bool MUST_CHECK
brw_fence_insert_locked(struct brw_context *brw, struct brw_fence *fence)
{
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *driDrawable = driContext->driDrawablePriv;

   /*
    * From KHR_fence_sync:
    *
    *   When the condition of the sync object is satisfied by the fence
    *   command, the sync is signaled by the associated client API context,
    *   causing any eglClientWaitSyncKHR commands (see below) blocking on
    *   <sync> to unblock. The only condition currently supported is
    *   EGL_SYNC_PRIOR_COMMANDS_COMPLETE_KHR, which is satisfied by
    *   completion of the fence command corresponding to the sync object,
    *   and all preceding commands in the associated client API context's
    *   command stream. The sync object will not be signaled until all
    *   effects from these commands on the client API's internal and
    *   framebuffer state are fully realized. No other state is affected by
    *   execution of the fence command.
    *
    * Note the emphasis there on ensuring that the framebuffer is fully
    * realised before the fence is signaled. We cannot just flush the batch,
    * but must also resolve the drawable first. The importance of this is,
    * for example, in creating a fence for a frame to be passed to a
    * remote compositor. Without us flushing the drawable explicitly, the
    * resolve will be in a following batch (when the client finally calls
    * SwapBuffers, or triggers a resolve via some other path) and so the
    * compositor may read the incomplete framebuffer instead.
    */
   if (driDrawable)
      brw_resolve_for_dri2_flush(brw, driDrawable);
   brw_emit_mi_flush(brw);

   switch (fence->type) {
   case BRW_FENCE_TYPE_BO_WAIT:
      assert(!fence->batch_bo);
      assert(!fence->signalled);

      /* Hold a reference to the current batch bo; its completion is the
       * fence's signal condition.
       */
      fence->batch_bo = brw->batch.batch.bo;
      brw_bo_reference(fence->batch_bo);

      if (brw_batch_flush(brw) < 0) {
         /* Flush failed: drop the reference so the fence reports "no
          * batch" instead of waiting on a bo that was never submitted.
          */
         brw_bo_unreference(fence->batch_bo);
         fence->batch_bo = NULL;
         return false;
      }
      break;
   case BRW_FENCE_TYPE_SYNC_FD:
      assert(!fence->signalled);

      if (fence->sync_fd == -1) {
         /* Create an out-fence that signals after all pending commands
          * complete.
          */
         if (brw_batch_flush_fence(brw, -1, &fence->sync_fd) < 0)
            return false;
         assert(fence->sync_fd != -1);
      } else {
         /* Wait on the in-fence before executing any subsequently submitted
          * commands.
          */
         if (brw_batch_flush(brw) < 0)
            return false;

         /* Emit a dummy batch just for the fence. */
         brw_emit_mi_flush(brw);
         if (brw_batch_flush_fence(brw, fence->sync_fd, NULL) < 0)
            return false;
      }
      break;
   }

   return true;
}
272
273static bool MUST_CHECK
274brw_fence_insert(struct brw_context *brw, struct brw_fence *fence)
275{
276   bool ret;
277
278   mtx_lock(&fence->mutex);
279   ret = brw_fence_insert_locked(brw, fence);
280   mtx_unlock(&fence->mutex);
281
282   return ret;
283}
284
/* Non-blocking poll of the fence.  Caller must hold fence->mutex.  On the
 * first observed completion, latches fence->signalled and (for BO fences)
 * drops the batch-bo reference.
 */
static bool
brw_fence_has_completed_locked(struct brw_fence *fence)
{
   if (fence->signalled)
      return true;

   switch (fence->type) {
   case BRW_FENCE_TYPE_BO_WAIT:
      if (!fence->batch_bo) {
         /* There may be no batch if brw_batch_flush() failed. */
         return false;
      }

      if (brw_bo_busy(fence->batch_bo))
         return false;

      /* The batch finished: release the bo and latch completion. */
      brw_bo_unreference(fence->batch_bo);
      fence->batch_bo = NULL;
      fence->signalled = true;

      return true;

   case BRW_FENCE_TYPE_SYNC_FD:
      assert(fence->sync_fd != -1);

      /* A zero-timeout sync_wait() acts as a poll. */
      if (sync_wait(fence->sync_fd, 0) == -1)
         return false;

      fence->signalled = true;

      return true;
   }

   return false;
}
320
321static bool
322brw_fence_has_completed(struct brw_fence *fence)
323{
324   bool ret;
325
326   mtx_lock(&fence->mutex);
327   ret = brw_fence_has_completed_locked(fence);
328   mtx_unlock(&fence->mutex);
329
330   return ret;
331}
332
/* Block on the fence for up to `timeout`.  Caller must hold fence->mutex.
 * Returns true once the fence has signalled, false on timeout or error.
 *
 * NOTE(review): the raw timeout is forwarded both to brw_bo_wait() and
 * (truncated to 32 bits) to sync_wait() — confirm the units each backend
 * expects match what callers pass in.
 */
static bool
brw_fence_client_wait_locked(struct brw_context *brw, struct brw_fence *fence,
                             uint64_t timeout)
{
   int32_t timeout_i32;

   if (fence->signalled)
      return true;

   switch (fence->type) {
   case BRW_FENCE_TYPE_BO_WAIT:
      if (!fence->batch_bo) {
         /* There may be no batch if brw_batch_flush() failed. */
         return false;
      }

      /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and returns
       * immediately for timeouts <= 0.  The best we can do is to clamp the
       * timeout to INT64_MAX.  This limits the maximum timeout from 584 years to
       * 292 years - likely not a big deal.
       */
      if (timeout > INT64_MAX)
         timeout = INT64_MAX;

      if (brw_bo_wait(fence->batch_bo, timeout) != 0)
         return false;

      /* Completed: latch the result and release the bo. */
      fence->signalled = true;
      brw_bo_unreference(fence->batch_bo);
      fence->batch_bo = NULL;

      return true;
   case BRW_FENCE_TYPE_SYNC_FD:
      if (fence->sync_fd == -1)
         return false;

      /* sync_wait() takes a 32-bit timeout; -1 means wait forever, which
       * is the closest representation of an out-of-range timeout.
       */
      if (timeout > INT32_MAX)
         timeout_i32 = -1;
      else
         timeout_i32 = timeout;

      if (sync_wait(fence->sync_fd, timeout_i32) == -1)
         return false;

      fence->signalled = true;
      return true;
   }

   assert(!"bad enum brw_fence_type");
   return false;
}
384
385/**
386 * Return true if the function successfully signals or has already signalled.
387 * (This matches the behavior expected from __DRI2fence::client_wait_sync).
388 */
389static bool
390brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence,
391                      uint64_t timeout)
392{
393   bool ret;
394
395   mtx_lock(&fence->mutex);
396   ret = brw_fence_client_wait_locked(brw, fence, timeout);
397   mtx_unlock(&fence->mutex);
398
399   return ret;
400}
401
/* Make subsequently submitted GPU work wait for the fence (server-side
 * wait).  For BO fences this is a no-op; for sync-fd fences the fd is
 * re-inserted so the kernel waits on it before later batches execute.
 */
static void
brw_fence_server_wait(struct brw_context *brw, struct brw_fence *fence)
{
   switch (fence->type) {
   case BRW_FENCE_TYPE_BO_WAIT:
      /* We have nothing to do for WaitSync.  Our GL command stream is sequential,
       * so given that the sync object has already flushed the batchbuffer, any
       * batchbuffers coming after this waitsync will naturally not occur until
       * the previous one is done.
       */
      break;
   case BRW_FENCE_TYPE_SYNC_FD:
      assert(fence->sync_fd != -1);

      /* The user wants explicit synchronization, so give them what they want. */
      if (!brw_fence_insert(brw, fence)) {
         /* FIXME: There exists no way yet to report an error here. If an error
          * occurs, continue silently and hope for the best.
          */
      }
      break;
   }
}
425
426static struct gl_sync_object *
427brw_gl_new_sync(struct gl_context *ctx)
428{
429   struct brw_gl_sync *sync;
430
431   sync = calloc(1, sizeof(*sync));
432   if (!sync)
433      return NULL;
434
435   return &sync->gl;
436}
437
438static void
439brw_gl_delete_sync(struct gl_context *ctx, struct gl_sync_object *_sync)
440{
441   struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
442
443   brw_fence_finish(&sync->fence);
444   free(sync->gl.Label);
445   free(sync);
446}
447
448static void
449brw_gl_fence_sync(struct gl_context *ctx, struct gl_sync_object *_sync,
450                  GLenum condition, GLbitfield flags)
451{
452   struct brw_context *brw = brw_context(ctx);
453   struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
454
455   /* brw_fence_insert_locked() assumes it must do a complete flush */
456   assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE);
457
458   brw_fence_init(brw, &sync->fence, BRW_FENCE_TYPE_BO_WAIT);
459
460   if (!brw_fence_insert_locked(brw, &sync->fence)) {
461      /* FIXME: There exists no way to report a GL error here. If an error
462       * occurs, continue silently and hope for the best.
463       */
464   }
465}
466
467static void
468brw_gl_client_wait_sync(struct gl_context *ctx, struct gl_sync_object *_sync,
469                        GLbitfield flags, GLuint64 timeout)
470{
471   struct brw_context *brw = brw_context(ctx);
472   struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
473
474   if (brw_fence_client_wait(brw, &sync->fence, timeout))
475      sync->gl.StatusFlag = 1;
476}
477
478static void
479brw_gl_server_wait_sync(struct gl_context *ctx, struct gl_sync_object *_sync,
480                          GLbitfield flags, GLuint64 timeout)
481{
482   struct brw_context *brw = brw_context(ctx);
483   struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
484
485   brw_fence_server_wait(brw, &sync->fence);
486}
487
488static void
489brw_gl_check_sync(struct gl_context *ctx, struct gl_sync_object *_sync)
490{
491   struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
492
493   if (brw_fence_has_completed(&sync->fence))
494      sync->gl.StatusFlag = 1;
495}
496
497void
498brw_init_syncobj_functions(struct dd_function_table *functions)
499{
500   functions->NewSyncObject = brw_gl_new_sync;
501   functions->DeleteSyncObject = brw_gl_delete_sync;
502   functions->FenceSync = brw_gl_fence_sync;
503   functions->CheckSync = brw_gl_check_sync;
504   functions->ClientWaitSync = brw_gl_client_wait_sync;
505   functions->ServerWaitSync = brw_gl_server_wait_sync;
506   functions->NewSemaphoreObject = intel_semaphoreobj_alloc;
507   functions->DeleteSemaphoreObject = intel_semaphoreobj_free;
508   functions->ImportSemaphoreFd = intel_semaphoreobj_import;
509   functions->ServerSignalSemaphoreObject = intel_semaphoreobj_signal;
510   functions->ServerWaitSemaphoreObject = intel_semaphoreobj_wait;
511}
512
513static void *
514brw_dri_create_fence(__DRIcontext *ctx)
515{
516   struct brw_context *brw = ctx->driverPrivate;
517   struct brw_fence *fence;
518
519   fence = calloc(1, sizeof(*fence));
520   if (!fence)
521      return NULL;
522
523   brw_fence_init(brw, fence, BRW_FENCE_TYPE_BO_WAIT);
524
525   if (!brw_fence_insert_locked(brw, fence)) {
526      brw_fence_finish(fence);
527      free(fence);
528      return NULL;
529   }
530
531   return fence;
532}
533
534static void
535brw_dri_destroy_fence(__DRIscreen *dri_screen, void *_fence)
536{
537   struct brw_fence *fence = _fence;
538
539   brw_fence_finish(fence);
540   free(fence);
541}
542
543static GLboolean
544brw_dri_client_wait_sync(__DRIcontext *ctx, void *_fence, unsigned flags,
545                         uint64_t timeout)
546{
547   struct brw_fence *fence = _fence;
548
549   return brw_fence_client_wait(fence->brw, fence, timeout);
550}
551
552static void
553brw_dri_server_wait_sync(__DRIcontext *ctx, void *_fence, unsigned flags)
554{
555   struct brw_fence *fence = _fence;
556
557   /* We might be called here with a NULL fence as a result of WaitSyncKHR
558    * on a EGL_KHR_reusable_sync fence. Nothing to do here in such case.
559    */
560   if (!fence)
561      return;
562
563   brw_fence_server_wait(fence->brw, fence);
564}
565
566static unsigned
567brw_dri_get_capabilities(__DRIscreen *dri_screen)
568{
569   struct brw_screen *screen = dri_screen->driverPrivate;
570   unsigned caps = 0;
571
572   if (screen->has_exec_fence)
573      caps |=  __DRI_FENCE_CAP_NATIVE_FD;
574
575   return caps;
576}
577
578static void *
579brw_dri_create_fence_fd(__DRIcontext *dri_ctx, int fd)
580{
581   struct brw_context *brw = dri_ctx->driverPrivate;
582   struct brw_fence *fence;
583
584   assert(brw->screen->has_exec_fence);
585
586   fence = calloc(1, sizeof(*fence));
587   if (!fence)
588      return NULL;
589
590   brw_fence_init(brw, fence, BRW_FENCE_TYPE_SYNC_FD);
591
592   if (fd == -1) {
593      /* Create an out-fence fd */
594      if (!brw_fence_insert_locked(brw, fence))
595         goto fail;
596   } else {
597      /* Import the sync fd as an in-fence. */
598      fence->sync_fd = os_dupfd_cloexec(fd);
599   }
600
601   assert(fence->sync_fd != -1);
602
603   return fence;
604
605fail:
606   brw_fence_finish(fence);
607   free(fence);
608   return NULL;
609}
610
611static int
612brw_dri_get_fence_fd_locked(struct brw_fence *fence)
613{
614   assert(fence->type == BRW_FENCE_TYPE_SYNC_FD);
615   return os_dupfd_cloexec(fence->sync_fd);
616}
617
618static int
619brw_dri_get_fence_fd(__DRIscreen *dri_screen, void *_fence)
620{
621   struct brw_fence *fence = _fence;
622   int fd;
623
624   mtx_lock(&fence->mutex);
625   fd = brw_dri_get_fence_fd_locked(fence);
626   mtx_unlock(&fence->mutex);
627
628   return fd;
629}
630
/* DRI2 fence extension dispatch table (version 2).  get_fence_from_cl_event
 * is unimplemented on this driver.
 */
const __DRI2fenceExtension brwFenceExtension = {
   .base = { __DRI2_FENCE, 2 },

   .create_fence = brw_dri_create_fence,
   .destroy_fence = brw_dri_destroy_fence,
   .client_wait_sync = brw_dri_client_wait_sync,
   .server_wait_sync = brw_dri_server_wait_sync,
   .get_fence_from_cl_event = NULL,
   .get_capabilities = brw_dri_get_capabilities,
   .create_fence_fd = brw_dri_create_fence_fd,
   .get_fence_fd = brw_dri_get_fence_fd,
};
643