crocus_bufmgr.c revision 7ec681f3
1/*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23/**
24 * @file crocus_bufmgr.c
25 *
26 * The crocus buffer manager.
27 *
28 * XXX: write better comments
29 * - BOs
30 * - Explain BO cache
31 * - main interface to GEM in the kernel
32 */
33
34#ifdef HAVE_CONFIG_H
35#include "config.h"
36#endif
37
38#include <xf86drm.h>
39#include <util/u_atomic.h>
40#include <fcntl.h>
41#include <stdio.h>
42#include <stdlib.h>
43#include <string.h>
44#include <unistd.h>
45#include <assert.h>
46#include <sys/ioctl.h>
47#include <sys/mman.h>
48#include <sys/stat.h>
49#include <sys/types.h>
50#include <stdbool.h>
51#include <time.h>
52
53#include "errno.h"
54#include "common/intel_clflush.h"
55#include "dev/intel_debug.h"
56#include "common/intel_gem.h"
57#include "dev/intel_device_info.h"
58#include "main/macros.h"
59#include "util/debug.h"
60#include "util/macros.h"
61#include "util/hash_table.h"
62#include "util/list.h"
63#include "util/os_file.h"
64#include "util/u_dynarray.h"
65#include "util/vma.h"
66#include "crocus_bufmgr.h"
67#include "crocus_context.h"
68#include "string.h"
69
70#include "drm-uapi/i915_drm.h"
71
72#ifdef HAVE_VALGRIND
73#include <valgrind.h>
74#include <memcheck.h>
75#define VG(x) x
76#else
77#define VG(x)
78#endif
79
80/**
81 * For debugging purposes, this returns a time in seconds.
82 */
83static double
84get_time(void)
85{
86   struct timespec tp;
87
88   clock_gettime(CLOCK_MONOTONIC, &tp);
89
90   return tp.tv_sec + tp.tv_nsec / 1000000000.0;
91}
92
93/* VALGRIND_FREELIKE_BLOCK unfortunately does not actually undo the earlier
94 * VALGRIND_MALLOCLIKE_BLOCK but instead leaves vg convinced the memory is
95 * leaked. All because it does not call VG(cli_free) from its
96 * VG_USERREQ__FREELIKE_BLOCK handler. Instead of treating the memory like
97 * an allocation, we mark it available for use upon mmapping and remove
98 * it upon unmapping.
99 */
100#define VG_DEFINED(ptr, size) VG(VALGRIND_MAKE_MEM_DEFINED(ptr, size))
101#define VG_NOACCESS(ptr, size) VG(VALGRIND_MAKE_MEM_NOACCESS(ptr, size))
102
103#define PAGE_SIZE 4096
104
105#define WARN_ONCE(cond, fmt...) do {                            \
106   if (unlikely(cond)) {                                        \
107      static bool _warned = false;                              \
108      if (!_warned) {                                           \
109         fprintf(stderr, "WARNING: ");                          \
110         fprintf(stderr, fmt);                                  \
111         _warned = true;                                        \
112      }                                                         \
113   }                                                            \
114} while (0)
115
116#define FILE_DEBUG_FLAG DEBUG_BUFMGR
117
118struct bo_cache_bucket {
119   /** List of cached BOs. */
120   struct list_head head;
121
122   /** Size of this bucket, in bytes. */
123   uint64_t size;
124};
125
126struct bo_export {
127   /** File descriptor associated with a handle export. */
128   int drm_fd;
129
130   /** GEM handle in drm_fd */
131   uint32_t gem_handle;
132
133   struct list_head link;
134};
135
136struct crocus_bufmgr {
137   /**
138    * Link in the global list of buffer managers (global_bufmgr_list).
139    */
140   struct list_head link;
141
142   uint32_t refcount;
143
144   int fd;
145
146   simple_mtx_t lock;
147
148   /** Array of lists of cached gem objects, one list per bucket size */
149   struct bo_cache_bucket cache_bucket[14 * 4];
150   int num_buckets;
151   time_t time;
152
153   struct hash_table *name_table;
154   struct hash_table *handle_table;
155
156   /**
157    * List of BOs which we've effectively freed, but are hanging on to
158    * until they're idle before closing them.
159    */
160   struct list_head zombie_list;
161
162   bool has_llc:1;
163   bool has_mmap_offset:1;
164   bool has_tiling_uapi:1;
165   bool bo_reuse:1;
166};
167
168static simple_mtx_t global_bufmgr_list_mutex = _SIMPLE_MTX_INITIALIZER_NP;
169static struct list_head global_bufmgr_list = {
170   .next = &global_bufmgr_list,
171   .prev = &global_bufmgr_list,
172};
173
174static int bo_set_tiling_internal(struct crocus_bo *bo, uint32_t tiling_mode,
175                                  uint32_t stride);
176
177static void bo_free(struct crocus_bo *bo);
178
179static uint32_t
180key_hash_uint(const void *key)
181{
182   return _mesa_hash_data(key, 4);
183}
184
185static bool
186key_uint_equal(const void *a, const void *b)
187{
188   return *((unsigned *) a) == *((unsigned *) b);
189}
190
191static struct crocus_bo *
192find_and_ref_external_bo(struct hash_table *ht, unsigned int key)
193{
194   struct hash_entry *entry = _mesa_hash_table_search(ht, &key);
195   struct crocus_bo *bo = entry ? entry->data : NULL;
196
197   if (bo) {
198      assert(bo->external);
199      assert(!bo->reusable);
200
201      /* Being non-reusable, the BO cannot be in the cache lists, but it
202       * may be in the zombie list if it had reached zero references, but
203       * we hadn't yet closed it...and then reimported the same BO.  If it
204       * is, then remove it since it's now been resurrected.
205       */
206      if (bo->head.prev || bo->head.next)
207         list_del(&bo->head);
208
209      crocus_bo_reference(bo);
210   }
211
212   return bo;
213}
214
215/**
216 * This function finds the bucket that best fits the input size.
217 * It computes the bucket index directly, in O(1) time, rather than
218 * iterating through all the buckets to find a match.
219 */
220static struct bo_cache_bucket *
221bucket_for_size(struct crocus_bufmgr *bufmgr, uint64_t size)
222{
223   /* Calculating the pages and rounding up to the page size. */
224   const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
225
226   /* Row  Bucket sizes    clz((x-1) | 3)   Row    Column
227    *        in pages                      stride   size
228    *   0:   1  2  3  4 -> 30 30 30 30        4       1
229    *   1:   5  6  7  8 -> 29 29 29 29        4       1
230    *   2:  10 12 14 16 -> 28 28 28 28        8       2
231    *   3:  20 24 28 32 -> 27 27 27 27       16       4
232    */
233   const unsigned row = 30 - __builtin_clz((pages - 1) | 3);
234   const unsigned row_max_pages = 4 << row;
235
236   /* The '& ~2' is the special case for row 0 of the table above.  In that
237    * row, max pages / 2 is 2, but the previous row maximum is zero (because
238    * there is no previous row). All row maximum sizes are powers of 2, so
239    * that is the only case where that bit will be set.
240    */
241   const unsigned prev_row_max_pages = (row_max_pages / 2) & ~2;
242   int col_size_log2 = row - 1;
243   col_size_log2 += (col_size_log2 < 0);
244
245   const unsigned col = (pages - prev_row_max_pages +
246                         ((1 << col_size_log2) - 1)) >> col_size_log2;
247
248   /* Calculating the index based on the row and column. */
249   const unsigned index = (row * 4) + (col - 1);
250
251   return (index < bufmgr->num_buckets) ?
252          &bufmgr->cache_bucket[index] : NULL;
253}
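
/* Worked example of the index math above (illustrative only): a 40 KiB
 * request is 10 pages, so row = 30 - __builtin_clz(9 | 3) = 2,
 * row_max_pages = 16, prev_row_max_pages = 8, col_size_log2 = 1, and
 * col = (10 - 8 + 1) >> 1 = 1, giving index = 2 * 4 + (1 - 1) = 8, which
 * is the 10-page (40 KiB) bucket created by init_cache_buckets() below.
 */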
254
255
256int
257crocus_bo_busy(struct crocus_bo *bo)
258{
259   struct crocus_bufmgr *bufmgr = bo->bufmgr;
260   struct drm_i915_gem_busy busy = { .handle = bo->gem_handle };
261
262   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
263   if (ret == 0) {
264      bo->idle = !busy.busy;
265      return busy.busy;
266   }
267   return false;
268}
269
270int
271crocus_bo_madvise(struct crocus_bo *bo, int state)
272{
273   struct drm_i915_gem_madvise madv = {
274      .handle = bo->gem_handle,
275      .madv = state,
276      .retained = 1,
277   };
278
279   intel_ioctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
280
281   return madv.retained;
282}
283
284static struct crocus_bo *
285bo_calloc(void)
286{
287   struct crocus_bo *bo = calloc(1, sizeof(*bo));
288   if (!bo)
289      return NULL;
290
291   list_inithead(&bo->exports);
292   bo->hash = _mesa_hash_pointer(bo);
293   return bo;
294}
295
296static struct crocus_bo *
297alloc_bo_from_cache(struct crocus_bufmgr *bufmgr,
298                    struct bo_cache_bucket *bucket,
299                    uint32_t alignment,
300                    unsigned flags)
301{
302   if (!bucket)
303      return NULL;
304
305   struct crocus_bo *bo = NULL;
306
307   list_for_each_entry_safe(struct crocus_bo, cur, &bucket->head, head) {
308      /* BOs are appended to the cache as they are freed, so the entry at
309       * the head is the oldest.  If even that one is still busy, there are
310       * no idle BOs; bail and allocate a fresh buffer instead.
311       */
312      if (crocus_bo_busy(cur))
313         return NULL;
314
315      list_del(&cur->head);
316
317      /* Tell the kernel we need this BO.  If it still exists, we're done! */
318      if (crocus_bo_madvise(cur, I915_MADV_WILLNEED)) {
319         bo = cur;
320         break;
321      }
322
323      /* This BO was purged, throw it out and keep looking. */
324      bo_free(cur);
325   }
326
327   if (!bo)
328      return NULL;
329
330   /* Zero the contents if necessary.  If this fails, fall back to
331    * allocating a fresh BO, which will always be zeroed by the kernel.
332    */
333   if (flags & BO_ALLOC_ZEROED) {
334      void *map = crocus_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
335      if (map) {
336         memset(map, 0, bo->size);
337      } else {
338         bo_free(bo);
339         return NULL;
340      }
341   }
342
343   return bo;
344}
345
346static struct crocus_bo *
347alloc_fresh_bo(struct crocus_bufmgr *bufmgr, uint64_t bo_size)
348{
349   struct crocus_bo *bo = bo_calloc();
350   if (!bo)
351      return NULL;
352
353   struct drm_i915_gem_create create = { .size = bo_size };
354
355   /* All new BOs we get from the kernel are zeroed, so we don't need to
356    * worry about that here.
357    */
358   if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CREATE, &create) != 0) {
359      free(bo);
360      return NULL;
361   }
362
363   bo->gem_handle = create.handle;
364   bo->bufmgr = bufmgr;
365   bo->size = bo_size;
366   bo->idle = true;
367   bo->tiling_mode = I915_TILING_NONE;
368   bo->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
369   bo->stride = 0;
370
371   /* Calling set_domain() will allocate pages for the BO outside of the
372    * struct mutex lock in the kernel, which is more efficient than waiting
373    * to create them during the first execbuf that uses the BO.
374    */
375   struct drm_i915_gem_set_domain sd = {
376      .handle = bo->gem_handle,
377      .read_domains = I915_GEM_DOMAIN_CPU,
378      .write_domain = 0,
379   };
380
381   if (intel_ioctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd) != 0) {
382      bo_free(bo);
383      return NULL;
384   }
385
386   return bo;
387}
388
389static struct crocus_bo *
390bo_alloc_internal(struct crocus_bufmgr *bufmgr,
391                  const char *name,
392                  uint64_t size,
393                  uint32_t alignment,
394                  unsigned flags,
395                  uint32_t tiling_mode,
396                  uint32_t stride)
397{
398   struct crocus_bo *bo;
399   unsigned int page_size = getpagesize();
400   struct bo_cache_bucket *bucket = bucket_for_size(bufmgr, size);
401
402   /* Round the size up to the bucket size, or if we don't have caching
403    * at this size, a multiple of the page size.
404    */
405   uint64_t bo_size =
406      bucket ? bucket->size : MAX2(ALIGN(size, page_size), page_size);
407
408   simple_mtx_lock(&bufmgr->lock);
409
410   /* Get a buffer out of the cache if one of a suitable size is available. */
413   bo = alloc_bo_from_cache(bufmgr, bucket, alignment, flags);
414
415   simple_mtx_unlock(&bufmgr->lock);
416
417   if (!bo) {
418      bo = alloc_fresh_bo(bufmgr, bo_size);
419      if (!bo)
420         return NULL;
421   }
422
423   if (bo_set_tiling_internal(bo, tiling_mode, stride))
424      goto err_free;
425
426   bo->name = name;
427   p_atomic_set(&bo->refcount, 1);
428   bo->reusable = bucket && bufmgr->bo_reuse;
429   bo->cache_coherent = bufmgr->has_llc;
430   bo->index = -1;
431   bo->kflags = 0;
432
433   if ((flags & BO_ALLOC_COHERENT) && !bo->cache_coherent) {
434      struct drm_i915_gem_caching arg = {
435         .handle = bo->gem_handle,
436         .caching = 1,
437      };
438      if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) == 0) {
439         bo->cache_coherent = true;
440         bo->reusable = false;
441      }
442   }
443
444   DBG("bo_create: buf %d (%s) %llub\n", bo->gem_handle,
445       bo->name, (unsigned long long) size);
446
447   return bo;
448
449err_free:
450   bo_free(bo);
451   return NULL;
452}
453
454struct crocus_bo *
455crocus_bo_alloc(struct crocus_bufmgr *bufmgr,
456                const char *name,
457                uint64_t size)
458{
459   return bo_alloc_internal(bufmgr, name, size, 1,
460                            0, I915_TILING_NONE, 0);
461}
462
463struct crocus_bo *
464crocus_bo_alloc_tiled(struct crocus_bufmgr *bufmgr, const char *name,
465                      uint64_t size, uint32_t alignment,
466                      uint32_t tiling_mode, uint32_t pitch, unsigned flags)
467{
468   return bo_alloc_internal(bufmgr, name, size, alignment,
469                            flags, tiling_mode, pitch);
470}
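
/* Illustrative sketch only (the buffer name and sizes are made up):
 * allocating a zeroed, X-tiled buffer with a 4096-byte pitch might look
 * roughly like:
 *
 *    struct crocus_bo *bo =
 *       crocus_bo_alloc_tiled(bufmgr, "example surface", 4 * 1024 * 1024,
 *                             4096, I915_TILING_X, 4096, BO_ALLOC_ZEROED);
 *
 * bo_alloc_internal() rounds the size up to the nearest cache bucket (or
 * page multiple), so callers need not page-align it themselves.
 */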
471
472struct crocus_bo *
473crocus_bo_create_userptr(struct crocus_bufmgr *bufmgr, const char *name,
474                         void *ptr, size_t size)
475{
476   struct crocus_bo *bo;
477
478   bo = bo_calloc();
479   if (!bo)
480      return NULL;
481
482   struct drm_i915_gem_userptr arg = {
483      .user_ptr = (uintptr_t)ptr,
484      .user_size = size,
485   };
486   if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
487      goto err_free;
488   bo->gem_handle = arg.handle;
489
490   /* Check the buffer for validity before we try and use it in a batch */
491   struct drm_i915_gem_set_domain sd = {
492      .handle = bo->gem_handle,
493      .read_domains = I915_GEM_DOMAIN_CPU,
494   };
495   if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd))
496      goto err_close;
497
498   bo->name = name;
499   bo->size = size;
500   bo->map_cpu = ptr;
501
502   bo->bufmgr = bufmgr;
503   bo->kflags = 0;
504
505   p_atomic_set(&bo->refcount, 1);
506   bo->userptr = true;
507   bo->cache_coherent = true;
508   bo->index = -1;
509   bo->idle = true;
510
511   return bo;
512
513err_close:
514   intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &bo->gem_handle);
515err_free:
516   free(bo);
517   return NULL;
518}
519
520/**
521 * Returns a crocus_bo wrapping the given buffer object handle.
522 *
523 * This can be used when one application needs to pass a buffer object
524 * to another.
525 */
526struct crocus_bo *
527crocus_bo_gem_create_from_name(struct crocus_bufmgr *bufmgr,
528                               const char *name, unsigned int handle)
529{
530   struct crocus_bo *bo;
531
532   /* At the moment most applications only have a few named BOs.
533    * For instance, in a DRI client only the render buffers passed
534    * between X and the client are named. And since X returns the
535    * alternating names for the front/back buffer a linear search
536    * provides a sufficiently fast match.
537    */
538   simple_mtx_lock(&bufmgr->lock);
539   bo = find_and_ref_external_bo(bufmgr->name_table, handle);
540   if (bo)
541      goto out;
542
543   struct drm_gem_open open_arg = { .name = handle };
544   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_OPEN, &open_arg);
545   if (ret != 0) {
546      DBG("Couldn't reference %s handle 0x%08x: %s\n",
547          name, handle, strerror(errno));
548      bo = NULL;
549      goto out;
550   }
551   /* Now see if someone has used a prime handle to get this
552    * object from the kernel before by looking through the list
553    * again for a matching gem_handle
554    */
555   bo = find_and_ref_external_bo(bufmgr->handle_table, open_arg.handle);
556   if (bo)
557      goto out;
558
559   bo = bo_calloc();
560   if (!bo)
561      goto out;
562
563   p_atomic_set(&bo->refcount, 1);
564
565   bo->size = open_arg.size;
566   bo->gtt_offset = 0;
567   bo->bufmgr = bufmgr;
568   bo->gem_handle = open_arg.handle;
569   bo->name = name;
570   bo->global_name = handle;
571   bo->reusable = false;
572   bo->external = true;
573   bo->kflags = 0;
574
575   _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
576   _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);
577
578   struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle };
579   ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling);
580   if (ret != 0)
581      goto err_unref;
582
583   bo->tiling_mode = get_tiling.tiling_mode;
584   bo->swizzle_mode = get_tiling.swizzle_mode;
585   /* XXX stride is unknown */
586   DBG("bo_create_from_handle: %d (%s)\n", handle, bo->name);
587
588out:
589   simple_mtx_unlock(&bufmgr->lock);
590   return bo;
591
592err_unref:
593   bo_free(bo);
594   simple_mtx_unlock(&bufmgr->lock);
595   return NULL;
596}
597
598static void
599bo_close(struct crocus_bo *bo)
600{
601   struct crocus_bufmgr *bufmgr = bo->bufmgr;
602
603   if (bo->external) {
604      struct hash_entry *entry;
605
606      if (bo->global_name) {
607         entry = _mesa_hash_table_search(bufmgr->name_table, &bo->global_name);
608         _mesa_hash_table_remove(bufmgr->name_table, entry);
609      }
610
611      entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle);
612      _mesa_hash_table_remove(bufmgr->handle_table, entry);
613
614      list_for_each_entry_safe(struct bo_export, export, &bo->exports, link) {
615         struct drm_gem_close close = { .handle = export->gem_handle };
616         intel_ioctl(export->drm_fd, DRM_IOCTL_GEM_CLOSE, &close);
617
618         list_del(&export->link);
619         free(export);
620      }
621   } else {
622      assert(list_is_empty(&bo->exports));
623   }
624
625   /* Close this object */
626   struct drm_gem_close close = { .handle = bo->gem_handle };
627   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close);
628   if (ret != 0) {
629      DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
630          bo->gem_handle, bo->name, strerror(errno));
631   }
632
633   free(bo);
634}
635
636static void
637bo_free(struct crocus_bo *bo)
638{
639   struct crocus_bufmgr *bufmgr = bo->bufmgr;
640
641   if (bo->map_cpu && !bo->userptr) {
642      VG_NOACCESS(bo->map_cpu, bo->size);
643      munmap(bo->map_cpu, bo->size);
644   }
645   if (bo->map_wc) {
646      VG_NOACCESS(bo->map_wc, bo->size);
647      munmap(bo->map_wc, bo->size);
648   }
649   if (bo->map_gtt) {
650      VG_NOACCESS(bo->map_gtt, bo->size);
651      munmap(bo->map_gtt, bo->size);
652   }
653
654   if (bo->idle) {
655      bo_close(bo);
656   } else {
657      /* Defer closing the GEM BO until it is idle.  Just move it to the
658       * zombie list for now.
659       */
660      list_addtail(&bo->head, &bufmgr->zombie_list);
661   }
662}
663
664/** Frees all cached buffers significantly older than @time. */
665static void
666cleanup_bo_cache(struct crocus_bufmgr *bufmgr, time_t time)
667{
668   int i;
669
670   if (bufmgr->time == time)
671      return;
672
673   for (i = 0; i < bufmgr->num_buckets; i++) {
674      struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];
675
676      list_for_each_entry_safe(struct crocus_bo, bo, &bucket->head, head) {
677         if (time - bo->free_time <= 1)
678            break;
679
680         list_del(&bo->head);
681
682         bo_free(bo);
683      }
684   }
685
686   list_for_each_entry_safe(struct crocus_bo, bo, &bufmgr->zombie_list, head) {
687      /* Stop once we reach a busy BO - all others past this point were
688       * freed more recently so are likely also busy.
689       */
690      if (!bo->idle && crocus_bo_busy(bo))
691         break;
692
693      list_del(&bo->head);
694      bo_close(bo);
695   }
696
697   bufmgr->time = time;
698}
699
700static void
701bo_unreference_final(struct crocus_bo *bo, time_t time)
702{
703   struct crocus_bufmgr *bufmgr = bo->bufmgr;
704   struct bo_cache_bucket *bucket;
705
706   DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name);
707
708   bucket = NULL;
709   if (bo->reusable)
710      bucket = bucket_for_size(bufmgr, bo->size);
711   /* Put the buffer into our internal cache for reuse if we can. */
712   if (bucket && crocus_bo_madvise(bo, I915_MADV_DONTNEED)) {
713      bo->free_time = time;
714      bo->name = NULL;
715
716      list_addtail(&bo->head, &bucket->head);
717   } else {
718      bo_free(bo);
719   }
720}
721
722void
723__crocus_bo_unreference(struct crocus_bo *bo)
724{
725   struct crocus_bufmgr *bufmgr = bo->bufmgr;
726   struct timespec time;
727
728   clock_gettime(CLOCK_MONOTONIC, &time);
729
730   simple_mtx_lock(&bufmgr->lock);
731
732   if (p_atomic_dec_zero(&bo->refcount)) {
733      bo_unreference_final(bo, time.tv_sec);
734      cleanup_bo_cache(bufmgr, time.tv_sec);
735   }
736
737   simple_mtx_unlock(&bufmgr->lock);
738}
739
740static void
741bo_wait_with_stall_warning(struct pipe_debug_callback *dbg,
742                           struct crocus_bo *bo,
743                           const char *action)
744{
745   bool busy = dbg && !bo->idle;
746   double elapsed = unlikely(busy) ? -get_time() : 0.0;
747
748   crocus_bo_wait_rendering(bo);
749
750   if (unlikely(busy)) {
751      elapsed += get_time();
752      if (elapsed > 1e-5) /* 0.01ms */ {
753         perf_debug(dbg, "%s a busy \"%s\" BO stalled and took %.03f ms.\n",
754                    action, bo->name, elapsed * 1000);
755      }
756   }
757}
758
759static void
760print_flags(unsigned flags)
761{
762   if (flags & MAP_READ)
763      DBG("READ ");
764   if (flags & MAP_WRITE)
765      DBG("WRITE ");
766   if (flags & MAP_ASYNC)
767      DBG("ASYNC ");
768   if (flags & MAP_PERSISTENT)
769      DBG("PERSISTENT ");
770   if (flags & MAP_COHERENT)
771      DBG("COHERENT ");
772   if (flags & MAP_RAW)
773      DBG("RAW ");
774   DBG("\n");
775}
776
777static void *
778crocus_bo_gem_mmap_legacy(struct pipe_debug_callback *dbg,
779                          struct crocus_bo *bo, bool wc)
780{
781   struct crocus_bufmgr *bufmgr = bo->bufmgr;
782
783   struct drm_i915_gem_mmap mmap_arg = {
784      .handle = bo->gem_handle,
785      .size = bo->size,
786      .flags = wc ? I915_MMAP_WC : 0,
787   };
788
789   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
790   if (ret != 0) {
791      DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
792          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
793      return NULL;
794   }
795   void *map = (void *) (uintptr_t) mmap_arg.addr_ptr;
796
797   return map;
798}
799
800static void *
801crocus_bo_gem_mmap_offset(struct pipe_debug_callback *dbg, struct crocus_bo *bo,
802                          bool wc)
803{
804   struct crocus_bufmgr *bufmgr = bo->bufmgr;
805
806   struct drm_i915_gem_mmap_offset mmap_arg = {
807      .handle = bo->gem_handle,
808      .flags = wc ? I915_MMAP_OFFSET_WC : I915_MMAP_OFFSET_WB,
809   };
810
811   /* Get the fake offset back */
812   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &mmap_arg);
813   if (ret != 0) {
814      DBG("%s:%d: Error preparing buffer %d (%s): %s .\n",
815          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
816      return NULL;
817   }
818
819   /* And map it */
820   void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
821                    bufmgr->fd, mmap_arg.offset);
822   if (map == MAP_FAILED) {
823      DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
824          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
825      return NULL;
826   }
827
828   return map;
829}
830
831static void *
832crocus_bo_gem_mmap(struct pipe_debug_callback *dbg, struct crocus_bo *bo, bool wc)
833{
834   struct crocus_bufmgr *bufmgr = bo->bufmgr;
835
836   if (bufmgr->has_mmap_offset)
837      return crocus_bo_gem_mmap_offset(dbg, bo, wc);
838   else
839      return crocus_bo_gem_mmap_legacy(dbg, bo, wc);
840}
841
842static void *
843crocus_bo_map_cpu(struct pipe_debug_callback *dbg,
844                  struct crocus_bo *bo, unsigned flags)
845{
846   /* We disallow CPU maps for writing to non-coherent buffers, as the
847    * CPU map can become invalidated when a batch is flushed out, which
848    * can happen at unpredictable times.  You should use WC maps instead.
849    */
850   assert(bo->cache_coherent || !(flags & MAP_WRITE));
851
852   if (!bo->map_cpu) {
853      DBG("crocus_bo_map_cpu: %d (%s)\n", bo->gem_handle, bo->name);
854
855      void *map = crocus_bo_gem_mmap(dbg, bo, false);
856      if (!map) {
857         return NULL;
858      }
859
860      VG_DEFINED(map, bo->size);
861
862      if (p_atomic_cmpxchg(&bo->map_cpu, NULL, map)) {
863         VG_NOACCESS(map, bo->size);
864         munmap(map, bo->size);
865      }
866   }
867   assert(bo->map_cpu);
868
869   DBG("crocus_bo_map_cpu: %d (%s) -> %p, ", bo->gem_handle, bo->name,
870       bo->map_cpu);
871   print_flags(flags);
872
873   if (!(flags & MAP_ASYNC)) {
874      bo_wait_with_stall_warning(dbg, bo, "CPU mapping");
875   }
876
877   if (!bo->cache_coherent && !bo->bufmgr->has_llc) {
878      /* If we're reusing an existing CPU mapping, the CPU caches may
879       * contain stale data from the last time we read from that mapping.
880       * (With the BO cache, it might even be data from a previous buffer!)
881       * Even if it's a brand new mapping, the kernel may have zeroed the
882       * buffer via CPU writes.
883       *
884       * We need to invalidate those cachelines so that we see the latest
885       * contents, and so long as we only read from the CPU mmap we do not
886       * need to write those cachelines back afterwards.
887       *
888       * On LLC, the empirical evidence suggests that writes from the GPU
889       * that bypass the LLC (i.e. for scanout) do *invalidate* the CPU
890       * cachelines. (Other reads, such as the display engine, bypass the
891       * LLC entirely requiring us to keep dirty pixels for the scanout
892       * out of any cache.)
893       */
894      intel_invalidate_range(bo->map_cpu, bo->size);
895   }
896
897   return bo->map_cpu;
898}
899
900static void *
901crocus_bo_map_wc(struct pipe_debug_callback *dbg,
902                 struct crocus_bo *bo, unsigned flags)
903{
904   if (!bo->map_wc) {
905      DBG("crocus_bo_map_wc: %d (%s)\n", bo->gem_handle, bo->name);
906
907      void *map = crocus_bo_gem_mmap(dbg, bo, true);
908      if (!map) {
909         return NULL;
910      }
911
912      VG_DEFINED(map, bo->size);
913
914      if (p_atomic_cmpxchg(&bo->map_wc, NULL, map)) {
915         VG_NOACCESS(map, bo->size);
916         munmap(map, bo->size);
917      }
918   }
919   assert(bo->map_wc);
920
921   DBG("crocus_bo_map_wc: %d (%s) -> %p\n", bo->gem_handle, bo->name, bo->map_wc);
922   print_flags(flags);
923
924   if (!(flags & MAP_ASYNC)) {
925      bo_wait_with_stall_warning(dbg, bo, "WC mapping");
926   }
927
928   return bo->map_wc;
929}
930
931/**
932 * Perform an uncached mapping via the GTT.
933 *
934 * Write access through the GTT is not quite fully coherent. On low power
935 * systems especially, like modern Atoms, we can observe reads from RAM before
936 * the write via GTT has landed. A write memory barrier that flushes the Write
937 * Combining Buffer (i.e. sfence/mfence) is not sufficient to order the later
938 * read after the write as the GTT write suffers a small delay through the GTT
939 * indirection. The kernel uses an uncached mmio read to ensure the GTT write
940 * is ordered with reads (either by the GPU, WB or WC) and unconditionally
941 * flushes prior to execbuf submission. However, if we are not informing the
942 * kernel about our GTT writes, it will not flush before earlier access, such
943 * as when using the cmdparser. Similarly, we need to be careful if we should
944 * ever issue a CPU read immediately following a GTT write.
945 *
946 * Telling the kernel about write access also has one more important
947 * side-effect. Upon receiving notification about the write, it cancels any
948 * scanout buffering for FBC/PSR and friends. Later FBC/PSR is then flushed by
949 * either SW_FINISH or DIRTYFB. The presumption is that we never write to the
950 * actual scanout via a mmapping, only to a backbuffer, and so all the FBC/PSR
951 * tracking is handled on the buffer exchange instead.
952 */
953static void *
954crocus_bo_map_gtt(struct pipe_debug_callback *dbg,
955                  struct crocus_bo *bo, unsigned flags)
956{
957   struct crocus_bufmgr *bufmgr = bo->bufmgr;
958
959   /* If we don't support get/set_tiling, there's no support for GTT mapping
960    * either (it won't do any de-tiling for us).
961    */
962   assert(bufmgr->has_tiling_uapi);
963
964   /* Get a mapping of the buffer if we haven't before. */
965   if (bo->map_gtt == NULL) {
966      DBG("bo_map_gtt: mmap %d (%s)\n", bo->gem_handle, bo->name);
967
968      struct drm_i915_gem_mmap_gtt mmap_arg = { .handle = bo->gem_handle };
969
970      /* Get the fake offset back... */
971      int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg);
972      if (ret != 0) {
973         DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
974             __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
975         return NULL;
976      }
977
978      /* and mmap it. */
979      void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE,
980                       MAP_SHARED, bufmgr->fd, mmap_arg.offset);
981      if (map == MAP_FAILED) {
982         DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
983             __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
984         return NULL;
985      }
986
987      /* We don't need to use VALGRIND_MALLOCLIKE_BLOCK because Valgrind will
988       * already intercept this mmap call. However, for consistency between
989       * all the mmap paths, we mark the pointer as defined now and mark it
990       * as inaccessible afterwards.
991       */
992      VG_DEFINED(map, bo->size);
993
994      if (p_atomic_cmpxchg(&bo->map_gtt, NULL, map)) {
995         VG_NOACCESS(map, bo->size);
996         munmap(map, bo->size);
997      }
998   }
999   assert(bo->map_gtt);
1000
1001   DBG("bo_map_gtt: %d (%s) -> %p, ", bo->gem_handle, bo->name, bo->map_gtt);
1002   print_flags(flags);
1003
1004   if (!(flags & MAP_ASYNC)) {
1005      bo_wait_with_stall_warning(dbg, bo, "GTT mapping");
1006   }
1007
1008   return bo->map_gtt;
1009}
1010
1011static bool
1012can_map_cpu(struct crocus_bo *bo, unsigned flags)
1013{
1014   if (bo->cache_coherent)
1015      return true;
1016
1017   /* Even if the buffer itself is not cache-coherent (such as a scanout), on
1018    * an LLC platform reads are always coherent (as they are performed via the
1019    * central system agent). It is only the writes that need special care, to
1020    * ensure they land in main memory and do not stick in the CPU cache.
1021    */
1022   if (!(flags & MAP_WRITE) && bo->bufmgr->has_llc)
1023      return true;
1024
1025   /* If PERSISTENT or COHERENT are set, the mmapping needs to remain valid
1026    * across batch flushes where the kernel will change cache domains of the
1027    * bo, invalidating continued access to the CPU mmap on non-LLC devices.
1028    *
1029    * Similarly, ASYNC typically means that the buffer will be accessed via
1030    * both the CPU and the GPU simultaneously.  Batches may be executed that
1031    * use the BO even while it is mapped.  While OpenGL technically disallows
1032    * most drawing while non-persistent mappings are active, we may still use
1033    * the GPU for blits or other operations, causing batches to happen at
1034    * inconvenient times.
1035    *
1036    * If RAW is set, we expect the caller to be able to handle a WC buffer
1037    * more efficiently than the involuntary clflushes.
1038    */
1039   if (flags & (MAP_PERSISTENT | MAP_COHERENT | MAP_ASYNC | MAP_RAW))
1040      return false;
1041
1042   return !(flags & MAP_WRITE);
1043}
1044
1045void *
1046crocus_bo_map(struct pipe_debug_callback *dbg,
1047              struct crocus_bo *bo, unsigned flags)
1048{
1049   if (bo->tiling_mode != I915_TILING_NONE && !(flags & MAP_RAW))
1050      return crocus_bo_map_gtt(dbg, bo, flags);
1051
1052   void *map;
1053
1054   if (can_map_cpu(bo, flags))
1055      map = crocus_bo_map_cpu(dbg, bo, flags);
1056   else
1057      map = crocus_bo_map_wc(dbg, bo, flags);
1058
1059   /* Allow the attempt to fail by falling back to the GTT where necessary.
1060    *
1061    * Not every buffer can be mmaped directly using the CPU (or WC), for
1062    * example buffers that wrap stolen memory or are imported from other
1063    * devices. For those, we have little choice but to use a GTT mmapping.
1064    * However, if we use a slow GTT mmapping for reads where we expected fast
1065    * access, that order of magnitude difference in throughput will be clearly
1066    * expressed by angry users.
1067    *
1068    * We skip MAP_RAW because we want to avoid map_gtt's fence detiling.
1069    */
1070   if (!map && !(flags & MAP_RAW)) {
1071      perf_debug(dbg, "Fallback GTT mapping for %s with access flags %x\n",
1072                 bo->name, flags);
1073      map = crocus_bo_map_gtt(dbg, bo, flags);
1074   }
1075
1076   return map;
1077}
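
/* Minimal usage sketch (hypothetical call site; dbg, data and size are
 * assumed to exist): uploading data through whichever mapping type
 * crocus_bo_map() selects above:
 *
 *    void *map = crocus_bo_map(dbg, bo, MAP_WRITE);
 *    if (map)
 *       memcpy(map, data, size);
 *
 * Adding MAP_ASYNC skips the bo_wait_with_stall_warning() stall when the
 * caller knows the GPU is no longer using the BO.
 */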
1078
1079/** Waits for all GPU rendering with the object to have completed. */
1080void
1081crocus_bo_wait_rendering(struct crocus_bo *bo)
1082{
1083   /* We require a kernel recent enough for WAIT_IOCTL support.
1084    * See intel_init_bufmgr()
1085    */
1086   crocus_bo_wait(bo, -1);
1087}
1088
1089/**
1090 * Waits on a BO for the given amount of time.
1091 *
1092 * @bo: buffer object to wait for
1093 * @timeout_ns: amount of time to wait in nanoseconds.
1094 *   If value is less than 0, an infinite wait will occur.
1095 *
1096 * Returns 0 if the wait was successful, i.e. the last batch referencing the
1097 * object has completed within the allotted time. Otherwise, a negative return
1098 * value describes the error; of particular interest is -ETIME, indicating the
1099 * wait timed out before the object became idle.
1100 *
1101 * Similar to crocus_bo_wait_rendering except a timeout parameter allows
1102 * the operation to give up after a certain amount of time. Another subtle
1103 * difference is the internal locking semantics are different (this variant does
1104 * not hold the lock for the duration of the wait). This makes the wait subject
1105 * to a larger userspace race window.
1106 *
1107 * The implementation waits until the object is no longer actively
1108 * referenced within a batch buffer at the time of the call. The wait does
1109 * not guarantee that the buffer will not be re-issued afterwards via another
1110 * thread or a flinked handle. Userspace must make sure this race does not
1111 * occur if such precision is important.
1112 *
1113 * Note that some kernels have broken the promise of an infinite wait for
1114 * negative values; upgrade to the latest stable kernel if this is the case.
1115 */
1116int
1117crocus_bo_wait(struct crocus_bo *bo, int64_t timeout_ns)
1118{
1119   struct crocus_bufmgr *bufmgr = bo->bufmgr;
1120
1121   /* If we know it's idle, don't bother with the kernel round trip */
1122   if (bo->idle && !bo->external)
1123      return 0;
1124
1125   struct drm_i915_gem_wait wait = {
1126      .bo_handle = bo->gem_handle,
1127      .timeout_ns = timeout_ns,
1128   };
1129   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
1130   if (ret != 0)
1131      return -errno;
1132
1133   bo->idle = true;
1134
1135   return ret;
1136}
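
/* Example of a bounded wait (sketch only): give the GPU up to 1 ms to
 * finish with the BO, treating -ETIME as "still busy":
 *
 *    bool idle = crocus_bo_wait(bo, 1000 * 1000) == 0;
 *
 * A negative timeout_ns, as used by crocus_bo_wait_rendering(), waits
 * indefinitely instead.
 */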
1137
1138static void
1139crocus_bufmgr_destroy(struct crocus_bufmgr *bufmgr)
1140{
1141   simple_mtx_destroy(&bufmgr->lock);
1142
1143   /* Free any cached buffer objects we were going to reuse */
1144   for (int i = 0; i < bufmgr->num_buckets; i++) {
1145      struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];
1146
1147      list_for_each_entry_safe(struct crocus_bo, bo, &bucket->head, head) {
1148         list_del(&bo->head);
1149
1150         bo_free(bo);
1151      }
1152   }
1153
1154   /* Close any buffer objects on the dead list. */
1155   list_for_each_entry_safe(struct crocus_bo, bo, &bufmgr->zombie_list, head) {
1156      list_del(&bo->head);
1157      bo_close(bo);
1158   }
1159
1160   _mesa_hash_table_destroy(bufmgr->name_table, NULL);
1161   _mesa_hash_table_destroy(bufmgr->handle_table, NULL);
1162
1163   close(bufmgr->fd);
1164
1165   free(bufmgr);
1166}
1167
1168static int
1169bo_set_tiling_internal(struct crocus_bo *bo, uint32_t tiling_mode,
1170                       uint32_t stride)
1171{
1172   struct crocus_bufmgr *bufmgr = bo->bufmgr;
1173   struct drm_i915_gem_set_tiling set_tiling;
1174   int ret;
1175
1176   if (bo->global_name == 0 &&
1177       tiling_mode == bo->tiling_mode && stride == bo->stride)
1178      return 0;
1179
1180   memset(&set_tiling, 0, sizeof(set_tiling));
1181   do {
1182      /* set_tiling is slightly broken and overwrites the
1183       * input on the error path, so we have to open code
1184       * drm_ioctl.
1185       */
1186      set_tiling.handle = bo->gem_handle;
1187      set_tiling.tiling_mode = tiling_mode;
1188      set_tiling.stride = stride;
1189
1190      ret = ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
1191   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
1192   if (ret == -1)
1193      return -errno;
1194
1195   bo->tiling_mode = set_tiling.tiling_mode;
1196   bo->swizzle_mode = set_tiling.swizzle_mode;
1197   bo->stride = set_tiling.stride;
1198   return 0;
1199}
1200
1201int
1202crocus_bo_get_tiling(struct crocus_bo *bo, uint32_t *tiling_mode,
1203                     uint32_t *swizzle_mode)
1204{
1205   *tiling_mode = bo->tiling_mode;
1206   *swizzle_mode = bo->swizzle_mode;
1207   return 0;
1208}
1209
1210struct crocus_bo *
1211crocus_bo_import_dmabuf(struct crocus_bufmgr *bufmgr, int prime_fd,
1212                        uint64_t modifier)
1213{
1214   uint32_t handle;
1215   struct crocus_bo *bo;
1216
1217   simple_mtx_lock(&bufmgr->lock);
1218   int ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
1219   if (ret) {
1220      DBG("import_dmabuf: failed to obtain handle from fd: %s\n",
1221          strerror(errno));
1222      simple_mtx_unlock(&bufmgr->lock);
1223      return NULL;
1224   }
1225
1226   /*
1227    * See if the kernel has already returned this buffer to us. Just as
1228    * for named buffers, we must not create two bo's pointing at the same
1229    * kernel object
1230    */
1231   bo = find_and_ref_external_bo(bufmgr->handle_table, handle);
1232   if (bo)
1233      goto out;
1234
1235   bo = bo_calloc();
1236   if (!bo)
1237      goto out;
1238
1239   p_atomic_set(&bo->refcount, 1);
1240
1241   /* Determine size of bo.  The fd-to-handle ioctl really should
1242    * return the size, but it doesn't.  If we have kernel 3.12 or
1243    * later, we can lseek on the prime fd to get the size.  Older
1244    * kernels will just fail, in which case the BO size is left as zero
1245    * (unknown). */
1246   ret = lseek(prime_fd, 0, SEEK_END);
1247   if (ret != -1)
1248      bo->size = ret;
1249
1250   bo->bufmgr = bufmgr;
1251   bo->name = "prime";
1252   bo->reusable = false;
1253   bo->external = true;
1254   bo->kflags = 0;
1255   bo->gem_handle = handle;
1256   _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
1257
1258   const struct isl_drm_modifier_info *mod_info =
1259      isl_drm_modifier_get_info(modifier);
1260   if (mod_info) {
1261      bo->tiling_mode = isl_tiling_to_i915_tiling(mod_info->tiling);
1262   } else if (bufmgr->has_tiling_uapi) {
1263      struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle };
1264      if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling))
1265         goto err;
1266
1267      bo->tiling_mode = get_tiling.tiling_mode;
1268   } else {
1269      bo->tiling_mode = I915_TILING_NONE;
1270   }
1271
1272out:
1273   simple_mtx_unlock(&bufmgr->lock);
1274   return bo;
1275
1276err:
1277   bo_free(bo);
1278   simple_mtx_unlock(&bufmgr->lock);
1279   return NULL;
1280}
1281
1282struct crocus_bo *
1283crocus_bo_import_dmabuf_no_mods(struct crocus_bufmgr *bufmgr,
1284                                int prime_fd)
1285{
1286   uint32_t handle;
1287   struct crocus_bo *bo;
1288
1289   simple_mtx_lock(&bufmgr->lock);
1290   int ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
1291   if (ret) {
1292      DBG("import_dmabuf: failed to obtain handle from fd: %s\n",
1293          strerror(errno));
1294      simple_mtx_unlock(&bufmgr->lock);
1295      return NULL;
1296   }
1297
1298   /*
1299    * See if the kernel has already returned this buffer to us. Just as
1300    * for named buffers, we must not create two bo's pointing at the same
1301    * kernel object
1302    */
1303   bo = find_and_ref_external_bo(bufmgr->handle_table, handle);
1304   if (bo)
1305      goto out;
1306
1307   bo = bo_calloc();
1308   if (!bo)
1309      goto out;
1310
1311   p_atomic_set(&bo->refcount, 1);
1312
1313   /* Determine size of bo.  The fd-to-handle ioctl really should
1314    * return the size, but it doesn't.  If we have kernel 3.12 or
1315    * later, we can lseek on the prime fd to get the size.  Older
1316    * kernels will just fail, in which case the BO size is left as zero
1317    * (unknown). */
1318   ret = lseek(prime_fd, 0, SEEK_END);
1319   if (ret != -1)
1320      bo->size = ret;
1321
1322   bo->bufmgr = bufmgr;
1323   bo->name = "prime";
1324   bo->reusable = false;
1325   bo->external = true;
1326   bo->kflags = 0;
1327   bo->gem_handle = handle;
1328   _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
1329
1330out:
1331   simple_mtx_unlock(&bufmgr->lock);
1332   return bo;
1333}
1334
1335static void
1336crocus_bo_make_external_locked(struct crocus_bo *bo)
1337{
1338   if (!bo->external) {
1339      _mesa_hash_table_insert(bo->bufmgr->handle_table, &bo->gem_handle, bo);
1340      bo->external = true;
1341      bo->reusable = false;
1342   }
1343}
1344
1345static void
1346crocus_bo_make_external(struct crocus_bo *bo)
1347{
1348   struct crocus_bufmgr *bufmgr = bo->bufmgr;
1349
1350   if (bo->external) {
1351      assert(!bo->reusable);
1352      return;
1353   }
1354
1355   simple_mtx_lock(&bufmgr->lock);
1356   crocus_bo_make_external_locked(bo);
1357   simple_mtx_unlock(&bufmgr->lock);
1358}
1359
1360int
1361crocus_bo_export_dmabuf(struct crocus_bo *bo, int *prime_fd)
1362{
1363   struct crocus_bufmgr *bufmgr = bo->bufmgr;
1364
1365   crocus_bo_make_external(bo);
1366
1367   if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
1368                          DRM_CLOEXEC, prime_fd) != 0)
1369      return -errno;
1370
1371   return 0;
1372}
1373
1374uint32_t
1375crocus_bo_export_gem_handle(struct crocus_bo *bo)
1376{
1377   crocus_bo_make_external(bo);
1378
1379   return bo->gem_handle;
1380}
1381
1382int
1383crocus_bo_flink(struct crocus_bo *bo, uint32_t *name)
1384{
1385   struct crocus_bufmgr *bufmgr = bo->bufmgr;
1386
1387   if (!bo->global_name) {
1388      struct drm_gem_flink flink = { .handle = bo->gem_handle };
1389
1390      if (intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
1391         return -errno;
1392
1393      simple_mtx_lock(&bufmgr->lock);
1394      if (!bo->global_name) {
1395         crocus_bo_make_external_locked(bo);
1396         bo->global_name = flink.name;
1397         _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);
1398      }
1399      simple_mtx_unlock(&bufmgr->lock);
1400   }
1401
1402   *name = bo->global_name;
1403   return 0;
1404}
1405
1406int
1407crocus_bo_export_gem_handle_for_device(struct crocus_bo *bo, int drm_fd,
1408                                       uint32_t *out_handle)
1409{
1410   /* Only add the new GEM handle to the list of exports if it belongs to a
1411    * different GEM device. Otherwise we might close the same buffer multiple
1412    * times.
1413    */
1414   struct crocus_bufmgr *bufmgr = bo->bufmgr;
1415   int ret = os_same_file_description(drm_fd, bufmgr->fd);
1416   WARN_ONCE(ret < 0,
1417             "Kernel has no file descriptor comparison support: %s\n",
1418             strerror(errno));
1419   if (ret == 0) {
1420      *out_handle = crocus_bo_export_gem_handle(bo);
1421      return 0;
1422   }
1423
1424   struct bo_export *export = calloc(1, sizeof(*export));
1425   if (!export)
1426      return -ENOMEM;
1427
1428   export->drm_fd = drm_fd;
1429
1430   int dmabuf_fd = -1;
1431   int err = crocus_bo_export_dmabuf(bo, &dmabuf_fd);
1432   if (err) {
1433      free(export);
1434      return err;
1435   }
1436
1437   simple_mtx_lock(&bufmgr->lock);
1438   err = drmPrimeFDToHandle(drm_fd, dmabuf_fd, &export->gem_handle);
1439   close(dmabuf_fd);
1440   if (err) {
1441      simple_mtx_unlock(&bufmgr->lock);
1442      free(export);
1443      return err;
1444   }
1445
1446   bool found = false;
1447   list_for_each_entry(struct bo_export, iter, &bo->exports, link) {
1448      if (iter->drm_fd != drm_fd)
1449         continue;
1450      /* Here we assume that for a given DRM fd, we'll always get back the
1451       * same GEM handle for a given buffer.
1452       */
1453      assert(iter->gem_handle == export->gem_handle);
1454      free(export);
1455      export = iter;
1456      found = true;
1457      break;
1458   }
1459   if (!found)
1460      list_addtail(&export->link, &bo->exports);
1461
1462   simple_mtx_unlock(&bufmgr->lock);
1463
1464   *out_handle = export->gem_handle;
1465
1466   return 0;
1467}
1468
1469static void
1470add_bucket(struct crocus_bufmgr *bufmgr, int size)
1471{
1472   unsigned int i = bufmgr->num_buckets;
1473
1474   assert(i < ARRAY_SIZE(bufmgr->cache_bucket));
1475
1476   list_inithead(&bufmgr->cache_bucket[i].head);
1477   bufmgr->cache_bucket[i].size = size;
1478   bufmgr->num_buckets++;
1479
1480   assert(bucket_for_size(bufmgr, size) == &bufmgr->cache_bucket[i]);
1481   assert(bucket_for_size(bufmgr, size - 2048) == &bufmgr->cache_bucket[i]);
1482   assert(bucket_for_size(bufmgr, size + 1) != &bufmgr->cache_bucket[i]);
1483}
1484
1485static void
1486init_cache_buckets(struct crocus_bufmgr *bufmgr)
1487{
1488   uint64_t size, cache_max_size = 64 * 1024 * 1024;
1489
1490   /* OK, so power of two buckets was too wasteful of memory.
1491    * Give 3 other sizes between each power of two, to hopefully
1492    * cover things accurately enough.  (The alternative is
1493    * probably to just go for exact matching of sizes, and assume
1494    * that for things like composited window resize the tiled
1495    * width/height alignment and rounding of sizes to pages will
1496    * get us useful cache hit rates anyway)
1497    */
1498   add_bucket(bufmgr, PAGE_SIZE);
1499   add_bucket(bufmgr, PAGE_SIZE * 2);
1500   add_bucket(bufmgr, PAGE_SIZE * 3);
1501
1502   /* Initialize the linked lists for BO reuse cache. */
1503   for (size = 4 * PAGE_SIZE; size <= cache_max_size; size *= 2) {
1504      add_bucket(bufmgr, size);
1505
1506      add_bucket(bufmgr, size + size * 1 / 4);
1507      add_bucket(bufmgr, size + size * 2 / 4);
1508      add_bucket(bufmgr, size + size * 3 / 4);
1509   }
1510}
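
/* The loop above yields buckets of 4, 8 and 12 KiB, then 16, 20, 24 and
 * 28 KiB, then 32, 40, 48 and 56 KiB, and so on: each power of two plus
 * 1/4, 2/4 and 3/4 steps, ending at 112 MiB.  That is 55 buckets in
 * total, which fits the 14 * 4 entry cache_bucket array.
 */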
1511
1512uint32_t
1513crocus_create_hw_context(struct crocus_bufmgr *bufmgr)
1514{
1515   struct drm_i915_gem_context_create create = { };
1516   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
1517   if (ret != 0) {
1518      DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", strerror(errno));
1519      return 0;
1520   }
1521
1522   /* Upon declaring a GPU hang, the kernel will zap the guilty context
1523    * back to the default logical HW state and attempt to continue on to
1524    * our next submitted batchbuffer.  However, our render batches assume
1525    * the previous GPU state is preserved, and only emit commands needed
1526    * to incrementally change that state.  In particular, we inherit the
1527    * STATE_BASE_ADDRESS and PIPELINE_SELECT settings, which are critical.
1528    * With default base addresses, our next batches will almost certainly
1529    * cause more GPU hangs, leading to repeated hangs until we're banned
1530    * or the machine is dead.
1531    *
1532    * Here we tell the kernel not to attempt to recover our context but
1533    * immediately (on the next batchbuffer submission) report that the
1534    * context is lost, and we will do the recovery ourselves.  Ideally,
1535    * we'll have two lost batches instead of a continual stream of hangs.
1536    */
1537   struct drm_i915_gem_context_param p = {
1538      .ctx_id = create.ctx_id,
1539      .param = I915_CONTEXT_PARAM_RECOVERABLE,
1540      .value = false,
1541   };
1542   drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p);
1543
1544   return create.ctx_id;
1545}
1546
1547static int
1548crocus_hw_context_get_priority(struct crocus_bufmgr *bufmgr, uint32_t ctx_id)
1549{
1550   struct drm_i915_gem_context_param p = {
1551      .ctx_id = ctx_id,
1552      .param = I915_CONTEXT_PARAM_PRIORITY,
1553   };
1554   drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p);
1555   return p.value; /* on error, return 0 i.e. default priority */
1556}
1557
1558int
1559crocus_hw_context_set_priority(struct crocus_bufmgr *bufmgr,
1560                               uint32_t ctx_id,
1561                               int priority)
1562{
1563   struct drm_i915_gem_context_param p = {
1564      .ctx_id = ctx_id,
1565      .param = I915_CONTEXT_PARAM_PRIORITY,
1566      .value = priority,
1567   };
1568   int err;
1569
1570   err = 0;
1571   if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p))
1572      err = -errno;
1573
1574   return err;
1575}
1576
1577uint32_t
1578crocus_clone_hw_context(struct crocus_bufmgr *bufmgr, uint32_t ctx_id)
1579{
1580   uint32_t new_ctx = crocus_create_hw_context(bufmgr);
1581
1582   if (new_ctx) {
1583      int priority = crocus_hw_context_get_priority(bufmgr, ctx_id);
1584      crocus_hw_context_set_priority(bufmgr, new_ctx, priority);
1585   }
1586
1587   return new_ctx;
1588}
1589
1590void
1591crocus_destroy_hw_context(struct crocus_bufmgr *bufmgr, uint32_t ctx_id)
1592{
1593   struct drm_i915_gem_context_destroy d = { .ctx_id = ctx_id };
1594
1595   if (ctx_id != 0 &&
1596       intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &d) != 0) {
1597      fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
1598              strerror(errno));
1599   }
1600}
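
/* Sketch of the context lifecycle these helpers implement (error handling
 * omitted; 0 is the default i915 priority):
 *
 *    uint32_t ctx = crocus_create_hw_context(bufmgr);
 *    crocus_hw_context_set_priority(bufmgr, ctx, 0);
 *    ...use ctx for batch submission...
 *    crocus_destroy_hw_context(bufmgr, ctx);
 *
 * crocus_clone_hw_context() combines creation with copying the priority
 * of an existing context.
 */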
1601
1602int
1603crocus_reg_read(struct crocus_bufmgr *bufmgr, uint32_t offset, uint64_t *result)
1604{
1605   struct drm_i915_reg_read reg_read = { .offset = offset };
1606   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
1607
1608   *result = reg_read.val;
1609   return ret;
1610}
1611
1612static int
1613gem_param(int fd, int name)
1614{
1615   int v = -1; /* No param uses (yet) the sign bit, reserve it for errors */
1616
1617   struct drm_i915_getparam gp = { .param = name, .value = &v };
1618   if (intel_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
1619      return -1;
1620
1621   return v;
1622}
1623
1624/**
1625 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
1626 * and manage buffer objects.
1627 *
1628 * \param fd File descriptor of the opened DRM device.
1629 */
1630static struct crocus_bufmgr *
1631crocus_bufmgr_create(struct intel_device_info *devinfo, int fd, bool bo_reuse)
1632{
1633   struct crocus_bufmgr *bufmgr = calloc(1, sizeof(*bufmgr));
1634   if (bufmgr == NULL)
1635      return NULL;
1636
1637   /* Handles to buffer objects belong to the device fd and are not
1638    * reference counted by the kernel.  If the same fd is used by
1639    * multiple parties (threads sharing the same screen bufmgr, or
1640    * even worse the same device fd passed to multiple libraries)
1641    * ownership of those handles is shared by those independent parties.
1642    *
1643    * Don't do this! Ensure that each library/bufmgr has its own device
1644    * fd so that its namespace does not clash with another.
1645    */
1646   bufmgr->fd = os_dupfd_cloexec(fd);
1647
1648   p_atomic_set(&bufmgr->refcount, 1);
1649
1650   simple_mtx_init(&bufmgr->lock, mtx_plain);
1651
1652   list_inithead(&bufmgr->zombie_list);
1653
1654   bufmgr->has_llc = devinfo->has_llc;
1655   bufmgr->has_tiling_uapi = devinfo->has_tiling_uapi;
1656   bufmgr->bo_reuse = bo_reuse;
1657   bufmgr->has_mmap_offset = gem_param(fd, I915_PARAM_MMAP_GTT_VERSION) >= 4;
1658
1659   init_cache_buckets(bufmgr);
1660
1661   bufmgr->name_table =
1662      _mesa_hash_table_create(NULL, key_hash_uint, key_uint_equal);
1663   bufmgr->handle_table =
1664      _mesa_hash_table_create(NULL, key_hash_uint, key_uint_equal);
1665
1666   return bufmgr;
1667}
1668
1669static struct crocus_bufmgr *
1670crocus_bufmgr_ref(struct crocus_bufmgr *bufmgr)
1671{
1672   p_atomic_inc(&bufmgr->refcount);
1673   return bufmgr;
1674}
1675
1676void
1677crocus_bufmgr_unref(struct crocus_bufmgr *bufmgr)
1678{
1679   simple_mtx_lock(&global_bufmgr_list_mutex);
1680   if (p_atomic_dec_zero(&bufmgr->refcount)) {
1681      list_del(&bufmgr->link);
1682      crocus_bufmgr_destroy(bufmgr);
1683   }
1684   simple_mtx_unlock(&global_bufmgr_list_mutex);
1685}
1686
1687/**
1688 * Gets an already existing GEM buffer manager or creates a new one.
1689 *
1690 * \param fd File descriptor of the opened DRM device.
1691 */
1692struct crocus_bufmgr *
1693crocus_bufmgr_get_for_fd(struct intel_device_info *devinfo, int fd, bool bo_reuse)
1694{
1695   struct stat st;
1696
1697   if (fstat(fd, &st))
1698      return NULL;
1699
1700   struct crocus_bufmgr *bufmgr = NULL;
1701
1702   simple_mtx_lock(&global_bufmgr_list_mutex);
1703   list_for_each_entry(struct crocus_bufmgr, iter_bufmgr, &global_bufmgr_list, link) {
1704      struct stat iter_st;
1705      if (fstat(iter_bufmgr->fd, &iter_st))
1706         continue;
1707
1708      if (st.st_rdev == iter_st.st_rdev) {
1709         assert(iter_bufmgr->bo_reuse == bo_reuse);
1710         bufmgr = crocus_bufmgr_ref(iter_bufmgr);
1711         goto unlock;
1712      }
1713   }
1714
1715   bufmgr = crocus_bufmgr_create(devinfo, fd, bo_reuse);
1716   if (bufmgr)
1717      list_addtail(&bufmgr->link, &global_bufmgr_list);
1718
1719 unlock:
1720   simple_mtx_unlock(&global_bufmgr_list_mutex);
1721
1722   return bufmgr;
1723}
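
/* Minimal sketch of how a caller might obtain its bufmgr (assumes devinfo
 * has already been filled in for the same DRM fd; names are illustrative):
 *
 *    struct crocus_bufmgr *bufmgr =
 *       crocus_bufmgr_get_for_fd(devinfo, screen_fd, true);
 *
 * Two screens opened on the same device node share one bufmgr thanks to
 * the st_rdev comparison above; each successful call must be balanced by
 * a crocus_bufmgr_unref().
 */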
1724
1725int
1726crocus_bufmgr_get_fd(struct crocus_bufmgr *bufmgr)
1727{
1728   return bufmgr->fd;
1729}
1730