1/*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23/**
24 * @file iris_bufmgr.c
25 *
26 * The Iris buffer manager.
27 *
28 * XXX: write better comments
29 * - BOs
30 * - Explain BO cache
31 * - main interface to GEM in the kernel
32 */
33
34#include <xf86drm.h>
35#include <util/u_atomic.h>
36#include <fcntl.h>
37#include <stdio.h>
38#include <stdlib.h>
39#include <string.h>
40#include <unistd.h>
41#include <assert.h>
42#include <sys/ioctl.h>
43#include <sys/mman.h>
44#include <sys/stat.h>
45#include <sys/types.h>
46#include <stdbool.h>
47#include <time.h>
48#include <unistd.h>
49
50#include "errno.h"
51#include "common/intel_aux_map.h"
52#include "common/intel_clflush.h"
53#include "dev/intel_debug.h"
54#include "common/intel_gem.h"
55#include "dev/intel_device_info.h"
56#include "isl/isl.h"
57#include "main/macros.h"
58#include "os/os_mman.h"
59#include "util/debug.h"
60#include "util/macros.h"
61#include "util/hash_table.h"
62#include "util/list.h"
63#include "util/os_file.h"
64#include "util/u_dynarray.h"
65#include "util/vma.h"
66#include "iris_bufmgr.h"
67#include "iris_context.h"
68#include "string.h"
69
70#include "drm-uapi/i915_drm.h"
71
#ifndef HAVE_VALGRIND
#define VG(x)
#else
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#endif

/* VALGRIND_FREELIKE_BLOCK does not really undo an earlier
 * VALGRIND_MALLOCLIKE_BLOCK; it leaves valgrind convinced the memory is
 * leaked, because its VG_USERREQ__FREELIKE_BLOCK handler never calls
 * VG(cli_free).  So rather than treating a mapping like an allocation,
 * we mark it as available for use when it is mmapped and revoke access
 * again when it is unmapped.
 */
#define VG_DEFINED(ptr, size) VG(VALGRIND_MAKE_MEM_DEFINED(ptr, size))
#define VG_NOACCESS(ptr, size) VG(VALGRIND_MAKE_MEM_NOACCESS(ptr, size))
89
/* FreeBSD already defines PAGE_SIZE in /usr/include/machine/param.h,
 * which ends up included here indirectly.  Only provide the fallback
 * value when nothing else has defined it.
 */
#if !defined(PAGE_SIZE)
#define PAGE_SIZE 4096
#endif
97
/* Print "WARNING: " followed by a printf-style message to stderr the first
 * time `cond` is true at a given call site; later hits at the same site are
 * silent.  `cond` is evaluated on every pass.
 */
#define WARN_ONCE(cond, ...) do {                               \
   static bool _warn_once_fired = false;                        \
   if (unlikely(cond) && !_warn_once_fired) {                   \
      fprintf(stderr, "WARNING: ");                             \
      fprintf(stderr, __VA_ARGS__);                             \
      _warn_once_fired = true;                                  \
   }                                                            \
} while (0)

#define FILE_DEBUG_FLAG DEBUG_BUFMGR
110
111/**
112 * For debugging purposes, this returns a time in seconds.
113 */
114static double
115get_time(void)
116{
117   struct timespec tp;
118
119   clock_gettime(CLOCK_MONOTONIC, &tp);
120
121   return tp.tv_sec + tp.tv_nsec / 1000000000.0;
122}
123
/**
 * Atomically add \p add to \p *v, unless \p *v currently equals \p unless.
 *
 * The loop retries the compare-and-swap with the freshly observed value
 * until it either succeeds or sees \p unless (this relies on
 * p_atomic_cmpxchg handing back the value it found in \p *v).
 *
 * \return non-zero if \p *v was equal to \p unless (nothing was added),
 *         zero if the addition was performed.
 */
static inline int
atomic_add_unless(int *v, int add, int unless)
{
   int cur = p_atomic_read(v);

   while (cur != unless) {
      const int seen = p_atomic_cmpxchg(v, cur, cur + add);
      if (seen == cur)
         break;
      cur = seen;
   }

   return cur == unless;
}
133
134static const char *
135memzone_name(enum iris_memory_zone memzone)
136{
137   const char *names[] = {
138      [IRIS_MEMZONE_SHADER]   = "shader",
139      [IRIS_MEMZONE_BINDER]   = "binder",
140      [IRIS_MEMZONE_BINDLESS] = "scratchsurf",
141      [IRIS_MEMZONE_SURFACE]  = "surface",
142      [IRIS_MEMZONE_DYNAMIC]  = "dynamic",
143      [IRIS_MEMZONE_OTHER]    = "other",
144      [IRIS_MEMZONE_BORDER_COLOR_POOL] = "bordercolor",
145   };
146   assert(memzone < ARRAY_SIZE(names));
147   return names[memzone];
148}
149
150struct bo_cache_bucket {
151   /** List of cached BOs. */
152   struct list_head head;
153
154   /** Size of this bucket, in bytes. */
155   uint64_t size;
156};
157
158struct bo_export {
159   /** File descriptor associated with a handle export. */
160   int drm_fd;
161
162   /** GEM handle in drm_fd */
163   uint32_t gem_handle;
164
165   struct list_head link;
166};
167
168struct iris_memregion {
169   struct drm_i915_gem_memory_class_instance region;
170   uint64_t size;
171};
172
/* Number of pb_slabs allocators kept in iris_bufmgr::bo_slabs. */
#define NUM_SLAB_ALLOCATORS 3

/** Heap selector passed to the slab allocator. */
enum iris_heap {
   IRIS_HEAP_SYSTEM_MEMORY = 0,
   IRIS_HEAP_DEVICE_LOCAL,

   /** Number of heaps; not a valid heap itself. */
   IRIS_HEAP_MAX,
};
180
181struct iris_slab {
182   struct pb_slab base;
183
184   unsigned entry_size;
185
186   /** The BO representing the entire slab */
187   struct iris_bo *bo;
188
189   /** Array of iris_bo structs representing BOs allocated out of this slab */
190   struct iris_bo *entries;
191};
192
193struct iris_bufmgr {
194   /**
195    * List into the list of bufmgr.
196    */
197   struct list_head link;
198
199   uint32_t refcount;
200
201   int fd;
202
203   simple_mtx_t lock;
204   simple_mtx_t bo_deps_lock;
205
206   /** Array of lists of cached gem objects of power-of-two sizes */
207   struct bo_cache_bucket cache_bucket[14 * 4];
208   int num_buckets;
209
210   /** Same as cache_bucket, but for local memory gem objects */
211   struct bo_cache_bucket local_cache_bucket[14 * 4];
212   int num_local_buckets;
213
214   time_t time;
215
216   struct hash_table *name_table;
217   struct hash_table *handle_table;
218
219   /**
220    * List of BOs which we've effectively freed, but are hanging on to
221    * until they're idle before closing and returning the VMA.
222    */
223   struct list_head zombie_list;
224
225   struct util_vma_heap vma_allocator[IRIS_MEMZONE_COUNT];
226
227   uint64_t vma_min_align;
228   struct iris_memregion vram, sys;
229
230   int next_screen_id;
231
232   bool has_llc:1;
233   bool has_local_mem:1;
234   bool has_mmap_offset:1;
235   bool has_tiling_uapi:1;
236   bool has_userptr_probe:1;
237   bool bo_reuse:1;
238
239   struct intel_aux_map_context *aux_map_ctx;
240
241   struct pb_slabs bo_slabs[NUM_SLAB_ALLOCATORS];
242};
243
244static simple_mtx_t global_bufmgr_list_mutex = _SIMPLE_MTX_INITIALIZER_NP;
245static struct list_head global_bufmgr_list = {
246   .next = &global_bufmgr_list,
247   .prev = &global_bufmgr_list,
248};
249
250static void bo_free(struct iris_bo *bo);
251
252static struct iris_bo *
253find_and_ref_external_bo(struct hash_table *ht, unsigned int key)
254{
255   struct hash_entry *entry = _mesa_hash_table_search(ht, &key);
256   struct iris_bo *bo = entry ? entry->data : NULL;
257
258   if (bo) {
259      assert(iris_bo_is_external(bo));
260      assert(iris_bo_is_real(bo));
261      assert(!bo->real.reusable);
262
263      /* Being non-reusable, the BO cannot be in the cache lists, but it
264       * may be in the zombie list if it had reached zero references, but
265       * we hadn't yet closed it...and then reimported the same BO.  If it
266       * is, then remove it since it's now been resurrected.
267       */
268      if (list_is_linked(&bo->head))
269         list_del(&bo->head);
270
271      iris_bo_reference(bo);
272   }
273
274   return bo;
275}
276
277/**
278 * This function finds the correct bucket fit for the input size.
279 * The function works with O(1) complexity when the requested size
280 * was queried instead of iterating the size through all the buckets.
281 */
282static struct bo_cache_bucket *
283bucket_for_size(struct iris_bufmgr *bufmgr, uint64_t size, bool local)
284{
285   /* Calculating the pages and rounding up to the page size. */
286   const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
287
288   /* Row  Bucket sizes    clz((x-1) | 3)   Row    Column
289    *        in pages                      stride   size
290    *   0:   1  2  3  4 -> 30 30 30 30        4       1
291    *   1:   5  6  7  8 -> 29 29 29 29        4       1
292    *   2:  10 12 14 16 -> 28 28 28 28        8       2
293    *   3:  20 24 28 32 -> 27 27 27 27       16       4
294    */
295   const unsigned row = 30 - __builtin_clz((pages - 1) | 3);
296   const unsigned row_max_pages = 4 << row;
297
298   /* The '& ~2' is the special case for row 1. In row 1, max pages /
299    * 2 is 2, but the previous row maximum is zero (because there is
300    * no previous row). All row maximum sizes are power of 2, so that
301    * is the only case where that bit will be set.
302    */
303   const unsigned prev_row_max_pages = (row_max_pages / 2) & ~2;
304   int col_size_log2 = row - 1;
305   col_size_log2 += (col_size_log2 < 0);
306
307   const unsigned col = (pages - prev_row_max_pages +
308                        ((1 << col_size_log2) - 1)) >> col_size_log2;
309
310   /* Calculating the index based on the row and column. */
311   const unsigned index = (row * 4) + (col - 1);
312
313   int num_buckets = local ? bufmgr->num_local_buckets : bufmgr->num_buckets;
314   struct bo_cache_bucket *buckets = local ?
315      bufmgr->local_cache_bucket : bufmgr->cache_bucket;
316
317   return (index < num_buckets) ? &buckets[index] : NULL;
318}
319
320enum iris_memory_zone
321iris_memzone_for_address(uint64_t address)
322{
323   STATIC_ASSERT(IRIS_MEMZONE_OTHER_START    > IRIS_MEMZONE_DYNAMIC_START);
324   STATIC_ASSERT(IRIS_MEMZONE_DYNAMIC_START  > IRIS_MEMZONE_SURFACE_START);
325   STATIC_ASSERT(IRIS_MEMZONE_SURFACE_START  > IRIS_MEMZONE_BINDLESS_START);
326   STATIC_ASSERT(IRIS_MEMZONE_BINDLESS_START > IRIS_MEMZONE_BINDER_START);
327   STATIC_ASSERT(IRIS_MEMZONE_BINDER_START   > IRIS_MEMZONE_SHADER_START);
328   STATIC_ASSERT(IRIS_BORDER_COLOR_POOL_ADDRESS == IRIS_MEMZONE_DYNAMIC_START);
329
330   if (address >= IRIS_MEMZONE_OTHER_START)
331      return IRIS_MEMZONE_OTHER;
332
333   if (address == IRIS_BORDER_COLOR_POOL_ADDRESS)
334      return IRIS_MEMZONE_BORDER_COLOR_POOL;
335
336   if (address > IRIS_MEMZONE_DYNAMIC_START)
337      return IRIS_MEMZONE_DYNAMIC;
338
339   if (address >= IRIS_MEMZONE_SURFACE_START)
340      return IRIS_MEMZONE_SURFACE;
341
342   if (address >= IRIS_MEMZONE_BINDLESS_START)
343      return IRIS_MEMZONE_BINDLESS;
344
345   if (address >= IRIS_MEMZONE_BINDER_START)
346      return IRIS_MEMZONE_BINDER;
347
348   return IRIS_MEMZONE_SHADER;
349}
350
351/**
352 * Allocate a section of virtual memory for a buffer, assigning an address.
353 *
354 * This uses either the bucket allocator for the given size, or the large
355 * object allocator (util_vma).
356 */
357static uint64_t
358vma_alloc(struct iris_bufmgr *bufmgr,
359          enum iris_memory_zone memzone,
360          uint64_t size,
361          uint64_t alignment)
362{
363   /* Force minimum alignment based on device requirements */
364   assert((alignment & (alignment - 1)) == 0);
365   alignment = MAX2(alignment, bufmgr->vma_min_align);
366
367   if (memzone == IRIS_MEMZONE_BORDER_COLOR_POOL)
368      return IRIS_BORDER_COLOR_POOL_ADDRESS;
369
370   /* The binder handles its own allocations.  Return non-zero here. */
371   if (memzone == IRIS_MEMZONE_BINDER)
372      return IRIS_MEMZONE_BINDER_START;
373
374   uint64_t addr =
375      util_vma_heap_alloc(&bufmgr->vma_allocator[memzone], size, alignment);
376
377   assert((addr >> 48ull) == 0);
378   assert((addr % alignment) == 0);
379
380   return intel_canonical_address(addr);
381}
382
383static void
384vma_free(struct iris_bufmgr *bufmgr,
385         uint64_t address,
386         uint64_t size)
387{
388   if (address == IRIS_BORDER_COLOR_POOL_ADDRESS)
389      return;
390
391   /* Un-canonicalize the address. */
392   address = intel_48b_address(address);
393
394   if (address == 0ull)
395      return;
396
397   enum iris_memory_zone memzone = iris_memzone_for_address(address);
398
399   /* The binder handles its own allocations. */
400   if (memzone == IRIS_MEMZONE_BINDER)
401      return;
402
403   assert(memzone < ARRAY_SIZE(bufmgr->vma_allocator));
404
405   util_vma_heap_free(&bufmgr->vma_allocator[memzone], address, size);
406}
407
408static bool
409iris_bo_busy_gem(struct iris_bo *bo)
410{
411   assert(iris_bo_is_real(bo));
412
413   struct iris_bufmgr *bufmgr = bo->bufmgr;
414   struct drm_i915_gem_busy busy = { .handle = bo->gem_handle };
415
416   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
417   if (ret == 0) {
418      return busy.busy;
419   }
420   return false;
421}
422
423/* A timeout of 0 just checks for busyness. */
424static int
425iris_bo_wait_syncobj(struct iris_bo *bo, int64_t timeout_ns)
426{
427   int ret = 0;
428   struct iris_bufmgr *bufmgr = bo->bufmgr;
429
430   /* If we know it's idle, don't bother with the kernel round trip */
431   if (bo->idle)
432      return 0;
433
434   simple_mtx_lock(&bufmgr->bo_deps_lock);
435
436   uint32_t handles[bo->deps_size * IRIS_BATCH_COUNT * 2];
437   int handle_count = 0;
438
439   for (int d = 0; d < bo->deps_size; d++) {
440      for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
441         struct iris_syncobj *r = bo->deps[d].read_syncobjs[b];
442         struct iris_syncobj *w = bo->deps[d].write_syncobjs[b];
443         if (r)
444            handles[handle_count++] = r->handle;
445         if (w)
446            handles[handle_count++] = w->handle;
447      }
448   }
449
450   if (handle_count == 0)
451      goto out;
452
453   /* Unlike the gem wait, negative values are not infinite here. */
454   int64_t timeout_abs = os_time_get_absolute_timeout(timeout_ns);
455   if (timeout_abs < 0)
456      timeout_abs = INT64_MAX;
457
458   struct drm_syncobj_wait args = {
459      .handles = (uintptr_t) handles,
460      .timeout_nsec = timeout_abs,
461      .count_handles = handle_count,
462      .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
463   };
464
465   ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args);
466   if (ret != 0) {
467      ret = -errno;
468      goto out;
469   }
470
471   /* We just waited everything, so clean all the deps. */
472   for (int d = 0; d < bo->deps_size; d++) {
473      for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
474         iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
475         iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
476      }
477   }
478
479out:
480   simple_mtx_unlock(&bufmgr->bo_deps_lock);
481   return ret;
482}
483
/**
 * Busy check via syncobjs: a zero-timeout wait that reports -ETIME means
 * the BO still has outstanding work.
 */
static bool
iris_bo_busy_syncobj(struct iris_bo *bo)
{
   const int status = iris_bo_wait_syncobj(bo, 0);

   return status == -ETIME;
}
489
490bool
491iris_bo_busy(struct iris_bo *bo)
492{
493   bool busy;
494   if (iris_bo_is_external(bo))
495      busy = iris_bo_busy_gem(bo);
496   else
497      busy = iris_bo_busy_syncobj(bo);
498
499   bo->idle = !busy;
500
501   return busy;
502}
503
504int
505iris_bo_madvise(struct iris_bo *bo, int state)
506{
507   /* We can't madvise suballocated BOs. */
508   assert(iris_bo_is_real(bo));
509
510   struct drm_i915_gem_madvise madv = {
511      .handle = bo->gem_handle,
512      .madv = state,
513      .retained = 1,
514   };
515
516   intel_ioctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
517
518   return madv.retained;
519}
520
521static struct iris_bo *
522bo_calloc(void)
523{
524   struct iris_bo *bo = calloc(1, sizeof(*bo));
525   if (!bo)
526      return NULL;
527
528   list_inithead(&bo->real.exports);
529
530   bo->hash = _mesa_hash_pointer(bo);
531
532   return bo;
533}
534
535static void
536bo_unmap(struct iris_bo *bo)
537{
538   assert(iris_bo_is_real(bo));
539
540   VG_NOACCESS(bo->real.map, bo->size);
541   os_munmap(bo->real.map, bo->size);
542   bo->real.map = NULL;
543}
544
545static struct pb_slabs *
546get_slabs(struct iris_bufmgr *bufmgr, uint64_t size)
547{
548   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
549      struct pb_slabs *slabs = &bufmgr->bo_slabs[i];
550
551      if (size <= 1ull << (slabs->min_order + slabs->num_orders - 1))
552         return slabs;
553   }
554
555   unreachable("should have found a valid slab for this size");
556}
557
558/* Return the power of two size of a slab entry matching the input size. */
559static unsigned
560get_slab_pot_entry_size(struct iris_bufmgr *bufmgr, unsigned size)
561{
562   unsigned entry_size = util_next_power_of_two(size);
563   unsigned min_entry_size = 1 << bufmgr->bo_slabs[0].min_order;
564
565   return MAX2(entry_size, min_entry_size);
566}
567
/* Alignment of a slab entry for the given size: the full power-of-two
 * entry size, or a quarter of it when the request fits in 3/4 of that
 * power of two.
 */
static unsigned
get_slab_entry_alignment(struct iris_bufmgr *bufmgr, unsigned size)
{
   const unsigned pot_size = get_slab_pot_entry_size(bufmgr, size);

   if (size > pot_size * 3 / 4)
      return pot_size;

   return pot_size / 4;
}
579
580static bool
581iris_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
582{
583   struct iris_bo *bo = container_of(entry, struct iris_bo, slab.entry);
584
585   return !iris_bo_busy(bo);
586}
587
588static void
589iris_slab_free(void *priv, struct pb_slab *pslab)
590{
591   struct iris_bufmgr *bufmgr = priv;
592   struct iris_slab *slab = (void *) pslab;
593   struct intel_aux_map_context *aux_map_ctx = bufmgr->aux_map_ctx;
594
595   assert(!slab->bo->aux_map_address);
596
597   /* Since we're freeing the whole slab, all buffers allocated out of it
598    * must be reclaimable.  We require buffers to be idle to be reclaimed
599    * (see iris_can_reclaim_slab()), so we know all entries must be idle.
600    * Therefore, we can safely unmap their aux table entries.
601    */
602   for (unsigned i = 0; i < pslab->num_entries; i++) {
603      struct iris_bo *bo = &slab->entries[i];
604      if (aux_map_ctx && bo->aux_map_address) {
605         intel_aux_map_unmap_range(aux_map_ctx, bo->address, bo->size);
606         bo->aux_map_address = 0;
607      }
608
609      /* Unref read/write dependency syncobjs and free the array. */
610      for (int d = 0; d < bo->deps_size; d++) {
611         for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
612            iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
613            iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
614         }
615      }
616      free(bo->deps);
617   }
618
619   iris_bo_unreference(slab->bo);
620
621   free(slab->entries);
622   free(slab);
623}
624
625static struct pb_slab *
626iris_slab_alloc(void *priv,
627                unsigned heap,
628                unsigned entry_size,
629                unsigned group_index)
630{
631   struct iris_bufmgr *bufmgr = priv;
632   struct iris_slab *slab = calloc(1, sizeof(struct iris_slab));
633   unsigned flags = heap == IRIS_HEAP_SYSTEM_MEMORY ? BO_ALLOC_SMEM : 0;
634   unsigned slab_size = 0;
635   /* We only support slab allocation for IRIS_MEMZONE_OTHER */
636   enum iris_memory_zone memzone = IRIS_MEMZONE_OTHER;
637
638   if (!slab)
639      return NULL;
640
641   struct pb_slabs *slabs = bufmgr->bo_slabs;
642
643   /* Determine the slab buffer size. */
644   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
645      unsigned max_entry_size =
646         1 << (slabs[i].min_order + slabs[i].num_orders - 1);
647
648      if (entry_size <= max_entry_size) {
649         /* The slab size is twice the size of the largest possible entry. */
650         slab_size = max_entry_size * 2;
651
652         if (!util_is_power_of_two_nonzero(entry_size)) {
653            assert(util_is_power_of_two_nonzero(entry_size * 4 / 3));
654
655            /* If the entry size is 3/4 of a power of two, we would waste
656             * space and not gain anything if we allocated only twice the
657             * power of two for the backing buffer:
658             *
659             *    2 * 3/4 = 1.5 usable with buffer size 2
660             *
661             * Allocating 5 times the entry size leads us to the next power
662             * of two and results in a much better memory utilization:
663             *
664             *    5 * 3/4 = 3.75 usable with buffer size 4
665             */
666            if (entry_size * 5 > slab_size)
667               slab_size = util_next_power_of_two(entry_size * 5);
668         }
669
670         /* The largest slab should have the same size as the PTE fragment
671          * size to get faster address translation.
672          *
673          * TODO: move this to intel_device_info?
674          */
675         const unsigned pte_size = 2 * 1024 * 1024;
676
677         if (i == NUM_SLAB_ALLOCATORS - 1 && slab_size < pte_size)
678            slab_size = pte_size;
679
680         break;
681      }
682   }
683   assert(slab_size != 0);
684
685   slab->bo =
686      iris_bo_alloc(bufmgr, "slab", slab_size, slab_size, memzone, flags);
687   if (!slab->bo)
688      goto fail;
689
690   slab_size = slab->bo->size;
691
692   slab->base.num_entries = slab_size / entry_size;
693   slab->base.num_free = slab->base.num_entries;
694   slab->entry_size = entry_size;
695   slab->entries = calloc(slab->base.num_entries, sizeof(*slab->entries));
696   if (!slab->entries)
697      goto fail_bo;
698
699   list_inithead(&slab->base.free);
700
701   for (unsigned i = 0; i < slab->base.num_entries; i++) {
702      struct iris_bo *bo = &slab->entries[i];
703
704      bo->size = entry_size;
705      bo->bufmgr = bufmgr;
706      bo->hash = _mesa_hash_pointer(bo);
707      bo->gem_handle = 0;
708      bo->address = slab->bo->address + i * entry_size;
709      bo->aux_map_address = 0;
710      bo->index = -1;
711      bo->refcount = 0;
712      bo->idle = true;
713
714      bo->slab.entry.slab = &slab->base;
715      bo->slab.entry.group_index = group_index;
716      bo->slab.entry.entry_size = entry_size;
717
718      bo->slab.real = iris_get_backing_bo(slab->bo);
719
720      list_addtail(&bo->slab.entry.head, &slab->base.free);
721   }
722
723   return &slab->base;
724
725fail_bo:
726   iris_bo_unreference(slab->bo);
727fail:
728   free(slab);
729   return NULL;
730}
731
732static struct iris_bo *
733alloc_bo_from_slabs(struct iris_bufmgr *bufmgr,
734                    const char *name,
735                    uint64_t size,
736                    uint32_t alignment,
737                    unsigned flags,
738                    bool local)
739{
740   if (flags & BO_ALLOC_NO_SUBALLOC)
741      return NULL;
742
743   struct pb_slabs *last_slab = &bufmgr->bo_slabs[NUM_SLAB_ALLOCATORS - 1];
744   unsigned max_slab_entry_size =
745      1 << (last_slab->min_order + last_slab->num_orders - 1);
746
747   if (size > max_slab_entry_size)
748      return NULL;
749
750   struct pb_slab_entry *entry;
751
752   enum iris_heap heap =
753      local ? IRIS_HEAP_DEVICE_LOCAL : IRIS_HEAP_SYSTEM_MEMORY;
754
755   unsigned alloc_size = size;
756
757   /* Always use slabs for sizes less than 4 KB because the kernel aligns
758    * everything to 4 KB.
759    */
760   if (size < alignment && alignment <= 4 * 1024)
761      alloc_size = alignment;
762
763   if (alignment > get_slab_entry_alignment(bufmgr, alloc_size)) {
764      /* 3/4 allocations can return too small alignment.
765       * Try again with a power of two allocation size.
766       */
767      unsigned pot_size = get_slab_pot_entry_size(bufmgr, alloc_size);
768
769      if (alignment <= pot_size) {
770         /* This size works but wastes some memory to fulfill the alignment. */
771         alloc_size = pot_size;
772      } else {
773         /* can't fulfill alignment requirements */
774         return NULL;
775      }
776   }
777
778   struct pb_slabs *slabs = get_slabs(bufmgr, alloc_size);
779   entry = pb_slab_alloc(slabs, alloc_size, heap);
780   if (!entry) {
781      /* Clean up and try again... */
782      pb_slabs_reclaim(slabs);
783
784      entry = pb_slab_alloc(slabs, alloc_size, heap);
785   }
786   if (!entry)
787      return NULL;
788
789   struct iris_bo *bo = container_of(entry, struct iris_bo, slab.entry);
790
791   if (bo->aux_map_address && bo->bufmgr->aux_map_ctx) {
792      /* This buffer was associated with an aux-buffer range.  We only allow
793       * slab allocated buffers to be reclaimed when idle (not in use by an
794       * executing batch).  (See iris_can_reclaim_slab().)  So we know that
795       * our previous aux mapping is no longer in use, and we can safely
796       * remove it.
797       */
798      intel_aux_map_unmap_range(bo->bufmgr->aux_map_ctx, bo->address,
799                                bo->size);
800      bo->aux_map_address = 0;
801   }
802
803   p_atomic_set(&bo->refcount, 1);
804   bo->name = name;
805   bo->size = size;
806
807   /* Zero the contents if necessary.  If this fails, fall back to
808    * allocating a fresh BO, which will always be zeroed by the kernel.
809    */
810   if (flags & BO_ALLOC_ZEROED) {
811      void *map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
812      if (map) {
813         memset(map, 0, bo->size);
814      } else {
815         pb_slab_free(slabs, &bo->slab.entry);
816         return NULL;
817      }
818   }
819
820   return bo;
821}
822
823static struct iris_bo *
824alloc_bo_from_cache(struct iris_bufmgr *bufmgr,
825                    struct bo_cache_bucket *bucket,
826                    uint32_t alignment,
827                    enum iris_memory_zone memzone,
828                    enum iris_mmap_mode mmap_mode,
829                    unsigned flags,
830                    bool match_zone)
831{
832   if (!bucket)
833      return NULL;
834
835   struct iris_bo *bo = NULL;
836
837   list_for_each_entry_safe(struct iris_bo, cur, &bucket->head, head) {
838      assert(iris_bo_is_real(cur));
839
840      /* Find one that's got the right mapping type.  We used to swap maps
841       * around but the kernel doesn't allow this on discrete GPUs.
842       */
843      if (mmap_mode != cur->real.mmap_mode)
844         continue;
845
846      /* Try a little harder to find one that's already in the right memzone */
847      if (match_zone && memzone != iris_memzone_for_address(cur->address))
848         continue;
849
850      /* If the last BO in the cache is busy, there are no idle BOs.  Bail,
851       * either falling back to a non-matching memzone, or if that fails,
852       * allocating a fresh buffer.
853       */
854      if (iris_bo_busy(cur))
855         return NULL;
856
857      list_del(&cur->head);
858
859      /* Tell the kernel we need this BO.  If it still exists, we're done! */
860      if (iris_bo_madvise(cur, I915_MADV_WILLNEED)) {
861         bo = cur;
862         break;
863      }
864
865      /* This BO was purged, throw it out and keep looking. */
866      bo_free(cur);
867   }
868
869   if (!bo)
870      return NULL;
871
872   if (bo->aux_map_address) {
873      /* This buffer was associated with an aux-buffer range. We make sure
874       * that buffers are not reused from the cache while the buffer is (busy)
875       * being used by an executing batch. Since we are here, the buffer is no
876       * longer being used by a batch and the buffer was deleted (in order to
877       * end up in the cache). Therefore its old aux-buffer range can be
878       * removed from the aux-map.
879       */
880      if (bo->bufmgr->aux_map_ctx)
881         intel_aux_map_unmap_range(bo->bufmgr->aux_map_ctx, bo->address,
882                                   bo->size);
883      bo->aux_map_address = 0;
884   }
885
886   /* If the cached BO isn't in the right memory zone, or the alignment
887    * isn't sufficient, free the old memory and assign it a new address.
888    */
889   if (memzone != iris_memzone_for_address(bo->address) ||
890       bo->address % alignment != 0) {
891      vma_free(bufmgr, bo->address, bo->size);
892      bo->address = 0ull;
893   }
894
895   /* Zero the contents if necessary.  If this fails, fall back to
896    * allocating a fresh BO, which will always be zeroed by the kernel.
897    */
898   if (flags & BO_ALLOC_ZEROED) {
899      void *map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
900      if (map) {
901         memset(map, 0, bo->size);
902      } else {
903         bo_free(bo);
904         return NULL;
905      }
906   }
907
908   return bo;
909}
910
911static struct iris_bo *
912alloc_fresh_bo(struct iris_bufmgr *bufmgr, uint64_t bo_size, bool local)
913{
914   struct iris_bo *bo = bo_calloc();
915   if (!bo)
916      return NULL;
917
918   /* If we have vram size, we have multiple memory regions and should choose
919    * one of them.
920    */
921   if (bufmgr->vram.size > 0) {
922      /* All new BOs we get from the kernel are zeroed, so we don't need to
923       * worry about that here.
924       */
925      struct drm_i915_gem_memory_class_instance regions[2];
926      uint32_t nregions = 0;
927      if (local) {
928         /* For vram allocations, still use system memory as a fallback. */
929         regions[nregions++] = bufmgr->vram.region;
930         regions[nregions++] = bufmgr->sys.region;
931      } else {
932         regions[nregions++] = bufmgr->sys.region;
933      }
934
935      struct drm_i915_gem_create_ext_memory_regions ext_regions = {
936         .base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS },
937         .num_regions = nregions,
938         .regions = (uintptr_t)regions,
939      };
940
941      struct drm_i915_gem_create_ext create = {
942         .size = bo_size,
943         .extensions = (uintptr_t)&ext_regions,
944      };
945
946      /* It should be safe to use GEM_CREATE_EXT without checking, since we are
947       * in the side of the branch where discrete memory is available. So we
948       * can assume GEM_CREATE_EXT is supported already.
949       */
950      if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create) != 0) {
951         free(bo);
952         return NULL;
953      }
954      bo->gem_handle = create.handle;
955   } else {
956      struct drm_i915_gem_create create = { .size = bo_size };
957
958      /* All new BOs we get from the kernel are zeroed, so we don't need to
959       * worry about that here.
960       */
961      if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CREATE, &create) != 0) {
962         free(bo);
963         return NULL;
964      }
965      bo->gem_handle = create.handle;
966   }
967
968   bo->bufmgr = bufmgr;
969   bo->size = bo_size;
970   bo->idle = true;
971   bo->real.local = local;
972
973   if (bufmgr->vram.size == 0) {
974      /* Calling set_domain() will allocate pages for the BO outside of the
975       * struct mutex lock in the kernel, which is more efficient than waiting
976       * to create them during the first execbuf that uses the BO.
977       */
978      struct drm_i915_gem_set_domain sd = {
979         .handle = bo->gem_handle,
980         .read_domains = I915_GEM_DOMAIN_CPU,
981         .write_domain = 0,
982      };
983
984      intel_ioctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);
985   }
986
987   return bo;
988}
989
990struct iris_bo *
991iris_bo_alloc(struct iris_bufmgr *bufmgr,
992              const char *name,
993              uint64_t size,
994              uint32_t alignment,
995              enum iris_memory_zone memzone,
996              unsigned flags)
997{
998   struct iris_bo *bo;
999   unsigned int page_size = getpagesize();
1000   bool local = bufmgr->vram.size > 0 &&
1001      !(flags & BO_ALLOC_COHERENT || flags & BO_ALLOC_SMEM);
1002   struct bo_cache_bucket *bucket = bucket_for_size(bufmgr, size, local);
1003
1004   if (memzone != IRIS_MEMZONE_OTHER || (flags & BO_ALLOC_COHERENT))
1005      flags |= BO_ALLOC_NO_SUBALLOC;
1006
1007   bo = alloc_bo_from_slabs(bufmgr, name, size, alignment, flags, local);
1008
1009   if (bo)
1010      return bo;
1011
1012   /* Round the size up to the bucket size, or if we don't have caching
1013    * at this size, a multiple of the page size.
1014    */
1015   uint64_t bo_size =
1016      bucket ? bucket->size : MAX2(ALIGN(size, page_size), page_size);
1017
1018   bool is_coherent = bufmgr->has_llc ||
1019                      (bufmgr->vram.size > 0 && !local) ||
1020                      (flags & BO_ALLOC_COHERENT);
1021   bool is_scanout = (flags & BO_ALLOC_SCANOUT) != 0;
1022   enum iris_mmap_mode mmap_mode =
1023      !local && is_coherent && !is_scanout ? IRIS_MMAP_WB : IRIS_MMAP_WC;
1024
1025   simple_mtx_lock(&bufmgr->lock);
1026
1027   /* Get a buffer out of the cache if available.  First, we try to find
1028    * one with a matching memory zone so we can avoid reallocating VMA.
1029    */
1030   bo = alloc_bo_from_cache(bufmgr, bucket, alignment, memzone, mmap_mode,
1031                            flags, true);
1032
1033   /* If that fails, we try for any cached BO, without matching memzone. */
1034   if (!bo) {
1035      bo = alloc_bo_from_cache(bufmgr, bucket, alignment, memzone, mmap_mode,
1036                               flags, false);
1037   }
1038
1039   simple_mtx_unlock(&bufmgr->lock);
1040
1041   if (!bo) {
1042      bo = alloc_fresh_bo(bufmgr, bo_size, local);
1043      if (!bo)
1044         return NULL;
1045   }
1046
1047   if (bo->address == 0ull) {
1048      simple_mtx_lock(&bufmgr->lock);
1049      bo->address = vma_alloc(bufmgr, memzone, bo->size, alignment);
1050      simple_mtx_unlock(&bufmgr->lock);
1051
1052      if (bo->address == 0ull)
1053         goto err_free;
1054   }
1055
1056   bo->name = name;
1057   p_atomic_set(&bo->refcount, 1);
1058   bo->real.reusable = bucket && bufmgr->bo_reuse;
1059   bo->index = -1;
1060   bo->real.kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
1061
1062   /* By default, capture all driver-internal buffers like shader kernels,
1063    * surface states, dynamic states, border colors, and so on.
1064    */
1065   if (memzone < IRIS_MEMZONE_OTHER)
1066      bo->real.kflags |= EXEC_OBJECT_CAPTURE;
1067
1068   assert(bo->real.map == NULL || bo->real.mmap_mode == mmap_mode);
1069   bo->real.mmap_mode = mmap_mode;
1070
1071   /* On integrated GPUs, enable snooping to ensure coherency if needed.
1072    * For discrete, we instead use SMEM and avoid WB maps for coherency.
1073    */
1074   if ((flags & BO_ALLOC_COHERENT) &&
1075       !bufmgr->has_llc && bufmgr->vram.size == 0) {
1076      struct drm_i915_gem_caching arg = {
1077         .handle = bo->gem_handle,
1078         .caching = 1,
1079      };
1080      if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) != 0)
1081         goto err_free;
1082
1083      bo->real.reusable = false;
1084   }
1085
1086   DBG("bo_create: buf %d (%s) (%s memzone) (%s) %llub\n", bo->gem_handle,
1087       bo->name, memzone_name(memzone), bo->real.local ? "local" : "system",
1088       (unsigned long long) size);
1089
1090   return bo;
1091
1092err_free:
1093   bo_free(bo);
1094   return NULL;
1095}
1096
1097struct iris_bo *
1098iris_bo_create_userptr(struct iris_bufmgr *bufmgr, const char *name,
1099                       void *ptr, size_t size,
1100                       enum iris_memory_zone memzone)
1101{
1102   struct drm_gem_close close = { 0, };
1103   struct iris_bo *bo;
1104
1105   bo = bo_calloc();
1106   if (!bo)
1107      return NULL;
1108
1109   struct drm_i915_gem_userptr arg = {
1110      .user_ptr = (uintptr_t)ptr,
1111      .user_size = size,
1112      .flags = bufmgr->has_userptr_probe ? I915_USERPTR_PROBE : 0,
1113   };
1114   if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
1115      goto err_free;
1116   bo->gem_handle = arg.handle;
1117
1118   if (!bufmgr->has_userptr_probe) {
1119      /* Check the buffer for validity before we try and use it in a batch */
1120      struct drm_i915_gem_set_domain sd = {
1121         .handle = bo->gem_handle,
1122         .read_domains = I915_GEM_DOMAIN_CPU,
1123      };
1124      if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd))
1125         goto err_close;
1126   }
1127
1128   bo->name = name;
1129   bo->size = size;
1130   bo->real.map = ptr;
1131
1132   bo->bufmgr = bufmgr;
1133   bo->real.kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
1134
1135   simple_mtx_lock(&bufmgr->lock);
1136   bo->address = vma_alloc(bufmgr, memzone, size, 1);
1137   simple_mtx_unlock(&bufmgr->lock);
1138
1139   if (bo->address == 0ull)
1140      goto err_close;
1141
1142   p_atomic_set(&bo->refcount, 1);
1143   bo->real.userptr = true;
1144   bo->index = -1;
1145   bo->idle = true;
1146   bo->real.mmap_mode = IRIS_MMAP_WB;
1147
1148   return bo;
1149
1150err_close:
1151   close.handle = bo->gem_handle;
1152   intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close);
1153err_free:
1154   free(bo);
1155   return NULL;
1156}
1157
1158/**
1159 * Returns a iris_bo wrapping the given buffer object handle.
1160 *
1161 * This can be used when one application needs to pass a buffer object
1162 * to another.
1163 */
1164struct iris_bo *
1165iris_bo_gem_create_from_name(struct iris_bufmgr *bufmgr,
1166                             const char *name, unsigned int handle)
1167{
1168   struct iris_bo *bo;
1169
1170   /* At the moment most applications only have a few named bo.
1171    * For instance, in a DRI client only the render buffers passed
1172    * between X and the client are named. And since X returns the
1173    * alternating names for the front/back buffer a linear search
1174    * provides a sufficiently fast match.
1175    */
1176   simple_mtx_lock(&bufmgr->lock);
1177   bo = find_and_ref_external_bo(bufmgr->name_table, handle);
1178   if (bo)
1179      goto out;
1180
1181   struct drm_gem_open open_arg = { .name = handle };
1182   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_OPEN, &open_arg);
1183   if (ret != 0) {
1184      DBG("Couldn't reference %s handle 0x%08x: %s\n",
1185          name, handle, strerror(errno));
1186      bo = NULL;
1187      goto out;
1188   }
1189   /* Now see if someone has used a prime handle to get this
1190    * object from the kernel before by looking through the list
1191    * again for a matching gem_handle
1192    */
1193   bo = find_and_ref_external_bo(bufmgr->handle_table, open_arg.handle);
1194   if (bo)
1195      goto out;
1196
1197   bo = bo_calloc();
1198   if (!bo)
1199      goto out;
1200
1201   p_atomic_set(&bo->refcount, 1);
1202
1203   bo->size = open_arg.size;
1204   bo->bufmgr = bufmgr;
1205   bo->gem_handle = open_arg.handle;
1206   bo->name = name;
1207   bo->real.global_name = handle;
1208   bo->real.reusable = false;
1209   bo->real.imported = true;
1210   bo->real.mmap_mode = IRIS_MMAP_NONE;
1211   bo->real.kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
1212   bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 1);
1213
1214   _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
1215   _mesa_hash_table_insert(bufmgr->name_table, &bo->real.global_name, bo);
1216
1217   DBG("bo_create_from_handle: %d (%s)\n", handle, bo->name);
1218
1219out:
1220   simple_mtx_unlock(&bufmgr->lock);
1221   return bo;
1222}
1223
1224static void
1225bo_close(struct iris_bo *bo)
1226{
1227   struct iris_bufmgr *bufmgr = bo->bufmgr;
1228
1229   assert(iris_bo_is_real(bo));
1230
1231   if (iris_bo_is_external(bo)) {
1232      struct hash_entry *entry;
1233
1234      if (bo->real.global_name) {
1235         entry = _mesa_hash_table_search(bufmgr->name_table,
1236                                         &bo->real.global_name);
1237         _mesa_hash_table_remove(bufmgr->name_table, entry);
1238      }
1239
1240      entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle);
1241      _mesa_hash_table_remove(bufmgr->handle_table, entry);
1242
1243      list_for_each_entry_safe(struct bo_export, export, &bo->real.exports, link) {
1244         struct drm_gem_close close = { .handle = export->gem_handle };
1245         intel_ioctl(export->drm_fd, DRM_IOCTL_GEM_CLOSE, &close);
1246
1247         list_del(&export->link);
1248         free(export);
1249      }
1250   } else {
1251      assert(list_is_empty(&bo->real.exports));
1252   }
1253
1254   /* Close this object */
1255   struct drm_gem_close close = { .handle = bo->gem_handle };
1256   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close);
1257   if (ret != 0) {
1258      DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
1259          bo->gem_handle, bo->name, strerror(errno));
1260   }
1261
1262   if (bo->aux_map_address && bo->bufmgr->aux_map_ctx) {
1263      intel_aux_map_unmap_range(bo->bufmgr->aux_map_ctx, bo->address,
1264                                bo->size);
1265   }
1266
1267   /* Return the VMA for reuse */
1268   vma_free(bo->bufmgr, bo->address, bo->size);
1269
1270   for (int d = 0; d < bo->deps_size; d++) {
1271      for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
1272         iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
1273         iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
1274      }
1275   }
1276   free(bo->deps);
1277
1278   free(bo);
1279}
1280
1281static void
1282bo_free(struct iris_bo *bo)
1283{
1284   struct iris_bufmgr *bufmgr = bo->bufmgr;
1285
1286   assert(iris_bo_is_real(bo));
1287
1288   if (!bo->real.userptr && bo->real.map)
1289      bo_unmap(bo);
1290
1291   if (bo->idle) {
1292      bo_close(bo);
1293   } else {
1294      /* Defer closing the GEM BO and returning the VMA for reuse until the
1295       * BO is idle.  Just move it to the dead list for now.
1296       */
1297      list_addtail(&bo->head, &bufmgr->zombie_list);
1298   }
1299}
1300
1301/** Frees all cached buffers significantly older than @time. */
1302static void
1303cleanup_bo_cache(struct iris_bufmgr *bufmgr, time_t time)
1304{
1305   int i;
1306
1307   if (bufmgr->time == time)
1308      return;
1309
1310   for (i = 0; i < bufmgr->num_buckets; i++) {
1311      struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];
1312
1313      list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
1314         if (time - bo->real.free_time <= 1)
1315            break;
1316
1317         list_del(&bo->head);
1318
1319         bo_free(bo);
1320      }
1321   }
1322
1323   for (i = 0; i < bufmgr->num_local_buckets; i++) {
1324      struct bo_cache_bucket *bucket = &bufmgr->local_cache_bucket[i];
1325
1326      list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
1327         if (time - bo->real.free_time <= 1)
1328            break;
1329
1330         list_del(&bo->head);
1331
1332         bo_free(bo);
1333      }
1334   }
1335
1336   list_for_each_entry_safe(struct iris_bo, bo, &bufmgr->zombie_list, head) {
1337      /* Stop once we reach a busy BO - all others past this point were
1338       * freed more recently so are likely also busy.
1339       */
1340      if (!bo->idle && iris_bo_busy(bo))
1341         break;
1342
1343      list_del(&bo->head);
1344      bo_close(bo);
1345   }
1346
1347   bufmgr->time = time;
1348}
1349
1350static void
1351bo_unreference_final(struct iris_bo *bo, time_t time)
1352{
1353   struct iris_bufmgr *bufmgr = bo->bufmgr;
1354   struct bo_cache_bucket *bucket;
1355
1356   DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name);
1357
1358   assert(iris_bo_is_real(bo));
1359
1360   bucket = NULL;
1361   if (bo->real.reusable)
1362      bucket = bucket_for_size(bufmgr, bo->size, bo->real.local);
1363   /* Put the buffer into our internal cache for reuse if we can. */
1364   if (bucket && iris_bo_madvise(bo, I915_MADV_DONTNEED)) {
1365      bo->real.free_time = time;
1366      bo->name = NULL;
1367
1368      list_addtail(&bo->head, &bucket->head);
1369   } else {
1370      bo_free(bo);
1371   }
1372}
1373
1374void
1375iris_bo_unreference(struct iris_bo *bo)
1376{
1377   if (bo == NULL)
1378      return;
1379
1380   assert(p_atomic_read(&bo->refcount) > 0);
1381
1382   if (atomic_add_unless(&bo->refcount, -1, 1)) {
1383      struct iris_bufmgr *bufmgr = bo->bufmgr;
1384      struct timespec time;
1385
1386      clock_gettime(CLOCK_MONOTONIC, &time);
1387
1388      if (bo->gem_handle == 0) {
1389         pb_slab_free(get_slabs(bufmgr, bo->size), &bo->slab.entry);
1390      } else {
1391         simple_mtx_lock(&bufmgr->lock);
1392
1393         if (p_atomic_dec_zero(&bo->refcount)) {
1394            bo_unreference_final(bo, time.tv_sec);
1395            cleanup_bo_cache(bufmgr, time.tv_sec);
1396         }
1397
1398         simple_mtx_unlock(&bufmgr->lock);
1399      }
1400   }
1401}
1402
1403static void
1404bo_wait_with_stall_warning(struct pipe_debug_callback *dbg,
1405                           struct iris_bo *bo,
1406                           const char *action)
1407{
1408   bool busy = dbg && !bo->idle;
1409   double elapsed = unlikely(busy) ? -get_time() : 0.0;
1410
1411   iris_bo_wait_rendering(bo);
1412
1413   if (unlikely(busy)) {
1414      elapsed += get_time();
1415      if (elapsed > 1e-5) /* 0.01ms */ {
1416         perf_debug(dbg, "%s a busy \"%s\" BO stalled and took %.03f ms.\n",
1417                    action, bo->name, elapsed * 1000);
1418      }
1419   }
1420}
1421
1422static void
1423print_flags(unsigned flags)
1424{
1425   if (flags & MAP_READ)
1426      DBG("READ ");
1427   if (flags & MAP_WRITE)
1428      DBG("WRITE ");
1429   if (flags & MAP_ASYNC)
1430      DBG("ASYNC ");
1431   if (flags & MAP_PERSISTENT)
1432      DBG("PERSISTENT ");
1433   if (flags & MAP_COHERENT)
1434      DBG("COHERENT ");
1435   if (flags & MAP_RAW)
1436      DBG("RAW ");
1437   DBG("\n");
1438}
1439
1440static void *
1441iris_bo_gem_mmap_legacy(struct pipe_debug_callback *dbg, struct iris_bo *bo)
1442{
1443   struct iris_bufmgr *bufmgr = bo->bufmgr;
1444
1445   assert(bufmgr->vram.size == 0);
1446   assert(iris_bo_is_real(bo));
1447   assert(bo->real.mmap_mode == IRIS_MMAP_WB ||
1448          bo->real.mmap_mode == IRIS_MMAP_WC);
1449
1450   struct drm_i915_gem_mmap mmap_arg = {
1451      .handle = bo->gem_handle,
1452      .size = bo->size,
1453      .flags = bo->real.mmap_mode == IRIS_MMAP_WC ? I915_MMAP_WC : 0,
1454   };
1455
1456   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
1457   if (ret != 0) {
1458      DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1459          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
1460      return NULL;
1461   }
1462   void *map = (void *) (uintptr_t) mmap_arg.addr_ptr;
1463
1464   return map;
1465}
1466
1467static void *
1468iris_bo_gem_mmap_offset(struct pipe_debug_callback *dbg, struct iris_bo *bo)
1469{
1470   struct iris_bufmgr *bufmgr = bo->bufmgr;
1471
1472   assert(iris_bo_is_real(bo));
1473
1474   struct drm_i915_gem_mmap_offset mmap_arg = {
1475      .handle = bo->gem_handle,
1476   };
1477
1478   if (bufmgr->has_local_mem) {
1479      /* On discrete memory platforms, we cannot control the mmap caching mode
1480       * at mmap time.  Instead, it's fixed when the object is created (this
1481       * is a limitation of TTM).
1482       *
1483       * On DG1, our only currently enabled discrete platform, there is no
1484       * control over what mode we get.  For SMEM, we always get WB because
1485       * it's fast (probably what we want) and when the device views SMEM
1486       * across PCIe, it's always snooped.  The only caching mode allowed by
1487       * DG1 hardware for LMEM is WC.
1488       */
1489      if (bo->real.local)
1490         assert(bo->real.mmap_mode == IRIS_MMAP_WC);
1491      else
1492         assert(bo->real.mmap_mode == IRIS_MMAP_WB);
1493
1494      mmap_arg.flags = I915_MMAP_OFFSET_FIXED;
1495   } else {
1496      /* Only integrated platforms get to select a mmap caching mode here */
1497      static const uint32_t mmap_offset_for_mode[] = {
1498         [IRIS_MMAP_UC]    = I915_MMAP_OFFSET_UC,
1499         [IRIS_MMAP_WC]    = I915_MMAP_OFFSET_WC,
1500         [IRIS_MMAP_WB]    = I915_MMAP_OFFSET_WB,
1501      };
1502      assert(bo->real.mmap_mode != IRIS_MMAP_NONE);
1503      assert(bo->real.mmap_mode < ARRAY_SIZE(mmap_offset_for_mode));
1504      mmap_arg.flags = mmap_offset_for_mode[bo->real.mmap_mode];
1505   }
1506
1507   /* Get the fake offset back */
1508   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &mmap_arg);
1509   if (ret != 0) {
1510      DBG("%s:%d: Error preparing buffer %d (%s): %s .\n",
1511          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
1512      return NULL;
1513   }
1514
1515   /* And map it */
1516   void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
1517                    bufmgr->fd, mmap_arg.offset);
1518   if (map == MAP_FAILED) {
1519      DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1520          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
1521      return NULL;
1522   }
1523
1524   return map;
1525}
1526
1527void *
1528iris_bo_map(struct pipe_debug_callback *dbg,
1529            struct iris_bo *bo, unsigned flags)
1530{
1531   struct iris_bufmgr *bufmgr = bo->bufmgr;
1532   void *map = NULL;
1533
1534   if (bo->gem_handle == 0) {
1535      struct iris_bo *real = iris_get_backing_bo(bo);
1536      uint64_t offset = bo->address - real->address;
1537      map = iris_bo_map(dbg, real, flags | MAP_ASYNC) + offset;
1538   } else {
1539      assert(bo->real.mmap_mode != IRIS_MMAP_NONE);
1540      if (bo->real.mmap_mode == IRIS_MMAP_NONE)
1541         return NULL;
1542
1543      if (!bo->real.map) {
1544         DBG("iris_bo_map: %d (%s)\n", bo->gem_handle, bo->name);
1545         map = bufmgr->has_mmap_offset ? iris_bo_gem_mmap_offset(dbg, bo)
1546                                       : iris_bo_gem_mmap_legacy(dbg, bo);
1547         if (!map) {
1548            return NULL;
1549         }
1550
1551         VG_DEFINED(map, bo->size);
1552
1553         if (p_atomic_cmpxchg(&bo->real.map, NULL, map)) {
1554            VG_NOACCESS(map, bo->size);
1555            os_munmap(map, bo->size);
1556         }
1557      }
1558      assert(bo->real.map);
1559      map = bo->real.map;
1560   }
1561
1562   DBG("iris_bo_map: %d (%s) -> %p\n",
1563       bo->gem_handle, bo->name, bo->real.map);
1564   print_flags(flags);
1565
1566   if (!(flags & MAP_ASYNC)) {
1567      bo_wait_with_stall_warning(dbg, bo, "memory mapping");
1568   }
1569
1570   return map;
1571}
1572
/**
 * Blocks until all GPU rendering with the object has completed.  The
 * result of the underlying wait is discarded.
 */
void
iris_bo_wait_rendering(struct iris_bo *bo)
{
   /* A negative timeout requests an unbounded wait.  We require a kernel
    * recent enough for WAIT_IOCTL support; see intel_init_bufmgr().
    */
   const int64_t wait_forever = -1;

   (void) iris_bo_wait(bo, wait_forever);
}
1582
1583static int
1584iris_bo_wait_gem(struct iris_bo *bo, int64_t timeout_ns)
1585{
1586   assert(iris_bo_is_real(bo));
1587
1588   struct iris_bufmgr *bufmgr = bo->bufmgr;
1589   struct drm_i915_gem_wait wait = {
1590      .bo_handle = bo->gem_handle,
1591      .timeout_ns = timeout_ns,
1592   };
1593
1594   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
1595   if (ret != 0)
1596      return -errno;
1597
1598   return 0;
1599}
1600
1601/**
1602 * Waits on a BO for the given amount of time.
1603 *
1604 * @bo: buffer object to wait for
1605 * @timeout_ns: amount of time to wait in nanoseconds.
1606 *   If value is less than 0, an infinite wait will occur.
1607 *
1608 * Returns 0 if the wait was successful ie. the last batch referencing the
1609 * object has completed within the allotted time. Otherwise some negative return
1610 * value describes the error. Of particular interest is -ETIME when the wait has
1611 * failed to yield the desired result.
1612 *
1613 * Similar to iris_bo_wait_rendering except a timeout parameter allows
1614 * the operation to give up after a certain amount of time. Another subtle
1615 * difference is the internal locking semantics are different (this variant does
1616 * not hold the lock for the duration of the wait). This makes the wait subject
1617 * to a larger userspace race window.
1618 *
1619 * The implementation shall wait until the object is no longer actively
1620 * referenced within a batch buffer at the time of the call. The wait will
1621 * not guarantee that the buffer is re-issued via another thread, or an flinked
1622 * handle. Userspace must make sure this race does not occur if such precision
1623 * is important.
1624 *
1625 * Note that some kernels have broken the infinite wait for negative values
1626 * promise, upgrade to latest stable kernels if this is the case.
1627 */
1628int
1629iris_bo_wait(struct iris_bo *bo, int64_t timeout_ns)
1630{
1631   int ret;
1632
1633   if (iris_bo_is_external(bo))
1634      ret = iris_bo_wait_gem(bo, timeout_ns);
1635   else
1636      ret = iris_bo_wait_syncobj(bo, timeout_ns);
1637
1638   if (ret != 0)
1639      return -errno;
1640
1641   bo->idle = true;
1642
1643   return ret;
1644}
1645
1646static void
1647iris_bufmgr_destroy(struct iris_bufmgr *bufmgr)
1648{
1649   /* Free aux-map buffers */
1650   intel_aux_map_finish(bufmgr->aux_map_ctx);
1651
1652   /* bufmgr will no longer try to free VMA entries in the aux-map */
1653   bufmgr->aux_map_ctx = NULL;
1654
1655   for (int i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
1656      if (bufmgr->bo_slabs[i].groups)
1657         pb_slabs_deinit(&bufmgr->bo_slabs[i]);
1658   }
1659
1660   simple_mtx_destroy(&bufmgr->lock);
1661   simple_mtx_destroy(&bufmgr->bo_deps_lock);
1662
1663   /* Free any cached buffer objects we were going to reuse */
1664   for (int i = 0; i < bufmgr->num_buckets; i++) {
1665      struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];
1666
1667      list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
1668         list_del(&bo->head);
1669
1670         bo_free(bo);
1671      }
1672   }
1673
1674   for (int i = 0; i < bufmgr->num_local_buckets; i++) {
1675      struct bo_cache_bucket *bucket = &bufmgr->local_cache_bucket[i];
1676
1677      list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
1678         list_del(&bo->head);
1679
1680         bo_free(bo);
1681      }
1682   }
1683
1684   /* Close any buffer objects on the dead list. */
1685   list_for_each_entry_safe(struct iris_bo, bo, &bufmgr->zombie_list, head) {
1686      list_del(&bo->head);
1687      bo_close(bo);
1688   }
1689
1690   _mesa_hash_table_destroy(bufmgr->name_table, NULL);
1691   _mesa_hash_table_destroy(bufmgr->handle_table, NULL);
1692
1693   for (int z = 0; z < IRIS_MEMZONE_COUNT; z++) {
1694      if (z != IRIS_MEMZONE_BINDER)
1695         util_vma_heap_finish(&bufmgr->vma_allocator[z]);
1696   }
1697
1698   close(bufmgr->fd);
1699
1700   free(bufmgr);
1701}
1702
1703int
1704iris_gem_get_tiling(struct iris_bo *bo, uint32_t *tiling)
1705{
1706   struct iris_bufmgr *bufmgr = bo->bufmgr;
1707
1708   if (!bufmgr->has_tiling_uapi) {
1709      *tiling = I915_TILING_NONE;
1710      return 0;
1711   }
1712
1713   struct drm_i915_gem_get_tiling ti = { .handle = bo->gem_handle };
1714   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &ti);
1715
1716   if (ret) {
1717      DBG("gem_get_tiling failed for BO %u: %s\n",
1718          bo->gem_handle, strerror(errno));
1719   }
1720
1721   *tiling = ti.tiling_mode;
1722
1723   return ret;
1724}
1725
1726int
1727iris_gem_set_tiling(struct iris_bo *bo, const struct isl_surf *surf)
1728{
1729   struct iris_bufmgr *bufmgr = bo->bufmgr;
1730   uint32_t tiling_mode = isl_tiling_to_i915_tiling(surf->tiling);
1731   int ret;
1732
1733   /* If we can't do map_gtt, the set/get_tiling API isn't useful. And it's
1734    * actually not supported by the kernel in those cases.
1735    */
1736   if (!bufmgr->has_tiling_uapi)
1737      return 0;
1738
1739   /* GEM_SET_TILING is slightly broken and overwrites the input on the
1740    * error path, so we have to open code intel_ioctl().
1741    */
1742   do {
1743      struct drm_i915_gem_set_tiling set_tiling = {
1744         .handle = bo->gem_handle,
1745         .tiling_mode = tiling_mode,
1746         .stride = surf->row_pitch_B,
1747      };
1748      ret = ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
1749   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
1750
1751   if (ret) {
1752      DBG("gem_set_tiling failed for BO %u: %s\n",
1753          bo->gem_handle, strerror(errno));
1754   }
1755
1756   return ret;
1757}
1758
1759struct iris_bo *
1760iris_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd)
1761{
1762   uint32_t handle;
1763   struct iris_bo *bo;
1764
1765   simple_mtx_lock(&bufmgr->lock);
1766   int ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
1767   if (ret) {
1768      DBG("import_dmabuf: failed to obtain handle from fd: %s\n",
1769          strerror(errno));
1770      simple_mtx_unlock(&bufmgr->lock);
1771      return NULL;
1772   }
1773
1774   /*
1775    * See if the kernel has already returned this buffer to us. Just as
1776    * for named buffers, we must not create two bo's pointing at the same
1777    * kernel object
1778    */
1779   bo = find_and_ref_external_bo(bufmgr->handle_table, handle);
1780   if (bo)
1781      goto out;
1782
1783   bo = bo_calloc();
1784   if (!bo)
1785      goto out;
1786
1787   p_atomic_set(&bo->refcount, 1);
1788
1789   /* Determine size of bo.  The fd-to-handle ioctl really should
1790    * return the size, but it doesn't.  If we have kernel 3.12 or
1791    * later, we can lseek on the prime fd to get the size.  Older
1792    * kernels will just fail, in which case we fall back to the
1793    * provided (estimated or guess size). */
1794   ret = lseek(prime_fd, 0, SEEK_END);
1795   if (ret != -1)
1796      bo->size = ret;
1797
1798   bo->bufmgr = bufmgr;
1799   bo->name = "prime";
1800   bo->real.reusable = false;
1801   bo->real.imported = true;
1802   bo->real.mmap_mode = IRIS_MMAP_NONE;
1803   bo->real.kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
1804
1805   /* From the Bspec, Memory Compression - Gfx12:
1806    *
1807    *    The base address for the surface has to be 64K page aligned and the
1808    *    surface is expected to be padded in the virtual domain to be 4 4K
1809    *    pages.
1810    *
1811    * The dmabuf may contain a compressed surface. Align the BO to 64KB just
1812    * in case. We always align to 64KB even on platforms where we don't need
1813    * to, because it's a fairly reasonable thing to do anyway.
1814    */
1815   bo->address =
1816      vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 64 * 1024);
1817
1818   bo->gem_handle = handle;
1819   _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
1820
1821out:
1822   simple_mtx_unlock(&bufmgr->lock);
1823   return bo;
1824}
1825
1826static void
1827iris_bo_mark_exported_locked(struct iris_bo *bo)
1828{
1829   /* We cannot export suballocated BOs. */
1830   assert(iris_bo_is_real(bo));
1831
1832   if (!iris_bo_is_external(bo))
1833      _mesa_hash_table_insert(bo->bufmgr->handle_table, &bo->gem_handle, bo);
1834
1835   if (!bo->real.exported) {
1836      /* If a BO is going to be used externally, it could be sent to the
1837       * display HW. So make sure our CPU mappings don't assume cache
1838       * coherency since display is outside that cache.
1839       */
1840      bo->real.exported = true;
1841      bo->real.reusable = false;
1842   }
1843}
1844
1845void
1846iris_bo_mark_exported(struct iris_bo *bo)
1847{
1848   struct iris_bufmgr *bufmgr = bo->bufmgr;
1849
1850   /* We cannot export suballocated BOs. */
1851   assert(iris_bo_is_real(bo));
1852
1853   if (bo->real.exported) {
1854      assert(!bo->real.reusable);
1855      return;
1856   }
1857
1858   simple_mtx_lock(&bufmgr->lock);
1859   iris_bo_mark_exported_locked(bo);
1860   simple_mtx_unlock(&bufmgr->lock);
1861}
1862
1863int
1864iris_bo_export_dmabuf(struct iris_bo *bo, int *prime_fd)
1865{
1866   struct iris_bufmgr *bufmgr = bo->bufmgr;
1867
1868   /* We cannot export suballocated BOs. */
1869   assert(iris_bo_is_real(bo));
1870
1871   iris_bo_mark_exported(bo);
1872
1873   if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
1874                          DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0)
1875      return -errno;
1876
1877   return 0;
1878}
1879
1880uint32_t
1881iris_bo_export_gem_handle(struct iris_bo *bo)
1882{
1883   /* We cannot export suballocated BOs. */
1884   assert(iris_bo_is_real(bo));
1885
1886   iris_bo_mark_exported(bo);
1887
1888   return bo->gem_handle;
1889}
1890
1891int
1892iris_bo_flink(struct iris_bo *bo, uint32_t *name)
1893{
1894   struct iris_bufmgr *bufmgr = bo->bufmgr;
1895
1896   /* We cannot export suballocated BOs. */
1897   assert(iris_bo_is_real(bo));
1898
1899   if (!bo->real.global_name) {
1900      struct drm_gem_flink flink = { .handle = bo->gem_handle };
1901
1902      if (intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
1903         return -errno;
1904
1905      simple_mtx_lock(&bufmgr->lock);
1906      if (!bo->real.global_name) {
1907         iris_bo_mark_exported_locked(bo);
1908         bo->real.global_name = flink.name;
1909         _mesa_hash_table_insert(bufmgr->name_table, &bo->real.global_name, bo);
1910      }
1911      simple_mtx_unlock(&bufmgr->lock);
1912   }
1913
1914   *name = bo->real.global_name;
1915   return 0;
1916}
1917
1918int
1919iris_bo_export_gem_handle_for_device(struct iris_bo *bo, int drm_fd,
1920                                     uint32_t *out_handle)
1921{
1922   /* We cannot export suballocated BOs. */
1923   assert(iris_bo_is_real(bo));
1924
1925   /* Only add the new GEM handle to the list of export if it belongs to a
1926    * different GEM device. Otherwise we might close the same buffer multiple
1927    * times.
1928    */
1929   struct iris_bufmgr *bufmgr = bo->bufmgr;
1930   int ret = os_same_file_description(drm_fd, bufmgr->fd);
1931   WARN_ONCE(ret < 0,
1932             "Kernel has no file descriptor comparison support: %s\n",
1933             strerror(errno));
1934   if (ret == 0) {
1935      *out_handle = iris_bo_export_gem_handle(bo);
1936      return 0;
1937   }
1938
1939   struct bo_export *export = calloc(1, sizeof(*export));
1940   if (!export)
1941      return -ENOMEM;
1942
1943   export->drm_fd = drm_fd;
1944
1945   int dmabuf_fd = -1;
1946   int err = iris_bo_export_dmabuf(bo, &dmabuf_fd);
1947   if (err) {
1948      free(export);
1949      return err;
1950   }
1951
1952   simple_mtx_lock(&bufmgr->lock);
1953   err = drmPrimeFDToHandle(drm_fd, dmabuf_fd, &export->gem_handle);
1954   close(dmabuf_fd);
1955   if (err) {
1956      simple_mtx_unlock(&bufmgr->lock);
1957      free(export);
1958      return err;
1959   }
1960
1961   bool found = false;
1962   list_for_each_entry(struct bo_export, iter, &bo->real.exports, link) {
1963      if (iter->drm_fd != drm_fd)
1964         continue;
1965      /* Here we assume that for a given DRM fd, we'll always get back the
1966       * same GEM handle for a given buffer.
1967       */
1968      assert(iter->gem_handle == export->gem_handle);
1969      free(export);
1970      export = iter;
1971      found = true;
1972      break;
1973   }
1974   if (!found)
1975      list_addtail(&export->link, &bo->real.exports);
1976
1977   simple_mtx_unlock(&bufmgr->lock);
1978
1979   *out_handle = export->gem_handle;
1980
1981   return 0;
1982}
1983
1984static void
1985add_bucket(struct iris_bufmgr *bufmgr, int size, bool local)
1986{
1987   unsigned int i = local ?
1988      bufmgr->num_local_buckets : bufmgr->num_buckets;
1989
1990   struct bo_cache_bucket *buckets = local ?
1991      bufmgr->local_cache_bucket : bufmgr->cache_bucket;
1992
1993   assert(i < ARRAY_SIZE(bufmgr->cache_bucket));
1994
1995   list_inithead(&buckets[i].head);
1996   buckets[i].size = size;
1997
1998   if (local)
1999      bufmgr->num_local_buckets++;
2000   else
2001      bufmgr->num_buckets++;
2002
2003   assert(bucket_for_size(bufmgr, size, local) == &buckets[i]);
2004   assert(bucket_for_size(bufmgr, size - 2048, local) == &buckets[i]);
2005   assert(bucket_for_size(bufmgr, size + 1, local) != &buckets[i]);
2006}
2007
2008static void
2009init_cache_buckets(struct iris_bufmgr *bufmgr, bool local)
2010{
2011   uint64_t size, cache_max_size = 64 * 1024 * 1024;
2012
2013   /* OK, so power of two buckets was too wasteful of memory.
2014    * Give 3 other sizes between each power of two, to hopefully
2015    * cover things accurately enough.  (The alternative is
2016    * probably to just go for exact matching of sizes, and assume
2017    * that for things like composited window resize the tiled
2018    * width/height alignment and rounding of sizes to pages will
2019    * get us useful cache hit rates anyway)
2020    */
2021   add_bucket(bufmgr, PAGE_SIZE, local);
2022   add_bucket(bufmgr, PAGE_SIZE * 2, local);
2023   add_bucket(bufmgr, PAGE_SIZE * 3, local);
2024
2025   /* Initialize the linked lists for BO reuse cache. */
2026   for (size = 4 * PAGE_SIZE; size <= cache_max_size; size *= 2) {
2027      add_bucket(bufmgr, size, local);
2028
2029      add_bucket(bufmgr, size + size * 1 / 4, local);
2030      add_bucket(bufmgr, size + size * 2 / 4, local);
2031      add_bucket(bufmgr, size + size * 3 / 4, local);
2032   }
2033}
2034
2035uint32_t
2036iris_create_hw_context(struct iris_bufmgr *bufmgr)
2037{
2038   struct drm_i915_gem_context_create create = { };
2039   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
2040   if (ret != 0) {
2041      DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", strerror(errno));
2042      return 0;
2043   }
2044
2045   /* Upon declaring a GPU hang, the kernel will zap the guilty context
2046    * back to the default logical HW state and attempt to continue on to
2047    * our next submitted batchbuffer.  However, our render batches assume
2048    * the previous GPU state is preserved, and only emit commands needed
2049    * to incrementally change that state.  In particular, we inherit the
2050    * STATE_BASE_ADDRESS and PIPELINE_SELECT settings, which are critical.
2051    * With default base addresses, our next batches will almost certainly
2052    * cause more GPU hangs, leading to repeated hangs until we're banned
2053    * or the machine is dead.
2054    *
2055    * Here we tell the kernel not to attempt to recover our context but
2056    * immediately (on the next batchbuffer submission) report that the
2057    * context is lost, and we will do the recovery ourselves.  Ideally,
2058    * we'll have two lost batches instead of a continual stream of hangs.
2059    */
2060   struct drm_i915_gem_context_param p = {
2061      .ctx_id = create.ctx_id,
2062      .param = I915_CONTEXT_PARAM_RECOVERABLE,
2063      .value = false,
2064   };
2065   intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p);
2066
2067   return create.ctx_id;
2068}
2069
2070static int
2071iris_hw_context_get_priority(struct iris_bufmgr *bufmgr, uint32_t ctx_id)
2072{
2073   struct drm_i915_gem_context_param p = {
2074      .ctx_id = ctx_id,
2075      .param = I915_CONTEXT_PARAM_PRIORITY,
2076   };
2077   intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p);
2078   return p.value; /* on error, return 0 i.e. default priority */
2079}
2080
2081int
2082iris_hw_context_set_priority(struct iris_bufmgr *bufmgr,
2083                            uint32_t ctx_id,
2084                            int priority)
2085{
2086   struct drm_i915_gem_context_param p = {
2087      .ctx_id = ctx_id,
2088      .param = I915_CONTEXT_PARAM_PRIORITY,
2089      .value = priority,
2090   };
2091   int err;
2092
2093   err = 0;
2094   if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p))
2095      err = -errno;
2096
2097   return err;
2098}
2099
/**
 * Creates a new hardware context that inherits the priority of an existing
 * one.
 *
 * \param bufmgr  Buffer manager providing the DRM fd.
 * \param ctx_id  Context whose priority is copied.
 *
 * \return The new context id, or 0 if context creation failed.  A failure
 *         to apply the priority is not reported.
 */
uint32_t
iris_clone_hw_context(struct iris_bufmgr *bufmgr, uint32_t ctx_id)
{
   const uint32_t clone = iris_create_hw_context(bufmgr);

   /* Nothing to configure if creation failed. */
   if (clone == 0)
      return clone;

   iris_hw_context_set_priority(bufmgr, clone,
                                iris_hw_context_get_priority(bufmgr, ctx_id));

   return clone;
}
2112
2113void
2114iris_destroy_hw_context(struct iris_bufmgr *bufmgr, uint32_t ctx_id)
2115{
2116   struct drm_i915_gem_context_destroy d = { .ctx_id = ctx_id };
2117
2118   if (ctx_id != 0 &&
2119       intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &d) != 0) {
2120      fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
2121              strerror(errno));
2122   }
2123}
2124
2125int
2126iris_reg_read(struct iris_bufmgr *bufmgr, uint32_t offset, uint64_t *result)
2127{
2128   struct drm_i915_reg_read reg_read = { .offset = offset };
2129   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
2130
2131   *result = reg_read.val;
2132   return ret;
2133}
2134
2135static uint64_t
2136iris_gtt_size(int fd)
2137{
2138   /* We use the default (already allocated) context to determine
2139    * the default configuration of the virtual address space.
2140    */
2141   struct drm_i915_gem_context_param p = {
2142      .param = I915_CONTEXT_PARAM_GTT_SIZE,
2143   };
2144   if (!intel_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p))
2145      return p.value;
2146
2147   return 0;
2148}
2149
2150static struct intel_buffer *
2151intel_aux_map_buffer_alloc(void *driver_ctx, uint32_t size)
2152{
2153   struct intel_buffer *buf = malloc(sizeof(struct intel_buffer));
2154   if (!buf)
2155      return NULL;
2156
2157   struct iris_bufmgr *bufmgr = (struct iris_bufmgr *)driver_ctx;
2158
2159   bool local = bufmgr->vram.size > 0;
2160   unsigned int page_size = getpagesize();
2161   size = MAX2(ALIGN(size, page_size), page_size);
2162
2163   struct iris_bo *bo = alloc_fresh_bo(bufmgr, size, local);
2164
2165   simple_mtx_lock(&bufmgr->lock);
2166   bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 64 * 1024);
2167   assert(bo->address != 0ull);
2168   simple_mtx_unlock(&bufmgr->lock);
2169
2170   bo->name = "aux-map";
2171   p_atomic_set(&bo->refcount, 1);
2172   bo->index = -1;
2173   bo->real.kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED |
2174                     EXEC_OBJECT_CAPTURE;
2175   bo->real.mmap_mode = local ? IRIS_MMAP_WC : IRIS_MMAP_WB;
2176
2177   buf->driver_bo = bo;
2178   buf->gpu = bo->address;
2179   buf->gpu_end = buf->gpu + bo->size;
2180   buf->map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
2181   return buf;
2182}
2183
2184static void
2185intel_aux_map_buffer_free(void *driver_ctx, struct intel_buffer *buffer)
2186{
2187   iris_bo_unreference((struct iris_bo*)buffer->driver_bo);
2188   free(buffer);
2189}
2190
2191static struct intel_mapped_pinned_buffer_alloc aux_map_allocator = {
2192   .alloc = intel_aux_map_buffer_alloc,
2193   .free = intel_aux_map_buffer_free,
2194};
2195
2196static int
2197gem_param(int fd, int name)
2198{
2199   int v = -1; /* No param uses (yet) the sign bit, reserve it for errors */
2200
2201   struct drm_i915_getparam gp = { .param = name, .value = &v };
2202   if (intel_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
2203      return -1;
2204
2205   return v;
2206}
2207
2208static bool
2209iris_bufmgr_query_meminfo(struct iris_bufmgr *bufmgr)
2210{
2211   struct drm_i915_query_memory_regions *meminfo =
2212      intel_i915_query_alloc(bufmgr->fd, DRM_I915_QUERY_MEMORY_REGIONS);
2213   if (meminfo == NULL)
2214      return false;
2215
2216   for (int i = 0; i < meminfo->num_regions; i++) {
2217      const struct drm_i915_memory_region_info *mem = &meminfo->regions[i];
2218      switch (mem->region.memory_class) {
2219      case I915_MEMORY_CLASS_SYSTEM:
2220         bufmgr->sys.region = mem->region;
2221         bufmgr->sys.size = mem->probed_size;
2222         break;
2223      case I915_MEMORY_CLASS_DEVICE:
2224         bufmgr->vram.region = mem->region;
2225         bufmgr->vram.size = mem->probed_size;
2226         break;
2227      default:
2228         break;
2229      }
2230   }
2231
2232   free(meminfo);
2233
2234   return true;
2235}
2236
2237/**
2238 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
2239 * and manage map buffer objections.
2240 *
2241 * \param fd File descriptor of the opened DRM device.
2242 */
2243static struct iris_bufmgr *
2244iris_bufmgr_create(struct intel_device_info *devinfo, int fd, bool bo_reuse)
2245{
2246   uint64_t gtt_size = iris_gtt_size(fd);
2247   if (gtt_size <= IRIS_MEMZONE_OTHER_START)
2248      return NULL;
2249
2250   struct iris_bufmgr *bufmgr = calloc(1, sizeof(*bufmgr));
2251   if (bufmgr == NULL)
2252      return NULL;
2253
2254   /* Handles to buffer objects belong to the device fd and are not
2255    * reference counted by the kernel.  If the same fd is used by
2256    * multiple parties (threads sharing the same screen bufmgr, or
2257    * even worse the same device fd passed to multiple libraries)
2258    * ownership of those handles is shared by those independent parties.
2259    *
2260    * Don't do this! Ensure that each library/bufmgr has its own device
2261    * fd so that its namespace does not clash with another.
2262    */
2263   bufmgr->fd = os_dupfd_cloexec(fd);
2264
2265   p_atomic_set(&bufmgr->refcount, 1);
2266
2267   simple_mtx_init(&bufmgr->lock, mtx_plain);
2268   simple_mtx_init(&bufmgr->bo_deps_lock, mtx_plain);
2269
2270   list_inithead(&bufmgr->zombie_list);
2271
2272   bufmgr->has_llc = devinfo->has_llc;
2273   bufmgr->has_local_mem = devinfo->has_local_mem;
2274   bufmgr->has_tiling_uapi = devinfo->has_tiling_uapi;
2275   bufmgr->bo_reuse = bo_reuse;
2276   bufmgr->has_mmap_offset = gem_param(fd, I915_PARAM_MMAP_GTT_VERSION) >= 4;
2277   bufmgr->has_userptr_probe =
2278      gem_param(fd, I915_PARAM_HAS_USERPTR_PROBE) >= 1;
2279   iris_bufmgr_query_meminfo(bufmgr);
2280
2281   STATIC_ASSERT(IRIS_MEMZONE_SHADER_START == 0ull);
2282   const uint64_t _4GB = 1ull << 32;
2283   const uint64_t _2GB = 1ul << 31;
2284
2285   /* The STATE_BASE_ADDRESS size field can only hold 1 page shy of 4GB */
2286   const uint64_t _4GB_minus_1 = _4GB - PAGE_SIZE;
2287
2288   util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_SHADER],
2289                      PAGE_SIZE, _4GB_minus_1 - PAGE_SIZE);
2290   util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_BINDLESS],
2291                      IRIS_MEMZONE_BINDLESS_START, IRIS_BINDLESS_SIZE);
2292   util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_SURFACE],
2293                      IRIS_MEMZONE_SURFACE_START,
2294                      _4GB_minus_1 - IRIS_MAX_BINDERS * IRIS_BINDER_SIZE -
2295                     IRIS_BINDLESS_SIZE);
2296   /* TODO: Why does limiting to 2GB help some state items on gfx12?
2297    *  - CC Viewport Pointer
2298    *  - Blend State Pointer
2299    *  - Color Calc State Pointer
2300    */
2301   const uint64_t dynamic_pool_size =
2302      (devinfo->ver >= 12 ? _2GB : _4GB_minus_1) - IRIS_BORDER_COLOR_POOL_SIZE;
2303   util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_DYNAMIC],
2304                      IRIS_MEMZONE_DYNAMIC_START + IRIS_BORDER_COLOR_POOL_SIZE,
2305                      dynamic_pool_size);
2306
2307   /* Leave the last 4GB out of the high vma range, so that no state
2308    * base address + size can overflow 48 bits.
2309    */
2310   util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_OTHER],
2311                      IRIS_MEMZONE_OTHER_START,
2312                      (gtt_size - _4GB) - IRIS_MEMZONE_OTHER_START);
2313
2314   init_cache_buckets(bufmgr, false);
2315   init_cache_buckets(bufmgr, true);
2316
2317   unsigned min_slab_order = 8;  /* 256 bytes */
2318   unsigned max_slab_order = 20; /* 1 MB (slab size = 2 MB) */
2319   unsigned num_slab_orders_per_allocator =
2320      (max_slab_order - min_slab_order) / NUM_SLAB_ALLOCATORS;
2321
2322   /* Divide the size order range among slab managers. */
2323   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
2324      unsigned min_order = min_slab_order;
2325      unsigned max_order =
2326         MIN2(min_order + num_slab_orders_per_allocator, max_slab_order);
2327
2328      if (!pb_slabs_init(&bufmgr->bo_slabs[i], min_order, max_order,
2329                         IRIS_HEAP_MAX, true, bufmgr,
2330                         iris_can_reclaim_slab,
2331                         iris_slab_alloc,
2332                         (void *) iris_slab_free)) {
2333         free(bufmgr);
2334         return NULL;
2335      }
2336      min_slab_order = max_order + 1;
2337   }
2338
2339   bufmgr->name_table =
2340      _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);
2341   bufmgr->handle_table =
2342      _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);
2343
2344   bufmgr->vma_min_align = devinfo->has_local_mem ? 64 * 1024 : PAGE_SIZE;
2345
2346   if (devinfo->has_aux_map) {
2347      bufmgr->aux_map_ctx = intel_aux_map_init(bufmgr, &aux_map_allocator,
2348                                               devinfo);
2349      assert(bufmgr->aux_map_ctx);
2350   }
2351
2352   return bufmgr;
2353}
2354
2355static struct iris_bufmgr *
2356iris_bufmgr_ref(struct iris_bufmgr *bufmgr)
2357{
2358   p_atomic_inc(&bufmgr->refcount);
2359   return bufmgr;
2360}
2361
2362void
2363iris_bufmgr_unref(struct iris_bufmgr *bufmgr)
2364{
2365   simple_mtx_lock(&global_bufmgr_list_mutex);
2366   if (p_atomic_dec_zero(&bufmgr->refcount)) {
2367      list_del(&bufmgr->link);
2368      iris_bufmgr_destroy(bufmgr);
2369   }
2370   simple_mtx_unlock(&global_bufmgr_list_mutex);
2371}
2372
2373/** Returns a new unique id, to be used by screens. */
2374int
2375iris_bufmgr_create_screen_id(struct iris_bufmgr *bufmgr)
2376{
2377   return p_atomic_inc_return(&bufmgr->next_screen_id) - 1;
2378}
2379
2380/**
2381 * Gets an already existing GEM buffer manager or create a new one.
2382 *
2383 * \param fd File descriptor of the opened DRM device.
2384 */
2385struct iris_bufmgr *
2386iris_bufmgr_get_for_fd(struct intel_device_info *devinfo, int fd, bool bo_reuse)
2387{
2388   struct stat st;
2389
2390   if (fstat(fd, &st))
2391      return NULL;
2392
2393   struct iris_bufmgr *bufmgr = NULL;
2394
2395   simple_mtx_lock(&global_bufmgr_list_mutex);
2396   list_for_each_entry(struct iris_bufmgr, iter_bufmgr, &global_bufmgr_list, link) {
2397      struct stat iter_st;
2398      if (fstat(iter_bufmgr->fd, &iter_st))
2399         continue;
2400
2401      if (st.st_rdev == iter_st.st_rdev) {
2402         assert(iter_bufmgr->bo_reuse == bo_reuse);
2403         bufmgr = iris_bufmgr_ref(iter_bufmgr);
2404         goto unlock;
2405      }
2406   }
2407
2408   bufmgr = iris_bufmgr_create(devinfo, fd, bo_reuse);
2409   if (bufmgr)
2410      list_addtail(&bufmgr->link, &global_bufmgr_list);
2411
2412 unlock:
2413   simple_mtx_unlock(&global_bufmgr_list_mutex);
2414
2415   return bufmgr;
2416}
2417
2418int
2419iris_bufmgr_get_fd(struct iris_bufmgr *bufmgr)
2420{
2421   return bufmgr->fd;
2422}
2423
2424void*
2425iris_bufmgr_get_aux_map_context(struct iris_bufmgr *bufmgr)
2426{
2427   return bufmgr->aux_map_ctx;
2428}
2429
2430simple_mtx_t *
2431iris_bufmgr_get_bo_deps_lock(struct iris_bufmgr *bufmgr)
2432{
2433   return &bufmgr->bo_deps_lock;
2434}
2435