/*	$NetBSD: i915_vma.c,v 1.1 2021/12/18 20:15:26 riastradh Exp $	*/

/*
 * Copyright 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: i915_vma.c,v 1.1 2021/12/18 20:15:26 riastradh Exp $");

#include <linux/sched/mm.h>
#include <drm/drm_gem.h>

#include "display/intel_frontbuffer.h"

#include "gt/intel_engine.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_requests.h"

#include "i915_drv.h"
#include "i915_globals.h"
#include "i915_sw_fence_work.h"
#include "i915_trace.h"
#include "i915_vma.h"

static struct i915_global_vma {
	struct i915_global base;
	struct kmem_cache *slab_vmas;
} global;

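/*
 * struct i915_vma objects are carved out of a dedicated slab cache, which is
 * created in i915_global_vma_init() at the bottom of this file.
 */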
struct i915_vma *i915_vma_alloc(void)
{
	return kmem_cache_zalloc(global.slab_vmas, GFP_KERNEL);
}

void i915_vma_free(struct i915_vma *vma)
{
	return kmem_cache_free(global.slab_vmas, vma);
}

#if IS_ENABLED(CONFIG_DRM_I915_ERRLOG_GEM) && IS_ENABLED(CONFIG_DRM_DEBUG_MM)

#include <linux/stackdepot.h>

static void vma_print_allocator(struct i915_vma *vma, const char *reason)
{
	unsigned long *entries;
	unsigned int nr_entries;
	char buf[512];

	if (!vma->node.stack) {
		DRM_DEBUG_DRIVER("vma.node [%08llx + %08llx] %s: unknown owner\n",
				 vma->node.start, vma->node.size, reason);
		return;
	}

	nr_entries = stack_depot_fetch(vma->node.stack, &entries);
	stack_trace_snprint(buf, sizeof(buf), entries, nr_entries, 0);
	DRM_DEBUG_DRIVER("vma.node [%08llx + %08llx] %s: inserted at %s\n",
			 vma->node.start, vma->node.size, reason, buf);
}

#else

static void vma_print_allocator(struct i915_vma *vma, const char *reason)
{
}

#endif

static inline struct i915_vma *active_to_vma(struct i915_active *ref)
{
	return container_of(ref, typeof(struct i915_vma), active);
}

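/*
 * Active-tracking callbacks: hold a reference on the vma for as long as it
 * is busy on the GPU, and drop that reference again when the last request
 * using it is retired.
 */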
static int __i915_vma_active(struct i915_active *ref)
{
	return i915_vma_tryget(active_to_vma(ref)) ? 0 : -ENOENT;
}

__i915_active_call
static void __i915_vma_retire(struct i915_active *ref)
{
	i915_vma_put(active_to_vma(ref));
}

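/*
 * vma_create() allocates and initialises a new vma for (obj, vm, view) and
 * inserts it into the object's vma rb-tree and list. If another thread has
 * raced us and already created a matching vma, that older instance is
 * returned instead and our allocation is discarded.
 */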
static struct i915_vma *
vma_create(struct drm_i915_gem_object *obj,
	   struct i915_address_space *vm,
	   const struct i915_ggtt_view *view)
{
	struct i915_vma *vma;
	struct rb_node *rb, **p;

	/* The aliasing_ppgtt should never be used directly! */
	GEM_BUG_ON(vm == &vm->gt->ggtt->alias->vm);

	vma = i915_vma_alloc();
	if (vma == NULL)
		return ERR_PTR(-ENOMEM);

	kref_init(&vma->ref);
	mutex_init(&vma->pages_mutex);
	vma->vm = i915_vm_get(vm);
	vma->ops = &vm->vma_ops;
	vma->obj = obj;
	vma->resv = obj->base.resv;
	vma->size = obj->base.size;
	vma->display_alignment = I915_GTT_MIN_ALIGNMENT;

	i915_active_init(&vma->active, __i915_vma_active, __i915_vma_retire);

	/* Declare ourselves safe for use inside shrinkers */
	if (IS_ENABLED(CONFIG_LOCKDEP)) {
		fs_reclaim_acquire(GFP_KERNEL);
		might_lock(&vma->active.mutex);
		fs_reclaim_release(GFP_KERNEL);
	}

	INIT_LIST_HEAD(&vma->closed_link);

	if (view && view->type != I915_GGTT_VIEW_NORMAL) {
		vma->ggtt_view = *view;
		if (view->type == I915_GGTT_VIEW_PARTIAL) {
			GEM_BUG_ON(range_overflows_t(u64,
						     view->partial.offset,
						     view->partial.size,
						     obj->base.size >> PAGE_SHIFT));
			vma->size = view->partial.size;
			vma->size <<= PAGE_SHIFT;
			GEM_BUG_ON(vma->size > obj->base.size);
		} else if (view->type == I915_GGTT_VIEW_ROTATED) {
			vma->size = intel_rotation_info_size(&view->rotated);
			vma->size <<= PAGE_SHIFT;
		} else if (view->type == I915_GGTT_VIEW_REMAPPED) {
			vma->size = intel_remapped_info_size(&view->remapped);
			vma->size <<= PAGE_SHIFT;
		}
	}

	if (unlikely(vma->size > vm->total))
		goto err_vma;

	GEM_BUG_ON(!IS_ALIGNED(vma->size, I915_GTT_PAGE_SIZE));

	if (i915_is_ggtt(vm)) {
		if (unlikely(overflows_type(vma->size, u32)))
			goto err_vma;

		vma->fence_size = i915_gem_fence_size(vm->i915, vma->size,
						      i915_gem_object_get_tiling(obj),
						      i915_gem_object_get_stride(obj));
		if (unlikely(vma->fence_size < vma->size || /* overflow */
			     vma->fence_size > vm->total))
			goto err_vma;

		GEM_BUG_ON(!IS_ALIGNED(vma->fence_size, I915_GTT_MIN_ALIGNMENT));

		vma->fence_alignment = i915_gem_fence_alignment(vm->i915, vma->size,
								i915_gem_object_get_tiling(obj),
								i915_gem_object_get_stride(obj));
		GEM_BUG_ON(!is_power_of_2(vma->fence_alignment));

		__set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(vma));
	}

	spin_lock(&obj->vma.lock);

	rb = NULL;
	p = &obj->vma.tree.rb_node;
	while (*p) {
		struct i915_vma *pos;
		long cmp;

		rb = *p;
		pos = rb_entry(rb, struct i915_vma, obj_node);

		/*
		 * If the view already exists in the tree, another thread
		 * already created a matching vma, so return the older instance
		 * and dispose of ours.
		 */
		cmp = i915_vma_compare(pos, vm, view);
		if (cmp == 0) {
			spin_unlock(&obj->vma.lock);
			i915_vma_free(vma);
			return pos;
		}

		if (cmp < 0)
			p = &rb->rb_right;
		else
			p = &rb->rb_left;
	}
	rb_link_node(&vma->obj_node, rb, p);
	rb_insert_color(&vma->obj_node, &obj->vma.tree);

	if (i915_vma_is_ggtt(vma))
		/*
		 * We put the GGTT vma at the start of the vma-list, followed
		 * by the ppGGTT vma. This allows us to break early when
		 * iterating over only the GGTT vma for an object, see
		 * for_each_ggtt_vma()
		 */
		list_add(&vma->obj_link, &obj->vma.list);
	else
		list_add_tail(&vma->obj_link, &obj->vma.list);

	spin_unlock(&obj->vma.lock);

	return vma;

err_vma:
	i915_vma_free(vma);
	return ERR_PTR(-E2BIG);
}

static struct i915_vma *
vma_lookup(struct drm_i915_gem_object *obj,
	   struct i915_address_space *vm,
	   const struct i915_ggtt_view *view)
{
	struct rb_node *rb;

	rb = obj->vma.tree.rb_node;
	while (rb) {
		struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node);
		long cmp;

		cmp = i915_vma_compare(vma, vm, view);
		if (cmp == 0)
			return vma;

		if (cmp < 0)
			rb = rb->rb_right;
		else
			rb = rb->rb_left;
	}

	return NULL;
}

/**
 * i915_vma_instance - return the singleton instance of the VMA
 * @obj: parent &struct drm_i915_gem_object to be mapped
 * @vm: address space in which the mapping is located
 * @view: additional mapping requirements
 *
 * i915_vma_instance() looks up an existing VMA of the @obj in the @vm with
 * the same @view characteristics. If a match is not found, one is created.
 * Once created, the VMA is kept until either the object is freed, or the
 * address space is closed.
 *
 * Returns the vma, or an error pointer.
 */
struct i915_vma *
i915_vma_instance(struct drm_i915_gem_object *obj,
		  struct i915_address_space *vm,
		  const struct i915_ggtt_view *view)
{
	struct i915_vma *vma;

	GEM_BUG_ON(view && !i915_is_ggtt(vm));
	GEM_BUG_ON(!atomic_read(&vm->open));

	spin_lock(&obj->vma.lock);
	vma = vma_lookup(obj, vm, view);
	spin_unlock(&obj->vma.lock);

	/* vma_create() will resolve the race if another creates the vma */
	if (unlikely(!vma))
		vma = vma_create(obj, vm, view);

	GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view));
	return vma;
}

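/*
 * State for an asynchronous bind: the PTE insertion is packaged as a
 * dma_fence_work so that it can run outside the caller's context and be
 * tracked as the exclusive fence of the vma's active tracker. Any pages
 * pinned for the bind are released from the worker's release callback.
 */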
struct i915_vma_work {
	struct dma_fence_work base;
	struct i915_vma *vma;
	struct drm_i915_gem_object *pinned;
	enum i915_cache_level cache_level;
	unsigned int flags;
};

static int __vma_bind(struct dma_fence_work *work)
{
	struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
	struct i915_vma *vma = vw->vma;
	int err;

	err = vma->ops->bind_vma(vma, vw->cache_level, vw->flags);
	if (err)
		atomic_or(I915_VMA_ERROR, &vma->flags);

	return err;
}

static void __vma_release(struct dma_fence_work *work)
{
	struct i915_vma_work *vw = container_of(work, typeof(*vw), base);

	if (vw->pinned)
		__i915_gem_object_unpin_pages(vw->pinned);
}

static const struct dma_fence_work_ops bind_ops = {
	.name = "bind",
	.work = __vma_bind,
	.release = __vma_release,
};

struct i915_vma_work *i915_vma_work(void)
{
	struct i915_vma_work *vw;

	vw = kzalloc(sizeof(*vw), GFP_KERNEL);
	if (!vw)
		return NULL;

	dma_fence_work_init(&vw->base, &bind_ops);
	vw->base.dma.error = -EAGAIN; /* disable the worker by default */

	return vw;
}

/**
 * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
 * @vma: VMA to map
 * @cache_level: mapping cache level
 * @flags: flags like global or local mapping
 * @work: preallocated worker for allocating and binding the PTE
 *
 * DMA addresses are taken from the scatter-gather table of this object (or of
 * this VMA in case of non-default GGTT views) and PTE entries set up.
 * Note that DMA addresses are also the only part of the SG table we care about.
 */
int i915_vma_bind(struct i915_vma *vma,
		  enum i915_cache_level cache_level,
		  u32 flags,
		  struct i915_vma_work *work)
{
	u32 bind_flags;
	u32 vma_flags;
	int ret;

	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
	GEM_BUG_ON(vma->size > vma->node.size);

	if (GEM_DEBUG_WARN_ON(range_overflows(vma->node.start,
					      vma->node.size,
					      vma->vm->total)))
		return -ENODEV;

	if (GEM_DEBUG_WARN_ON(!flags))
		return -EINVAL;

	bind_flags = flags;
	bind_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;

	vma_flags = atomic_read(&vma->flags);
	vma_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
	if (flags & PIN_UPDATE)
		bind_flags |= vma_flags;
	else
		bind_flags &= ~vma_flags;
	if (bind_flags == 0)
		return 0;

	GEM_BUG_ON(!vma->pages);

	trace_i915_vma_bind(vma, bind_flags);
	if (work && (bind_flags & ~vma_flags) & vma->vm->bind_async_flags) {
		work->vma = vma;
		work->cache_level = cache_level;
		work->flags = bind_flags | I915_VMA_ALLOC;

		/*
		 * Note we only want to chain up to the migration fence on
		 * the pages (not the object itself). As we don't track that,
		 * yet, we have to use the exclusive fence instead.
		 *
		 * Also note that we do not want to track the async vma as
		 * part of the obj->resv->excl_fence as it only affects
		 * execution and not content or object's backing store lifetime.
		 */
		GEM_BUG_ON(i915_active_has_exclusive(&vma->active));
		i915_active_set_exclusive(&vma->active, &work->base.dma);
		work->base.dma.error = 0; /* enable the queue_work() */

		if (vma->obj) {
			__i915_gem_object_pin_pages(vma->obj);
			work->pinned = vma->obj;
		}
	} else {
		GEM_BUG_ON((bind_flags & ~vma_flags) & vma->vm->bind_async_flags);
		ret = vma->ops->bind_vma(vma, cache_level, bind_flags);
		if (ret)
			return ret;
	}

	atomic_or(bind_flags, &vma->flags);
	return 0;
}

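/*
 * i915_vma_pin_iomap() maps the GGTT aperture backing the vma into the
 * kernel address space as write-combined I/O memory, pinning the vma and
 * taking a fence while it is mapped. Callers must balance it with
 * i915_vma_unpin_iomap(), and must keep the device awake for the duration
 * of any access through the returned pointer.
 */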
void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
{
	void __iomem *ptr;
	int err;

	if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
		err = -ENODEV;
		goto err;
	}

	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
	GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND));

	ptr = READ_ONCE(vma->iomap);
	if (ptr == NULL) {
		ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap,
					vma->node.start,
					vma->node.size);
		if (ptr == NULL) {
			err = -ENOMEM;
			goto err;
		}

		if (unlikely(cmpxchg(&vma->iomap, NULL, ptr))) {
			io_mapping_unmap(ptr);
			ptr = vma->iomap;
		}
	}

	__i915_vma_pin(vma);

	err = i915_vma_pin_fence(vma);
	if (err)
		goto err_unpin;

	i915_vma_set_ggtt_write(vma);

	/* NB Access through the GTT requires the device to be awake. */
	return ptr;

err_unpin:
	__i915_vma_unpin(vma);
err:
	return IO_ERR_PTR(err);
}

void i915_vma_flush_writes(struct i915_vma *vma)
{
	if (i915_vma_unset_ggtt_write(vma))
		intel_gt_flush_ggtt_writes(vma->vm->gt);
}

void i915_vma_unpin_iomap(struct i915_vma *vma)
{
	GEM_BUG_ON(vma->iomap == NULL);

	i915_vma_flush_writes(vma);

	i915_vma_unpin_fence(vma);
	i915_vma_unpin(vma);
}

void i915_vma_unpin_and_release(struct i915_vma **p_vma, unsigned int flags)
{
	struct i915_vma *vma;
	struct drm_i915_gem_object *obj;

	vma = fetch_and_zero(p_vma);
	if (!vma)
		return;

	obj = vma->obj;
	GEM_BUG_ON(!obj);

	i915_vma_unpin(vma);
	i915_vma_close(vma);

	if (flags & I915_VMA_RELEASE_MAP)
		i915_gem_object_unpin_map(obj);

	i915_gem_object_put(obj);
}

bool i915_vma_misplaced(const struct i915_vma *vma,
			u64 size, u64 alignment, u64 flags)
{
	if (!drm_mm_node_allocated(&vma->node))
		return false;

	if (test_bit(I915_VMA_ERROR_BIT, __i915_vma_flags(vma)))
		return true;

	if (vma->node.size < size)
		return true;

	GEM_BUG_ON(alignment && !is_power_of_2(alignment));
	if (alignment && !IS_ALIGNED(vma->node.start, alignment))
		return true;

	if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma))
		return true;

	if (flags & PIN_OFFSET_BIAS &&
	    vma->node.start < (flags & PIN_OFFSET_MASK))
		return true;

	if (flags & PIN_OFFSET_FIXED &&
	    vma->node.start != (flags & PIN_OFFSET_MASK))
		return true;

	return false;
}

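/*
 * Recompute whether this GGTT vma is both reachable through the mappable
 * aperture and placed/aligned suitably for a fence register, and cache the
 * result in I915_VMA_CAN_FENCE_BIT.
 */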
void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
{
	bool mappable, fenceable;

	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
	GEM_BUG_ON(!vma->fence_size);

	fenceable = (vma->node.size >= vma->fence_size &&
		     IS_ALIGNED(vma->node.start, vma->fence_alignment));

	mappable = vma->node.start + vma->fence_size <= i915_vm_to_ggtt(vma->vm)->mappable_end;

	if (mappable && fenceable)
		set_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma));
	else
		clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma));
}

bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color)
{
	struct drm_mm_node *node = &vma->node;
	struct drm_mm_node *other;

	/*
	 * On some machines we have to be careful when putting differing types
	 * of snoopable memory together to avoid the prefetcher crossing memory
	 * domains and dying. During vm initialisation, we decide whether or not
	 * these constraints apply and set the drm_mm.color_adjust
	 * appropriately.
	 */
	if (!i915_vm_has_cache_coloring(vma->vm))
		return true;

	/* Only valid to be called on an already inserted vma */
	GEM_BUG_ON(!drm_mm_node_allocated(node));
	GEM_BUG_ON(list_empty(&node->node_list));

	other = list_prev_entry(node, node_list);
	if (i915_node_color_differs(other, color) &&
	    !drm_mm_hole_follows(other))
		return false;

	other = list_next_entry(node, node_list);
	if (i915_node_color_differs(other, color) &&
	    !drm_mm_hole_follows(node))
		return false;

	return true;
}

static void assert_bind_count(const struct drm_i915_gem_object *obj)
{
	/*
	 * Combine the assertion that the object is bound and that we have
	 * pinned its pages. But we should never have bound the object
	 * more than we have pinned its pages. (For complete accuracy, we
	 * assume that no one else is pinning the pages, but as a rough
	 * assertion that we will not run into problems later, this will do!)
	 */
	GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < atomic_read(&obj->bind_count));
}

/**
 * i915_vma_insert - finds a slot for the vma in its address space
 * @vma: the vma
 * @size: requested size in bytes (can be larger than the VMA)
 * @alignment: required alignment
 * @flags: mask of PIN_* flags to use
 *
 * First we try to allocate some free space that meets the requirements for
 * the VMA. Failing that, if the flags permit, it will evict an old VMA,
 * preferably the oldest idle entry, to make room for the new VMA.
 *
 * Returns:
 * 0 on success, negative error code otherwise.
 */
static int
i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
{
	unsigned long color;
	u64 start, end;
	int ret;

	GEM_BUG_ON(i915_vma_is_closed(vma));
	GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));

	size = max(size, vma->size);
	alignment = max(alignment, vma->display_alignment);
	if (flags & PIN_MAPPABLE) {
		size = max_t(typeof(size), size, vma->fence_size);
		alignment = max_t(typeof(alignment),
				  alignment, vma->fence_alignment);
	}

	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
	GEM_BUG_ON(!IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
	GEM_BUG_ON(!is_power_of_2(alignment));

	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
	GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE));

	end = vma->vm->total;
	if (flags & PIN_MAPPABLE)
		end = min_t(u64, end, i915_vm_to_ggtt(vma->vm)->mappable_end);
	if (flags & PIN_ZONE_4G)
		end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE);
	GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));

	/* If binding the object/GGTT view requires more space than the entire
	 * aperture has, reject it early before evicting everything in a vain
	 * attempt to find space.
	 */
	if (size > end) {
		DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu > %s aperture=%llu\n",
			  size, flags & PIN_MAPPABLE ? "mappable" : "total",
			  end);
		return -ENOSPC;
	}

	color = 0;
	if (vma->obj && i915_vm_has_cache_coloring(vma->vm))
		color = vma->obj->cache_level;

	if (flags & PIN_OFFSET_FIXED) {
		u64 offset = flags & PIN_OFFSET_MASK;
		if (!IS_ALIGNED(offset, alignment) ||
		    range_overflows(offset, size, end))
			return -EINVAL;

		ret = i915_gem_gtt_reserve(vma->vm, &vma->node,
					   size, offset, color,
					   flags);
		if (ret)
			return ret;
	} else {
		/*
		 * We only support huge gtt pages through the 48b PPGTT,
		 * however we also don't want to force any alignment for
		 * objects which need to be tightly packed into the low 32bits.
		 *
		 * Note that we assume that GGTT are limited to 4GiB for the
		 * foreseeable future. See also i915_ggtt_offset().
		 */
		if (upper_32_bits(end - 1) &&
		    vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
			/*
			 * We can't mix 64K and 4K PTEs in the same page-table
			 * (2M block), and so to avoid the ugliness and
			 * complexity of coloring we opt for just aligning 64K
			 * objects to 2M.
			 */
			u64 page_alignment =
				rounddown_pow_of_two(vma->page_sizes.sg |
						     I915_GTT_PAGE_SIZE_2M);

			/*
			 * Check we don't expand for the limited Global GTT
			 * (mappable aperture is even more precious!). This
			 * also checks that we exclude the aliasing-ppgtt.
			 */
			GEM_BUG_ON(i915_vma_is_ggtt(vma));

			alignment = max(alignment, page_alignment);

			if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
				size = round_up(size, I915_GTT_PAGE_SIZE_2M);
		}

		ret = i915_gem_gtt_insert(vma->vm, &vma->node,
					  size, alignment, color,
					  start, end, flags);
		if (ret)
			return ret;

		GEM_BUG_ON(vma->node.start < start);
		GEM_BUG_ON(vma->node.start + vma->node.size > end);
	}
	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color));

	if (vma->obj) {
		struct drm_i915_gem_object *obj = vma->obj;

		atomic_inc(&obj->bind_count);
		assert_bind_count(obj);
	}
	list_add_tail(&vma->vm_link, &vma->vm->bound_list);

	return 0;
}

static void
i915_vma_detach(struct i915_vma *vma)
{
	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
	GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));

	/*
	 * And finally now the object is completely decoupled from this
	 * vma, we can drop its hold on the backing storage and allow
	 * it to be reaped by the shrinker.
	 */
	list_del(&vma->vm_link);
	if (vma->obj) {
		struct drm_i915_gem_object *obj = vma->obj;

		assert_bind_count(obj);
		atomic_dec(&obj->bind_count);
	}
}

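/*
 * Attempt a lockless pin: if the vma is already bound with the requested
 * flags and has a non-zero pin count, just bump the pin count with a
 * cmpxchg loop. If the pin count is zero we must recheck under vm->mutex
 * to avoid racing with a concurrent i915_vma_unbind().
 */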
static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
{
	unsigned int bound;
	bool pinned = true;

	bound = atomic_read(&vma->flags);
	do {
		if (unlikely(flags & ~bound))
			return false;

		if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR)))
			return false;

		if (!(bound & I915_VMA_PIN_MASK))
			goto unpinned;

		GEM_BUG_ON(((bound + 1) & I915_VMA_PIN_MASK) == 0);
	} while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1));

	return true;

unpinned:
	/*
	 * If pin_count==0, but we are bound, check under the lock to avoid
	 * racing with a concurrent i915_vma_unbind().
	 */
	mutex_lock(&vma->vm->mutex);
	do {
		if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) {
			pinned = false;
			break;
		}

		if (unlikely(flags & ~bound)) {
			pinned = false;
			break;
		}
	} while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1));
	mutex_unlock(&vma->vm->mutex);

	return pinned;
}

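/*
 * vma->pages_count is split in two: the low bits count references on the
 * backing pages taken via vma_get_pages()/vma_put_pages(), while the upper
 * portion (above I915_VMA_PAGES_BIAS) counts bindings and is released en
 * masse by vma_unbind_pages().
 */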
static int vma_get_pages(struct i915_vma *vma)
{
	int err = 0;

	if (atomic_add_unless(&vma->pages_count, 1, 0))
		return 0;

	/* Allocations ahoy! */
	if (mutex_lock_interruptible(&vma->pages_mutex))
		return -EINTR;

	if (!atomic_read(&vma->pages_count)) {
		if (vma->obj) {
			err = i915_gem_object_pin_pages(vma->obj);
			if (err)
				goto unlock;
		}

		err = vma->ops->set_pages(vma);
		if (err) {
			if (vma->obj)
				i915_gem_object_unpin_pages(vma->obj);
			goto unlock;
		}
	}
	atomic_inc(&vma->pages_count);

unlock:
	mutex_unlock(&vma->pages_mutex);

	return err;
}

static void __vma_put_pages(struct i915_vma *vma, unsigned int count)
{
	/* We allocate under vma_get_pages, so beware the shrinker */
	mutex_lock_nested(&vma->pages_mutex, SINGLE_DEPTH_NESTING);
	GEM_BUG_ON(atomic_read(&vma->pages_count) < count);
	if (atomic_sub_return(count, &vma->pages_count) == 0) {
		vma->ops->clear_pages(vma);
		GEM_BUG_ON(vma->pages);
		if (vma->obj)
			i915_gem_object_unpin_pages(vma->obj);
	}
	mutex_unlock(&vma->pages_mutex);
}

static void vma_put_pages(struct i915_vma *vma)
{
	if (atomic_add_unless(&vma->pages_count, -1, 1))
		return;

	__vma_put_pages(vma, 1);
}

static void vma_unbind_pages(struct i915_vma *vma)
{
	unsigned int count;

	lockdep_assert_held(&vma->vm->mutex);

	/* The upper portion of pages_count is the number of bindings */
	count = atomic_read(&vma->pages_count);
	count >>= I915_VMA_PAGES_BIAS;
	GEM_BUG_ON(!count);

	__vma_put_pages(vma, count | count << I915_VMA_PAGES_BIAS);
}

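/*
 * i915_vma_pin() pins the vma for use, inserting it into its address space
 * and binding the PTEs if it is not already bound with the requested
 * PIN_USER/PIN_GLOBAL flags. The fast path is a lockless pin-count bump;
 * otherwise we take vm->mutex, reserve a node and bind, possibly
 * asynchronously via a preallocated i915_vma_work().
 */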
int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
{
	struct i915_vma_work *work = NULL;
	intel_wakeref_t wakeref = 0;
	unsigned int bound;
	int err;

	BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
	BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);

	GEM_BUG_ON(flags & PIN_UPDATE);
	GEM_BUG_ON(!(flags & (PIN_USER | PIN_GLOBAL)));

	/* First try and grab the pin without rebinding the vma */
	if (try_qad_pin(vma, flags & I915_VMA_BIND_MASK))
		return 0;

	err = vma_get_pages(vma);
	if (err)
		return err;

	if (flags & vma->vm->bind_async_flags) {
		work = i915_vma_work();
		if (!work) {
			err = -ENOMEM;
			goto err_pages;
		}
	}

	if (flags & PIN_GLOBAL)
		wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);

	/* No more allocations allowed once we hold vm->mutex */
	err = mutex_lock_interruptible(&vma->vm->mutex);
	if (err)
		goto err_fence;

	bound = atomic_read(&vma->flags);
	if (unlikely(bound & I915_VMA_ERROR)) {
		err = -ENOMEM;
		goto err_unlock;
	}

	if (unlikely(!((bound + 1) & I915_VMA_PIN_MASK))) {
		err = -EAGAIN; /* pins are meant to be fairly temporary */
		goto err_unlock;
	}

	if (unlikely(!(flags & ~bound & I915_VMA_BIND_MASK))) {
		__i915_vma_pin(vma);
		goto err_unlock;
	}

	err = i915_active_acquire(&vma->active);
	if (err)
		goto err_unlock;

	if (!(bound & I915_VMA_BIND_MASK)) {
		err = i915_vma_insert(vma, size, alignment, flags);
		if (err)
			goto err_active;

		if (i915_is_ggtt(vma->vm))
			__i915_vma_set_map_and_fenceable(vma);
	}

	GEM_BUG_ON(!vma->pages);
	err = i915_vma_bind(vma,
			    vma->obj ? vma->obj->cache_level : 0,
			    flags, work);
	if (err)
		goto err_remove;

	/* There should only be at most 2 active bindings (user, global) */
	GEM_BUG_ON(bound + I915_VMA_PAGES_ACTIVE < bound);
	atomic_add(I915_VMA_PAGES_ACTIVE, &vma->pages_count);
	list_move_tail(&vma->vm_link, &vma->vm->bound_list);

	__i915_vma_pin(vma);
	GEM_BUG_ON(!i915_vma_is_pinned(vma));
	GEM_BUG_ON(!i915_vma_is_bound(vma, flags));
	GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));

err_remove:
	if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK)) {
		i915_vma_detach(vma);
		drm_mm_remove_node(&vma->node);
	}
err_active:
	i915_active_release(&vma->active);
err_unlock:
	mutex_unlock(&vma->vm->mutex);
err_fence:
	if (work)
		dma_fence_work_commit(&work->base);
	if (wakeref)
		intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
err_pages:
	vma_put_pages(vma);
	return err;
}

static void flush_idle_contexts(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, gt, id)
		intel_engine_flush_barriers(engine);

	intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
}

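/*
 * i915_ggtt_pin() is a persistent variant of i915_vma_pin() for GGTT vmas:
 * on -ENOSPC it flushes idle contexts, evicts the address space under
 * vm->mutex and retries until the pin succeeds or fails with a different
 * error.
 */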
int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags)
{
	struct i915_address_space *vm = vma->vm;
	int err;

	GEM_BUG_ON(!i915_vma_is_ggtt(vma));

	do {
		err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
		if (err != -ENOSPC)
			return err;

		/* Unlike i915_vma_pin, we don't take no for an answer! */
		flush_idle_contexts(vm->gt);
		if (mutex_lock_interruptible(&vm->mutex) == 0) {
			i915_gem_evict_vm(vm);
			mutex_unlock(&vm->mutex);
		}
	} while (1);
}

void i915_vma_close(struct i915_vma *vma)
{
	struct intel_gt *gt = vma->vm->gt;
	unsigned long flags;

	GEM_BUG_ON(i915_vma_is_closed(vma));

	/*
	 * We defer actually closing, unbinding and destroying the VMA until
	 * the next idle point, or if the object is freed in the meantime. By
	 * postponing the unbind, we allow for it to be resurrected by the
	 * client, avoiding the work required to rebind the VMA. This is
	 * advantageous for DRI, where the client/server pass objects
	 * between themselves, temporarily opening a local VMA to the
	 * object, and then closing it again. The same object is then reused
	 * on the next frame (or two, depending on the depth of the swap queue)
	 * causing us to rebind the VMA once more. This ends up being a lot
	 * of wasted work for the steady state.
	 */
	spin_lock_irqsave(&gt->closed_lock, flags);
	list_add(&vma->closed_link, &gt->closed_vma);
	spin_unlock_irqrestore(&gt->closed_lock, flags);
}

static void __i915_vma_remove_closed(struct i915_vma *vma)
{
	struct intel_gt *gt = vma->vm->gt;

	spin_lock_irq(&gt->closed_lock);
	list_del_init(&vma->closed_link);
	spin_unlock_irq(&gt->closed_lock);
}

void i915_vma_reopen(struct i915_vma *vma)
{
	if (i915_vma_is_closed(vma))
		__i915_vma_remove_closed(vma);
}

void i915_vma_release(struct kref *ref)
{
	struct i915_vma *vma = container_of(ref, typeof(*vma), ref);

	if (drm_mm_node_allocated(&vma->node)) {
		mutex_lock(&vma->vm->mutex);
		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
		WARN_ON(__i915_vma_unbind(vma));
		mutex_unlock(&vma->vm->mutex);
		GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
	}
	GEM_BUG_ON(i915_vma_is_active(vma));

	if (vma->obj) {
		struct drm_i915_gem_object *obj = vma->obj;

		spin_lock(&obj->vma.lock);
		list_del(&vma->obj_link);
		rb_erase(&vma->obj_node, &obj->vma.tree);
		spin_unlock(&obj->vma.lock);
	}

	__i915_vma_remove_closed(vma);
	i915_vm_put(vma->vm);

	i915_active_fini(&vma->active);
	i915_vma_free(vma);
}

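/*
 * i915_vma_parked() runs when the GT parks (goes idle): walk the list of
 * closed vmas and destroy them, taking temporary references on the object
 * and address space so the work can be done outside the closed_lock.
 */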
void i915_vma_parked(struct intel_gt *gt)
{
	struct i915_vma *vma, *next;

	spin_lock_irq(&gt->closed_lock);
	list_for_each_entry_safe(vma, next, &gt->closed_vma, closed_link) {
		struct drm_i915_gem_object *obj = vma->obj;
		struct i915_address_space *vm = vma->vm;

		/* XXX All to avoid keeping a reference on i915_vma itself */

		if (!kref_get_unless_zero(&obj->base.refcount))
			continue;

		if (i915_vm_tryopen(vm)) {
			list_del_init(&vma->closed_link);
		} else {
			i915_gem_object_put(obj);
			obj = NULL;
		}

		spin_unlock_irq(&gt->closed_lock);

		if (obj) {
			__i915_vma_put(vma);
			i915_gem_object_put(obj);
		}

		i915_vm_close(vm);

		/* Restart after dropping lock */
		spin_lock_irq(&gt->closed_lock);
		next = list_first_entry(&gt->closed_vma,
					typeof(*next), closed_link);
	}
	spin_unlock_irq(&gt->closed_lock);
}

static void __i915_vma_iounmap(struct i915_vma *vma)
{
	GEM_BUG_ON(i915_vma_is_pinned(vma));

	if (vma->iomap == NULL)
		return;

	io_mapping_unmap(vma->iomap);
	vma->iomap = NULL;
}

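/*
 * Revoke any CPU mappings of the GGTT aperture range backing this vma so
 * that the next userspace access faults in and revalidates the binding.
 */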
void i915_vma_revoke_mmap(struct i915_vma *vma)
{
	struct drm_vma_offset_node *node;
	u64 vma_offset;

	if (!i915_vma_has_userfault(vma))
		return;

	GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
	GEM_BUG_ON(!vma->obj->userfault_count);

	node = &vma->mmo->vma_node;
	vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT;
	unmap_mapping_range(vma->vm->i915->drm.anon_inode->i_mapping,
			    drm_vma_node_offset_addr(node) + vma_offset,
			    vma->size,
			    1);

	i915_vma_unset_userfault(vma);
	if (!--vma->obj->userfault_count)
		list_del(&vma->obj->userfault_link);
}

int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq)
{
	int err;

	GEM_BUG_ON(!i915_vma_is_pinned(vma));

	/* Wait for the vma to be bound before we start! */
	err = i915_request_await_active(rq, &vma->active);
	if (err)
		return err;

	return i915_active_add_request(&vma->active, rq);
}

int i915_vma_move_to_active(struct i915_vma *vma,
			    struct i915_request *rq,
			    unsigned int flags)
{
	struct drm_i915_gem_object *obj = vma->obj;
	int err;

	assert_object_held(obj);

	err = __i915_vma_move_to_active(vma, rq);
	if (unlikely(err))
		return err;

	if (flags & EXEC_OBJECT_WRITE) {
		struct intel_frontbuffer *front;

		front = __intel_frontbuffer_get(obj);
		if (unlikely(front)) {
			if (intel_frontbuffer_invalidate(front, ORIGIN_CS))
				i915_active_add_request(&front->write, rq);
			intel_frontbuffer_put(front);
		}

		dma_resv_add_excl_fence(vma->resv, &rq->fence);
		obj->write_domain = I915_GEM_DOMAIN_RENDER;
		obj->read_domains = 0;
	} else {
		err = dma_resv_reserve_shared(vma->resv, 1);
		if (unlikely(err))
			return err;

		dma_resv_add_shared_fence(vma->resv, &rq->fence);
		obj->write_domain = 0;
	}
	obj->read_domains |= I915_GEM_GPU_DOMAINS;
	obj->mm.dirty = true;

	GEM_BUG_ON(!i915_vma_is_active(vma));
	return 0;
}

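/*
 * __i915_vma_unbind() does the actual teardown of a binding and must be
 * called with vm->mutex held: wait for outstanding activity, refuse if the
 * vma is still pinned, flush GGTT writes, release the fence and CPU
 * mappings, clear the PTEs and finally remove the drm_mm node.
 */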
int __i915_vma_unbind(struct i915_vma *vma)
{
	int ret;

	lockdep_assert_held(&vma->vm->mutex);

	/*
	 * First wait upon any activity as retiring the request may
	 * have side-effects such as unpinning or even unbinding this vma.
	 *
	 * XXX Actually waiting under the vm->mutex is a hindrance and
	 * should be pipelined wherever possible. In cases where that is
	 * unavoidable, we should lift the wait to before the mutex.
	 */
	ret = i915_vma_sync(vma);
	if (ret)
		return ret;

	if (i915_vma_is_pinned(vma)) {
		vma_print_allocator(vma, "is pinned");
		return -EAGAIN;
	}

	/*
	 * After confirming that no one else is pinning this vma, wait for
	 * any laggards who may have crept in during the wait (through
	 * a residual pin skipping the vm->mutex) to complete.
	 */
	ret = i915_vma_sync(vma);
	if (ret)
		return ret;

	if (!drm_mm_node_allocated(&vma->node))
		return 0;

	GEM_BUG_ON(i915_vma_is_pinned(vma));
	GEM_BUG_ON(i915_vma_is_active(vma));

	if (i915_vma_is_map_and_fenceable(vma)) {
		/*
		 * Check that we have flushed all writes through the GGTT
		 * before the unbind; otherwise, due to the non-strict nature
		 * of those indirect writes, they may end up referencing the
		 * GGTT PTE after the unbind.
		 */
		i915_vma_flush_writes(vma);
		GEM_BUG_ON(i915_vma_has_ggtt_write(vma));

		/* release the fence reg _after_ flushing */
		ret = i915_vma_revoke_fence(vma);
		if (ret)
			return ret;

		/* Force a pagefault for domain tracking on next user access */
		i915_vma_revoke_mmap(vma);

		__i915_vma_iounmap(vma);
		clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma));
	}
	GEM_BUG_ON(vma->fence);
	GEM_BUG_ON(i915_vma_has_userfault(vma));

	if (likely(atomic_read(&vma->vm->open))) {
		trace_i915_vma_unbind(vma);
		vma->ops->unbind_vma(vma);
	}
	atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR), &vma->flags);

	i915_vma_detach(vma);
	vma_unbind_pages(vma);

	drm_mm_remove_node(&vma->node); /* pairs with i915_vma_release() */
	return 0;
}

int i915_vma_unbind(struct i915_vma *vma)
{
	struct i915_address_space *vm = vma->vm;
	intel_wakeref_t wakeref = 0;
	int err;

	if (!drm_mm_node_allocated(&vma->node))
		return 0;

	if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
		/* XXX not always required: nop_clear_range */
		wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);

	err = mutex_lock_interruptible(&vm->mutex);
	if (err)
		return err;

	err = __i915_vma_unbind(vma);
	mutex_unlock(&vm->mutex);

	if (wakeref)
		intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);

	return err;
}

struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma)
{
	i915_gem_object_make_unshrinkable(vma->obj);
	return vma;
}

void i915_vma_make_shrinkable(struct i915_vma *vma)
{
	i915_gem_object_make_shrinkable(vma->obj);
}

void i915_vma_make_purgeable(struct i915_vma *vma)
{
	i915_gem_object_make_purgeable(vma->obj);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_vma.c"
#endif

static void i915_global_vma_shrink(void)
{
	kmem_cache_shrink(global.slab_vmas);
}

static void i915_global_vma_exit(void)
{
	kmem_cache_destroy(global.slab_vmas);
}

static struct i915_global_vma global = { {
	.shrink = i915_global_vma_shrink,
	.exit = i915_global_vma_exit,
} };

int __init i915_global_vma_init(void)
{
	global.slab_vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN);
	if (!global.slab_vmas)
		return -ENOMEM;

	i915_global_register(&global.base);
	return 0;
}