i915_gem.c, revision 1.12
1 /*
2 * Copyright 2008 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric (at) anholt.net>
25 *
26 */
27
28 #ifdef __NetBSD__
29 #if 0 /* XXX uvmhist option? */
30 #include "opt_uvmhist.h"
31 #endif
32
33 #include <sys/types.h>
34 #include <sys/param.h>
35
36 #include <x86/machdep.h> /* x86_select_freelist */
37
38 #include <uvm/uvm.h>
39 #include <uvm/uvm_extern.h>
40 #include <uvm/uvm_fault.h>
41 #include <uvm/uvm_page.h>
42 #include <uvm/uvm_pmap.h>
43 #include <uvm/uvm_prot.h>
44 #endif
45
46 #include <drm/drmP.h>
47 #include <drm/drm_vma_manager.h>
48 #include <drm/i915_drm.h>
49 #include "i915_drv.h"
50 #include "i915_trace.h"
51 #include "intel_drv.h"
52 #include <linux/shmem_fs.h>
53 #include <linux/slab.h>
54 #include <linux/swap.h>
55 #include <linux/pci.h>
56 #include <linux/dma-buf.h>
57 #include <linux/errno.h>
58 #include <linux/time.h>
59 #include <linux/err.h>
60 #include <asm/param.h>
61
62 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
63 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
64 bool force);
65 static __must_check int
66 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
67 bool readonly);
68
69 static void i915_gem_write_fence(struct drm_device *dev, int reg,
70 struct drm_i915_gem_object *obj);
71 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
72 struct drm_i915_fence_reg *fence,
73 bool enable);
74
75 static unsigned long i915_gem_inactive_count(struct shrinker *shrinker,
76 struct shrink_control *sc);
77 static unsigned long i915_gem_inactive_scan(struct shrinker *shrinker,
78 struct shrink_control *sc);
79 static unsigned long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
80 static unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv);
81 static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
82 static void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring);
83
84 static bool cpu_cache_is_coherent(struct drm_device *dev,
85 enum i915_cache_level level)
86 {
87 return HAS_LLC(dev) || level != I915_CACHE_NONE;
88 }
89
90 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
91 {
92 if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
93 return true;
94
95 return obj->pin_display;
96 }
97
98 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
99 {
100 if (obj->tiling_mode)
101 i915_gem_release_mmap(obj);
102
103 /* As we do not have an associated fence register, we will force
104 * a tiling change if we ever need to acquire one.
105 */
106 obj->fence_dirty = false;
107 obj->fence_reg = I915_FENCE_REG_NONE;
108 }
109
110 /* some bookkeeping */
111 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
112 size_t size)
113 {
114 spin_lock(&dev_priv->mm.object_stat_lock);
115 dev_priv->mm.object_count++;
116 dev_priv->mm.object_memory += size;
117 spin_unlock(&dev_priv->mm.object_stat_lock);
118 }
119
120 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
121 size_t size)
122 {
123 spin_lock(&dev_priv->mm.object_stat_lock);
124 dev_priv->mm.object_count--;
125 dev_priv->mm.object_memory -= size;
126 spin_unlock(&dev_priv->mm.object_stat_lock);
127 }
128
129 static int
130 i915_gem_wait_for_error(struct i915_gpu_error *error)
131 {
132 int ret;
133
134 #define EXIT_COND (!i915_reset_in_progress(error) || \
135 i915_terminally_wedged(error))
136 if (EXIT_COND)
137 return 0;
138
139 /*
140 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
141 * userspace. If it takes that long something really bad is going on and
142 * we should simply try to bail out and fail as gracefully as possible.
143 */
144 ret = wait_event_interruptible_timeout(error->reset_queue,
145 EXIT_COND,
146 10*HZ);
147 if (ret == 0) {
148 DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
149 return -EIO;
150 } else if (ret < 0) {
151 return ret;
152 }
153 #undef EXIT_COND
154
155 return 0;
156 }
157
158 int i915_mutex_lock_interruptible(struct drm_device *dev)
159 {
160 struct drm_i915_private *dev_priv = dev->dev_private;
161 int ret;
162
163 ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
164 if (ret)
165 return ret;
166
167 ret = mutex_lock_interruptible(&dev->struct_mutex);
168 if (ret)
169 return ret;
170
171 WARN_ON(i915_verify_lists(dev));
172 return 0;
173 }
174
175 static inline bool
176 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
177 {
178 return i915_gem_obj_bound_any(obj) && !obj->active;
179 }
180
181 int
182 i915_gem_init_ioctl(struct drm_device *dev, void *data,
183 struct drm_file *file)
184 {
185 struct drm_i915_private *dev_priv = dev->dev_private;
186 struct drm_i915_gem_init *args = data;
187
188 if (drm_core_check_feature(dev, DRIVER_MODESET))
189 return -ENODEV;
190
191 if (args->gtt_start >= args->gtt_end ||
192 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
193 return -EINVAL;
194
195 /* GEM with user mode setting was never supported on ilk and later. */
196 if (INTEL_INFO(dev)->gen >= 5)
197 return -ENODEV;
198
199 mutex_lock(&dev->struct_mutex);
200 i915_gem_setup_global_gtt(dev, args->gtt_start, args->gtt_end,
201 args->gtt_end);
202 dev_priv->gtt.mappable_end = args->gtt_end;
203 mutex_unlock(&dev->struct_mutex);
204
205 return 0;
206 }
207
208 int
209 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
210 struct drm_file *file)
211 {
212 struct drm_i915_private *dev_priv = dev->dev_private;
213 struct drm_i915_gem_get_aperture *args = data;
214 struct drm_i915_gem_object *obj;
215 size_t pinned;
216
217 pinned = 0;
218 mutex_lock(&dev->struct_mutex);
219 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
220 if (i915_gem_obj_is_pinned(obj))
221 pinned += i915_gem_obj_ggtt_size(obj);
222 mutex_unlock(&dev->struct_mutex);
223
224 args->aper_size = dev_priv->gtt.base.total;
225 args->aper_available_size = args->aper_size - pinned;
226
227 return 0;
228 }
229
230 static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
231 {
232 drm_dma_handle_t *phys = obj->phys_handle;
233
234 if (!phys)
235 return;
236
237 if (obj->madv == I915_MADV_WILLNEED) {
238 #ifdef __NetBSD__
char *vaddr = phys->vaddr;
239 unsigned i;
240
241 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
242 struct pglist pages;
243 int error;
244
245 TAILQ_INIT(&pages);
246 error = uvm_obj_wirepages(obj->base.gemo_shm_uao,
247 i*PAGE_SIZE, (i+1)*PAGE_SIZE, &pages);
248 if (error)
249 continue;
250
251 struct vm_page *const vm_page = TAILQ_FIRST(&pages);
252 struct page *const page = container_of(vm_page,
253 struct page, p_vmp);
254 char *const dst = kmap_atomic(page);
255 (void)memcpy(dst, vaddr + (i*PAGE_SIZE), PAGE_SIZE);
256 kunmap_atomic(page);
257
258 drm_clflush_page(page);
259 vm_page->flags &= ~PG_CLEAN;
260 /* XXX mark page accessed */
261 uvm_obj_unwirepages(obj->base.gemo_shm_uao,
262 i*PAGE_SIZE, (i+1)*PAGE_SIZE);
263 }
264 #else
265 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
266 char *vaddr = phys->vaddr;
267 int i;
268
269 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
270 struct page *page = shmem_read_mapping_page(mapping, i);
271 if (!IS_ERR(page)) {
272 char *dst = kmap_atomic(page);
273 memcpy(dst, vaddr, PAGE_SIZE);
274 drm_clflush_virt_range(dst, PAGE_SIZE);
275 kunmap_atomic(dst);
276
277 set_page_dirty(page);
278 mark_page_accessed(page);
279 page_cache_release(page);
280 }
281 vaddr += PAGE_SIZE;
282 }
283 #endif
284 i915_gem_chipset_flush(obj->base.dev);
285 }
286
287 #ifndef __NetBSD__
288 #ifdef CONFIG_X86
289 set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
290 #endif
291 #endif
292 drm_pci_free(obj->base.dev, phys);
293 obj->phys_handle = NULL;
294 }
295
296 int
297 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
298 int align)
299 {
300 drm_dma_handle_t *phys;
301 #ifndef __NetBSD__
302 struct address_space *mapping;
303 #endif
304 char *vaddr;
305 int i;
306
307 if (obj->phys_handle) {
308 if ((unsigned long)obj->phys_handle->vaddr & (align -1))
309 return -EBUSY;
310
311 return 0;
312 }
313
314 if (obj->madv != I915_MADV_WILLNEED)
315 return -EFAULT;
316
317 if (obj->base.filp == NULL)
318 return -EINVAL;
319
320 /* create a new object */
321 phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
322 if (!phys)
323 return -ENOMEM;
324
325 vaddr = phys->vaddr;
326 #ifndef __NetBSD__
327 #ifdef CONFIG_X86
328 set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
329 #endif
330 mapping = file_inode(obj->base.filp)->i_mapping;
331 #endif
332 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
333 struct page *page;
334 char *src;
335
336 #ifdef __NetBSD__
337 struct pglist pages;
338 int ret;
339 /* XXX errno NetBSD->Linux */
340 ret = -uvm_obj_wirepages(obj->base.gemo_shm_uao, i*PAGE_SIZE,
341 (i+1)*PAGE_SIZE, &pages);
342 if (ret) {
343 drm_pci_free(obj->base.dev, phys);
344 return ret;
345 }
346 KASSERT(!TAILQ_EMPTY(&pages));
347 page = TAILQ_FIRST(&pages);
348 #else
349 page = shmem_read_mapping_page(mapping, i);
350 if (IS_ERR(page)) {
351 #ifdef CONFIG_X86
352 set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
353 #endif
354 drm_pci_free(obj->base.dev, phys);
355 return PTR_ERR(page);
356 }
357 #endif /* defined(__NetBSD__) */
358
359 src = kmap_atomic(page);
360 memcpy(vaddr, src, PAGE_SIZE);
361 kunmap_atomic(src);
362
#ifdef __NetBSD__
/* XXX mark page accessed */
uvm_obj_unwirepages(obj->base.gemo_shm_uao, i*PAGE_SIZE,
(i+1)*PAGE_SIZE);
#else
363 mark_page_accessed(page);
364 page_cache_release(page);
#endif
365
366 vaddr += PAGE_SIZE;
367 }
368
369 obj->phys_handle = phys;
370 return 0;
371 }
372
373 static int
374 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
375 struct drm_i915_gem_pwrite *args,
376 struct drm_file *file_priv)
377 {
378 struct drm_device *dev = obj->base.dev;
379 void *vaddr = obj->phys_handle->vaddr + args->offset;
380 char __user *user_data = to_user_ptr(args->data_ptr);
381
382 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
383 unsigned long unwritten;
384
385 /* The physical object once assigned is fixed for the lifetime
386 * of the obj, so we can safely drop the lock and continue
387 * to access vaddr.
388 */
389 mutex_unlock(&dev->struct_mutex);
390 unwritten = copy_from_user(vaddr, user_data, args->size);
391 mutex_lock(&dev->struct_mutex);
392 if (unwritten)
393 return -EFAULT;
394 }
395
396 i915_gem_chipset_flush(dev);
397 return 0;
398 }
399
400 void *i915_gem_object_alloc(struct drm_device *dev)
401 {
402 struct drm_i915_private *dev_priv = dev->dev_private;
403 return kmem_cache_zalloc(dev_priv->slab, GFP_KERNEL);
404 }
405
406 void i915_gem_object_free(struct drm_i915_gem_object *obj)
407 {
408 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
409 kmem_cache_free(dev_priv->slab, obj);
410 }
411
412 static int
413 i915_gem_create(struct drm_file *file,
414 struct drm_device *dev,
415 uint64_t size,
416 uint32_t *handle_p)
417 {
418 struct drm_i915_gem_object *obj;
419 int ret;
420 u32 handle;
421
422 size = roundup(size, PAGE_SIZE);
423 if (size == 0)
424 return -EINVAL;
425
426 /* Allocate the new object */
427 obj = i915_gem_alloc_object(dev, size);
428 if (obj == NULL)
429 return -ENOMEM;
430
431 ret = drm_gem_handle_create(file, &obj->base, &handle);
432 /* drop reference from allocate - handle holds it now */
433 drm_gem_object_unreference_unlocked(&obj->base);
434 if (ret)
435 return ret;
436
437 *handle_p = handle;
438 return 0;
439 }
440
441 int
442 i915_gem_dumb_create(struct drm_file *file,
443 struct drm_device *dev,
444 struct drm_mode_create_dumb *args)
445 {
446 /* have to work out size/pitch and return them */
447 #ifdef __NetBSD__ /* ALIGN already means something. */
448 args->pitch = round_up(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
449 #else
450 args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
451 #endif
452 args->size = args->pitch * args->height;
453 return i915_gem_create(file, dev,
454 args->size, &args->handle);
455 }
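/*
 * Worked example of the pitch/size computation above (illustrative only):
 * a 1366x768 dumb buffer at 32 bpp has a raw stride of 1366 * 4 = 5464
 * bytes, which is rounded up to the next multiple of 64 to give
 * args->pitch = 5504; args->size is then 5504 * 768 = 4227072 bytes,
 * which i915_gem_create() rounds up to whole pages.
 */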
456
457 /**
458 * Creates a new mm object and returns a handle to it.
459 */
460 int
461 i915_gem_create_ioctl(struct drm_device *dev, void *data,
462 struct drm_file *file)
463 {
464 struct drm_i915_gem_create *args = data;
465
466 return i915_gem_create(file, dev,
467 args->size, &args->handle);
468 }
469
470 static inline int
471 __copy_to_user_swizzled(char __user *cpu_vaddr,
472 const char *gpu_vaddr, int gpu_offset,
473 int length)
474 {
475 int ret, cpu_offset = 0;
476
477 while (length > 0) {
478 #ifdef __NetBSD__
479 int cacheline_end = round_up(gpu_offset + 1, 64);
480 #else
481 int cacheline_end = ALIGN(gpu_offset + 1, 64);
482 #endif
483 int this_length = min(cacheline_end - gpu_offset, length);
484 int swizzled_gpu_offset = gpu_offset ^ 64;
485
486 ret = __copy_to_user(cpu_vaddr + cpu_offset,
487 gpu_vaddr + swizzled_gpu_offset,
488 this_length);
489 if (ret)
490 return ret + length;
491
492 cpu_offset += this_length;
493 gpu_offset += this_length;
494 length -= this_length;
495 }
496
497 return 0;
498 }
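/*
 * Illustrative example of the swizzled copy above: with gpu_offset = 0 and
 * length = 256, the loop walks 64-byte cachelines and XORs each source
 * offset with 64, so GPU bytes at offsets 64, 0, 192 and 128 are copied
 * (in that order) to CPU offsets 0, 64, 128 and 192.  Adjacent cacheline
 * pairs are simply exchanged, which compensates for the bit-17-based
 * channel swizzling on the affected pages.
 */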
499
500 static inline int
501 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
502 const char __user *cpu_vaddr,
503 int length)
504 {
505 int ret, cpu_offset = 0;
506
507 while (length > 0) {
508 #ifdef __NetBSD__
509 int cacheline_end = round_up(gpu_offset + 1, 64);
510 #else
511 int cacheline_end = ALIGN(gpu_offset + 1, 64);
512 #endif
513 int this_length = min(cacheline_end - gpu_offset, length);
514 int swizzled_gpu_offset = gpu_offset ^ 64;
515
516 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
517 cpu_vaddr + cpu_offset,
518 this_length);
519 if (ret)
520 return ret + length;
521
522 cpu_offset += this_length;
523 gpu_offset += this_length;
524 length -= this_length;
525 }
526
527 return 0;
528 }
529
530 /*
531 * Pins the specified object's pages and synchronizes the object with
532 * GPU accesses. Sets needs_clflush to non-zero if the caller should
533 * flush the object from the CPU cache.
534 */
535 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
536 int *needs_clflush)
537 {
538 int ret;
539
540 *needs_clflush = 0;
541
542 if (!obj->base.filp)
543 return -EINVAL;
544
545 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
546 /* If we're not in the cpu read domain, set ourself into the gtt
547 * read domain and manually flush cachelines (if required). This
548 * optimizes for the case when the gpu will dirty the data
549 * anyway again before the next pread happens. */
550 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
551 obj->cache_level);
552 ret = i915_gem_object_wait_rendering(obj, true);
553 if (ret)
554 return ret;
555 }
556
557 ret = i915_gem_object_get_pages(obj);
558 if (ret)
559 return ret;
560
561 i915_gem_object_pin_pages(obj);
562
563 return ret;
564 }
565
566 /* Per-page copy function for the shmem pread fastpath.
567 * Flushes invalid cachelines before reading the target if
568 * needs_clflush is set. */
569 static int
570 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
571 char __user *user_data,
572 bool page_do_bit17_swizzling, bool needs_clflush)
573 {
574 #ifdef __NetBSD__ /* XXX atomic shmem fast path */
575 return -EFAULT;
576 #else
577 char *vaddr;
578 int ret;
579
580 if (unlikely(page_do_bit17_swizzling))
581 return -EINVAL;
582
583 vaddr = kmap_atomic(page);
584 if (needs_clflush)
585 drm_clflush_virt_range(vaddr + shmem_page_offset,
586 page_length);
587 ret = __copy_to_user_inatomic(user_data,
588 vaddr + shmem_page_offset,
589 page_length);
590 kunmap_atomic(vaddr);
591
592 return ret ? -EFAULT : 0;
593 #endif
594 }
595
596 static void
597 shmem_clflush_swizzled_range(char *addr, unsigned long length,
598 bool swizzled)
599 {
600 if (unlikely(swizzled)) {
601 unsigned long start = (unsigned long) addr;
602 unsigned long end = (unsigned long) addr + length;
603
604 /* For swizzling simply ensure that we always flush both
605 * channels. Lame, but simple and it works. Swizzled
606 * pwrite/pread is far from a hotpath - current userspace
607 * doesn't use it at all. */
608 start = round_down(start, 128);
609 end = round_up(end, 128);
610
611 drm_clflush_virt_range((void *)start, end - start);
612 } else {
613 drm_clflush_virt_range(addr, length);
614 }
615
616 }
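/*
 * Illustrative example of the flush above: clflushing a 16-byte range that
 * starts 200 bytes into a 128-byte-aligned area is widened to the span
 * [128, 256), so both 64-byte cachelines of the swizzle pair containing
 * the data are flushed, whichever channel it actually lives in.
 */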
617
618 /* Only difference to the fast-path function is that this can handle bit17
619 * and uses non-atomic copy and kmap functions. */
620 static int
621 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
622 char __user *user_data,
623 bool page_do_bit17_swizzling, bool needs_clflush)
624 {
625 char *vaddr;
626 int ret;
627
628 vaddr = kmap(page);
629 if (needs_clflush)
630 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
631 page_length,
632 page_do_bit17_swizzling);
633
634 if (page_do_bit17_swizzling)
635 ret = __copy_to_user_swizzled(user_data,
636 vaddr, shmem_page_offset,
637 page_length);
638 else
639 ret = __copy_to_user(user_data,
640 vaddr + shmem_page_offset,
641 page_length);
642 kunmap(page);
643
644 return ret ? -EFAULT : 0;
645 }
646
647 static int
648 i915_gem_shmem_pread(struct drm_device *dev,
649 struct drm_i915_gem_object *obj,
650 struct drm_i915_gem_pread *args,
651 struct drm_file *file)
652 {
653 char __user *user_data;
654 ssize_t remain;
655 loff_t offset;
656 int shmem_page_offset, page_length, ret = 0;
657 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
658 #ifndef __NetBSD__ /* XXX */
659 int prefaulted = 0;
660 #endif
661 int needs_clflush = 0;
662 #ifndef __NetBSD__
663 struct sg_page_iter sg_iter;
664 #endif
665
666 user_data = to_user_ptr(args->data_ptr);
667 remain = args->size;
668
669 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
670
671 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
672 if (ret)
673 return ret;
674
675 offset = args->offset;
676
677 #ifdef __NetBSD__
678 /*
679 * XXX This is a big #ifdef with a lot of duplicated code, but
680 * factoring out the loop head -- which is all that
681 * substantially differs -- is probably more trouble than it's
682 * worth at the moment.
683 */
684 while (0 < remain) {
685 /* Get the next page. */
686 shmem_page_offset = offset_in_page(offset);
687 KASSERT(shmem_page_offset < PAGE_SIZE);
688 page_length = MIN(remain, (PAGE_SIZE - shmem_page_offset));
689 struct page *const page = i915_gem_object_get_page(obj,
690 atop(offset));
691
692 /* Decide whether to swizzle bit 17. */
693 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
694 (page_to_phys(page) & (1 << 17)) != 0;
695
696 /* Try the fast path. */
697 ret = shmem_pread_fast(page, shmem_page_offset, page_length,
698 user_data, page_do_bit17_swizzling, needs_clflush);
699 if (ret == 0)
700 goto next_page;
701
702 /* Fast path failed. Try the slow path. */
703 mutex_unlock(&dev->struct_mutex);
704 /* XXX prefault */
705 ret = shmem_pread_slow(page, shmem_page_offset, page_length,
706 user_data, page_do_bit17_swizzling, needs_clflush);
707 mutex_lock(&dev->struct_mutex);
708 if (ret)
709 goto out;
710
711 next_page: KASSERT(page_length <= remain);
712 remain -= page_length;
713 user_data += page_length;
714 offset += page_length;
715 }
716 #else
717 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
718 offset >> PAGE_SHIFT) {
719 struct page *page = sg_page_iter_page(&sg_iter);
720
721 if (remain <= 0)
722 break;
723
724 /* Operation in this page
725 *
726 * shmem_page_offset = offset within page in shmem file
727 * page_length = bytes to copy for this page
728 */
729 shmem_page_offset = offset_in_page(offset);
730 page_length = remain;
731 if ((shmem_page_offset + page_length) > PAGE_SIZE)
732 page_length = PAGE_SIZE - shmem_page_offset;
733
734 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
735 (page_to_phys(page) & (1 << 17)) != 0;
736
737 ret = shmem_pread_fast(page, shmem_page_offset, page_length,
738 user_data, page_do_bit17_swizzling,
739 needs_clflush);
740 if (ret == 0)
741 goto next_page;
742
743 mutex_unlock(&dev->struct_mutex);
744
745 if (likely(!i915.prefault_disable) && !prefaulted) {
746 ret = fault_in_multipages_writeable(user_data, remain);
747 /* Userspace is tricking us, but we've already clobbered
748 * its pages with the prefault and promised to write the
749 * data up to the first fault. Hence ignore any errors
750 * and just continue. */
751 (void)ret;
752 prefaulted = 1;
753 }
754
755 ret = shmem_pread_slow(page, shmem_page_offset, page_length,
756 user_data, page_do_bit17_swizzling,
757 needs_clflush);
758
759 mutex_lock(&dev->struct_mutex);
760
761 if (ret)
762 goto out;
763
764 next_page:
765 remain -= page_length;
766 user_data += page_length;
767 offset += page_length;
768 }
769 #endif
770
771 out:
772 i915_gem_object_unpin_pages(obj);
773
774 return ret;
775 }
776
777 /**
778 * Reads data from the object referenced by handle.
779 *
780 * On error, the contents of *data are undefined.
781 */
782 int
783 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
784 struct drm_file *file)
785 {
786 struct drm_i915_gem_pread *args = data;
787 struct drm_i915_gem_object *obj;
788 int ret = 0;
789
790 if (args->size == 0)
791 return 0;
792
793 if (!access_ok(VERIFY_WRITE,
794 to_user_ptr(args->data_ptr),
795 args->size))
796 return -EFAULT;
797
798 ret = i915_mutex_lock_interruptible(dev);
799 if (ret)
800 return ret;
801
802 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
803 if (&obj->base == NULL) {
804 ret = -ENOENT;
805 goto unlock;
806 }
807
808 /* Bounds check source. */
809 if (args->offset > obj->base.size ||
810 args->size > obj->base.size - args->offset) {
811 ret = -EINVAL;
812 goto out;
813 }
814
815 #ifndef __NetBSD__ /* XXX drm prime */
816 /* prime objects have no backing filp to GEM pread/pwrite
817 * pages from.
818 */
819 if (!obj->base.filp) {
820 ret = -EINVAL;
821 goto out;
822 }
823 #endif
824
825 trace_i915_gem_object_pread(obj, args->offset, args->size);
826
827 ret = i915_gem_shmem_pread(dev, obj, args, file);
828
829 out:
830 drm_gem_object_unreference(&obj->base);
831 unlock:
832 mutex_unlock(&dev->struct_mutex);
833 return ret;
834 }
835
836 /* This is the fast write path which cannot handle
837 * page faults in the source data
838 */
839
840 static inline int
841 fast_user_write(struct io_mapping *mapping,
842 loff_t page_base, int page_offset,
843 char __user *user_data,
844 int length)
845 {
846 #ifdef __NetBSD__ /* XXX atomic shmem fast path */
847 return -EFAULT;
848 #else
849 void __iomem *vaddr_atomic;
850 void *vaddr;
851 unsigned long unwritten;
852
853 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
854 /* We can use the cpu mem copy function because this is X86. */
855 vaddr = (void __force*)vaddr_atomic + page_offset;
856 unwritten = __copy_from_user_inatomic_nocache(vaddr,
857 user_data, length);
858 io_mapping_unmap_atomic(vaddr_atomic);
859 return unwritten;
860 #endif
861 }
862
863 /**
864 * This is the fast pwrite path, where we copy the data directly from the
865 * user into the GTT, uncached.
866 */
867 static int
868 i915_gem_gtt_pwrite_fast(struct drm_device *dev,
869 struct drm_i915_gem_object *obj,
870 struct drm_i915_gem_pwrite *args,
871 struct drm_file *file)
872 {
873 struct drm_i915_private *dev_priv = dev->dev_private;
874 ssize_t remain;
875 loff_t offset, page_base;
876 char __user *user_data;
877 int page_offset, page_length, ret;
878
879 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
880 if (ret)
881 goto out;
882
883 ret = i915_gem_object_set_to_gtt_domain(obj, true);
884 if (ret)
885 goto out_unpin;
886
887 ret = i915_gem_object_put_fence(obj);
888 if (ret)
889 goto out_unpin;
890
891 user_data = to_user_ptr(args->data_ptr);
892 remain = args->size;
893
894 offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
895
896 while (remain > 0) {
897 /* Operation in this page
898 *
899 * page_base = page offset within aperture
900 * page_offset = offset within page
901 * page_length = bytes to copy for this page
902 */
903 page_base = offset & PAGE_MASK;
904 page_offset = offset_in_page(offset);
905 page_length = remain;
906 if ((page_offset + remain) > PAGE_SIZE)
907 page_length = PAGE_SIZE - page_offset;
908
909 /* If we get a fault while copying data, then (presumably) our
910 * source page isn't available. Return the error and we'll
911 * retry in the slow path.
912 */
913 if (fast_user_write(dev_priv->gtt.mappable, page_base,
914 page_offset, user_data, page_length)) {
915 ret = -EFAULT;
916 goto out_unpin;
917 }
918
919 remain -= page_length;
920 user_data += page_length;
921 offset += page_length;
922 }
923
924 out_unpin:
925 i915_gem_object_ggtt_unpin(obj);
926 out:
927 return ret;
928 }
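/*
 * Illustrative example of the offset arithmetic in the loop above, assuming
 * 4 KiB pages: for a starting GTT offset of 0x12345 and remain = 0x2000,
 * the first iteration uses page_base = 0x12000, page_offset = 0x345 and
 * page_length = 0x1000 - 0x345 = 0xcbb; later iterations continue from the
 * next page boundary, copying at most one page per pass until remain
 * reaches zero.
 */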
929
930 /* Per-page copy function for the shmem pwrite fastpath.
931 * Flushes invalid cachelines before writing to the target if
932 * needs_clflush_before is set and flushes out any written cachelines after
933 * writing if needs_clflush is set. */
934 static int
935 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
936 char __user *user_data,
937 bool page_do_bit17_swizzling,
938 bool needs_clflush_before,
939 bool needs_clflush_after)
940 {
941 #ifdef __NetBSD__
942 return -EFAULT;
943 #else
944 char *vaddr;
945 int ret;
946
947 if (unlikely(page_do_bit17_swizzling))
948 return -EINVAL;
949
950 vaddr = kmap_atomic(page);
951 if (needs_clflush_before)
952 drm_clflush_virt_range(vaddr + shmem_page_offset,
953 page_length);
954 ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
955 user_data, page_length);
956 if (needs_clflush_after)
957 drm_clflush_virt_range(vaddr + shmem_page_offset,
958 page_length);
959 kunmap_atomic(vaddr);
960
961 return ret ? -EFAULT : 0;
962 #endif
963 }
964
965 /* Only difference to the fast-path function is that this can handle bit17
966 * and uses non-atomic copy and kmap functions. */
967 static int
968 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
969 char __user *user_data,
970 bool page_do_bit17_swizzling,
971 bool needs_clflush_before,
972 bool needs_clflush_after)
973 {
974 char *vaddr;
975 int ret;
976
977 vaddr = kmap(page);
978 if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
979 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
980 page_length,
981 page_do_bit17_swizzling);
982 if (page_do_bit17_swizzling)
983 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
984 user_data,
985 page_length);
986 else
987 ret = __copy_from_user(vaddr + shmem_page_offset,
988 user_data,
989 page_length);
990 if (needs_clflush_after)
991 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
992 page_length,
993 page_do_bit17_swizzling);
994 kunmap(page);
995
996 return ret ? -EFAULT : 0;
997 }
998
999 static int
1000 i915_gem_shmem_pwrite(struct drm_device *dev,
1001 struct drm_i915_gem_object *obj,
1002 struct drm_i915_gem_pwrite *args,
1003 struct drm_file *file)
1004 {
1005 ssize_t remain;
1006 loff_t offset;
1007 char __user *user_data;
1008 int shmem_page_offset, page_length, ret = 0;
1009 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
1010 int hit_slowpath = 0;
1011 int needs_clflush_after = 0;
1012 int needs_clflush_before = 0;
1013 #ifndef __NetBSD__
1014 struct sg_page_iter sg_iter;
1015 #endif
1016
1017 user_data = to_user_ptr(args->data_ptr);
1018 remain = args->size;
1019
1020 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
1021
1022 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1023 /* If we're not in the cpu write domain, set ourself into the gtt
1024 * write domain and manually flush cachelines (if required). This
1025 * optimizes for the case when the gpu will use the data
1026 * right away and we therefore have to clflush anyway. */
1027 needs_clflush_after = cpu_write_needs_clflush(obj);
1028 ret = i915_gem_object_wait_rendering(obj, false);
1029 if (ret)
1030 return ret;
1031 }
1032 /* Same trick applies to invalidate partially written cachelines read
1033 * before writing. */
1034 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
1035 needs_clflush_before =
1036 !cpu_cache_is_coherent(dev, obj->cache_level);
1037
1038 ret = i915_gem_object_get_pages(obj);
1039 if (ret)
1040 return ret;
1041
1042 i915_gem_object_pin_pages(obj);
1043
1044 offset = args->offset;
1045 obj->dirty = 1;
1046
1047 #ifdef __NetBSD__
1048 while (0 < remain) {
1049 /* Get the next page. */
1050 shmem_page_offset = offset_in_page(offset);
1051 KASSERT(shmem_page_offset < PAGE_SIZE);
1052 page_length = MIN(remain, (PAGE_SIZE - shmem_page_offset));
1053 struct page *const page = i915_gem_object_get_page(obj,
1054 atop(offset));
1055
1056 /* Decide whether to flush the cache or swizzle bit 17. */
1057 const bool partial_cacheline_write = needs_clflush_before &&
1058 ((shmem_page_offset | page_length)
1059 & (cpu_info_primary.ci_cflush_lsize - 1));
1060 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
1061 (page_to_phys(page) & (1 << 17)) != 0;
1062
1063 /* Try the fast path. */
1064 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
1065 user_data, page_do_bit17_swizzling,
1066 partial_cacheline_write, needs_clflush_after);
1067 if (ret == 0)
1068 goto next_page;
1069
1070 /* Fast path failed. Try the slow path. */
1071 hit_slowpath = 1;
1072 mutex_unlock(&dev->struct_mutex);
1073 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
1074 user_data, page_do_bit17_swizzling,
1075 partial_cacheline_write, needs_clflush_after);
1076 mutex_lock(&dev->struct_mutex);
1077 if (ret)
1078 goto out;
1079
1080 next_page: KASSERT(page_length <= remain);
1081 remain -= page_length;
1082 user_data += page_length;
1083 offset += page_length;
1084 }
1085 #else
1086 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
1087 offset >> PAGE_SHIFT) {
1088 struct page *page = sg_page_iter_page(&sg_iter);
1089 int partial_cacheline_write;
1090
1091 if (remain <= 0)
1092 break;
1093
1094 /* Operation in this page
1095 *
1096 * shmem_page_offset = offset within page in shmem file
1097 * page_length = bytes to copy for this page
1098 */
1099 shmem_page_offset = offset_in_page(offset);
1100
1101 page_length = remain;
1102 if ((shmem_page_offset + page_length) > PAGE_SIZE)
1103 page_length = PAGE_SIZE - shmem_page_offset;
1104
1105 /* If we don't overwrite a cacheline completely we need to be
1106 * careful to have up-to-date data by first clflushing. Don't
1107 * overcomplicate things and flush the entire patch. */
1108 partial_cacheline_write = needs_clflush_before &&
1109 ((shmem_page_offset | page_length)
1110 & (boot_cpu_data.x86_clflush_size - 1));
1111
1112 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
1113 (page_to_phys(page) & (1 << 17)) != 0;
1114
1115 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
1116 user_data, page_do_bit17_swizzling,
1117 partial_cacheline_write,
1118 needs_clflush_after);
1119 if (ret == 0)
1120 goto next_page;
1121
1122 hit_slowpath = 1;
1123 mutex_unlock(&dev->struct_mutex);
1124 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
1125 user_data, page_do_bit17_swizzling,
1126 partial_cacheline_write,
1127 needs_clflush_after);
1128
1129 mutex_lock(&dev->struct_mutex);
1130
1131 if (ret)
1132 goto out;
1133
1134 next_page:
1135 remain -= page_length;
1136 user_data += page_length;
1137 offset += page_length;
1138 }
1139 #endif
1140
1141 out:
1142 i915_gem_object_unpin_pages(obj);
1143
1144 if (hit_slowpath) {
1145 /*
1146 * Fixup: Flush cpu caches in case we didn't flush the dirty
1147 * cachelines in-line while writing and the object moved
1148 * out of the cpu write domain while we've dropped the lock.
1149 */
1150 if (!needs_clflush_after &&
1151 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1152 if (i915_gem_clflush_object(obj, obj->pin_display))
1153 i915_gem_chipset_flush(dev);
1154 }
1155 }
1156
1157 if (needs_clflush_after)
1158 i915_gem_chipset_flush(dev);
1159
1160 return ret;
1161 }
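/*
 * Illustrative example of the partial_cacheline_write test used in both
 * loops above, assuming a 64-byte cacheline: a write of page_length = 256
 * at shmem_page_offset = 128 gives (128 | 256) & 63 == 0, so no clflush is
 * needed before copying, whereas page_length = 100 at offset 32 gives a
 * non-zero result, meaning the first and/or last cacheline is only
 * partially overwritten and any stale cached copy must be invalidated
 * first so the untouched bytes stay correct.
 */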
1162
1163 /**
1164 * Writes data to the object referenced by handle.
1165 *
1166 * On error, the contents of the buffer that were to be modified are undefined.
1167 */
1168 int
1169 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1170 struct drm_file *file)
1171 {
1172 struct drm_i915_gem_pwrite *args = data;
1173 struct drm_i915_gem_object *obj;
1174 int ret;
1175
1176 if (args->size == 0)
1177 return 0;
1178
1179 if (!access_ok(VERIFY_READ,
1180 to_user_ptr(args->data_ptr),
1181 args->size))
1182 return -EFAULT;
1183
1184 #ifndef __NetBSD__ /* XXX prefault */
1185 if (likely(!i915.prefault_disable)) {
1186 ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
1187 args->size);
1188 if (ret)
1189 return -EFAULT;
1190 }
1191 #endif
1192
1193 ret = i915_mutex_lock_interruptible(dev);
1194 if (ret)
1195 return ret;
1196
1197 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1198 if (&obj->base == NULL) {
1199 ret = -ENOENT;
1200 goto unlock;
1201 }
1202
1203 /* Bounds check destination. */
1204 if (args->offset > obj->base.size ||
1205 args->size > obj->base.size - args->offset) {
1206 ret = -EINVAL;
1207 goto out;
1208 }
1209
1210 #ifndef __NetBSD__ /* XXX drm prime */
1211 /* prime objects have no backing filp to GEM pread/pwrite
1212 * pages from.
1213 */
1214 if (!obj->base.filp) {
1215 ret = -EINVAL;
1216 goto out;
1217 }
1218 #endif
1219
1220 trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1221
1222 ret = -EFAULT;
1223 /* We can only do the GTT pwrite on untiled buffers, as otherwise
1224 * it would end up going through the fenced access, and we'll get
1225 * different detiling behavior between reading and writing.
1226 * pread/pwrite currently are reading and writing from the CPU
1227 * perspective, requiring manual detiling by the client.
1228 */
1229 if (obj->phys_handle) {
1230 ret = i915_gem_phys_pwrite(obj, args, file);
1231 goto out;
1232 }
1233
1234 if (obj->tiling_mode == I915_TILING_NONE &&
1235 obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
1236 cpu_write_needs_clflush(obj)) {
1237 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
1238 /* Note that the gtt paths might fail with non-page-backed user
1239 * pointers (e.g. gtt mappings when moving data between
1240 * textures). Fallback to the shmem path in that case. */
1241 }
1242
1243 if (ret == -EFAULT || ret == -ENOSPC)
1244 ret = i915_gem_shmem_pwrite(dev, obj, args, file);
1245
1246 out:
1247 drm_gem_object_unreference(&obj->base);
1248 unlock:
1249 mutex_unlock(&dev->struct_mutex);
1250 return ret;
1251 }
1252
1253 int
1254 i915_gem_check_wedge(struct i915_gpu_error *error,
1255 bool interruptible)
1256 {
1257 if (i915_reset_in_progress(error)) {
1258 /* Non-interruptible callers can't handle -EAGAIN, hence return
1259 * -EIO unconditionally for these. */
1260 if (!interruptible)
1261 return -EIO;
1262
1263 /* Recovery complete, but the reset failed ... */
1264 if (i915_terminally_wedged(error))
1265 return -EIO;
1266
1267 return -EAGAIN;
1268 }
1269
1270 return 0;
1271 }
1272
1273 /*
1274 * Compare seqno against outstanding lazy request. Emit a request if they are
1275 * equal.
1276 */
1277 static int
1278 i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
1279 {
1280 int ret;
1281
1282 BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
1283
1284 ret = 0;
1285 if (seqno == ring->outstanding_lazy_seqno)
1286 ret = i915_add_request(ring, NULL);
1287
1288 return ret;
1289 }
1290
1291 static void fake_irq(unsigned long data)
1292 {
1293 wake_up_process((struct task_struct *)data);
1294 }
1295
1296 static bool missed_irq(struct drm_i915_private *dev_priv,
1297 struct intel_ring_buffer *ring)
1298 {
1299 return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
1300 }
1301
1302 static bool can_wait_boost(struct drm_i915_file_private *file_priv)
1303 {
1304 if (file_priv == NULL)
1305 return true;
1306
1307 return !atomic_xchg(&file_priv->rps_wait_boost, true);
1308 }
1309
1310 /**
1311 * __wait_seqno - wait until execution of seqno has finished
1312 * @ring: the ring expected to report seqno
1313 * @seqno: duh!
1314 * @reset_counter: reset sequence associated with the given seqno
1315 * @interruptible: do an interruptible wait (normally yes)
1316 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
1317 *
1318 * Note: It is of utmost importance that the passed in seqno and reset_counter
1319 * values have been read by the caller in an smp safe manner. Where read-side
1320 * locks are involved, it is sufficient to read the reset_counter before
1321 * unlocking the lock that protects the seqno. For lockless tricks, the
1322 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
1323 * inserted.
1324 *
1325 * Returns 0 if the seqno was found within the allotted time. Else returns the
1326 * errno with remaining time filled in timeout argument.
1327 */
1328 static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
1329 unsigned reset_counter,
1330 bool interruptible,
1331 struct timespec *timeout,
1332 struct drm_i915_file_private *file_priv)
1333 {
1334 struct drm_device *dev = ring->dev;
1335 struct drm_i915_private *dev_priv = dev->dev_private;
1336 const bool irq_test_in_progress =
1337 ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
1338 struct timespec before, now;
1339 DEFINE_WAIT(wait);
1340 unsigned long timeout_expire;
1341 int ret;
1342
1343 WARN(dev_priv->pm.irqs_disabled, "IRQs disabled\n");
1344
1345 if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
1346 return 0;
1347
1348 timeout_expire = timeout ? jiffies + timespec_to_jiffies_timeout(timeout) : 0;
1349
1350 if (INTEL_INFO(dev)->gen >= 6 && can_wait_boost(file_priv)) {
1351 gen6_rps_boost(dev_priv);
1352 if (file_priv)
1353 mod_delayed_work(dev_priv->wq,
1354 &file_priv->mm.idle_work,
1355 msecs_to_jiffies(100));
1356 }
1357
1358 if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring)))
1359 return -ENODEV;
1360
1361 /* Record current time in case interrupted by signal, or wedged */
1362 trace_i915_gem_request_wait_begin(ring, seqno);
1363 getrawmonotonic(&before);
1364 for (;;) {
1365 struct timer_list timer;
1366
1367 prepare_to_wait(&ring->irq_queue, &wait,
1368 interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
1369
1370 /* We need to check whether any gpu reset happened in between
1371 * the caller grabbing the seqno and now ... */
1372 if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
1373 /* ... but upgrade the -EAGAIN to an -EIO if the gpu
1374 * is truly gone. */
1375 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1376 if (ret == 0)
1377 ret = -EAGAIN;
1378 break;
1379 }
1380
1381 if (i915_seqno_passed(ring->get_seqno(ring, false), seqno)) {
1382 ret = 0;
1383 break;
1384 }
1385
1386 if (interruptible && signal_pending(current)) {
1387 ret = -ERESTARTSYS;
1388 break;
1389 }
1390
1391 if (timeout && time_after_eq(jiffies, timeout_expire)) {
1392 ret = -ETIME;
1393 break;
1394 }
1395
1396 timer.function = NULL;
1397 if (timeout || missed_irq(dev_priv, ring)) {
1398 unsigned long expire;
1399
1400 setup_timer_on_stack(&timer, fake_irq, (unsigned long)current);
1401 expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire;
1402 mod_timer(&timer, expire);
1403 }
1404
1405 io_schedule();
1406
1407 if (timer.function) {
1408 del_singleshot_timer_sync(&timer);
1409 destroy_timer_on_stack(&timer);
1410 }
1411 }
1412 getrawmonotonic(&now);
1413 trace_i915_gem_request_wait_end(ring, seqno);
1414
1415 if (!irq_test_in_progress)
1416 ring->irq_put(ring);
1417
1418 finish_wait(&ring->irq_queue, &wait);
1419
1420 if (timeout) {
1421 struct timespec sleep_time = timespec_sub(now, before);
1422 *timeout = timespec_sub(*timeout, sleep_time);
1423 if (!timespec_valid(timeout)) /* i.e. negative time remains */
1424 set_normalized_timespec(timeout, 0, 0);
1425 }
1426
1427 return ret;
1428 }
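/*
 * Illustrative sketch only (compiled out, not driver code): the read
 * ordering that the __wait_seqno() comment above asks of a lockless
 * caller.  "dev_priv", "obj" and "ring" are assumed to be in scope; the
 * reset counter is sampled before the seqno, with an smp_rmb() in between,
 * so that __wait_seqno() can detect an intervening GPU reset by comparing
 * the counter against the current value.
 */
#if 0
	unsigned reset_counter;
	u32 seqno;
	int ret;

	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
	smp_rmb();	/* counter read ordered before the seqno read */
	seqno = obj->last_read_seqno;
	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, NULL);
#endif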
1429
1430 /**
1431 * Waits for a sequence number to be signaled, and cleans up the
1432 * request and object lists appropriately for that event.
1433 */
1434 int
1435 i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
1436 {
1437 struct drm_device *dev = ring->dev;
1438 struct drm_i915_private *dev_priv = dev->dev_private;
1439 bool interruptible = dev_priv->mm.interruptible;
1440 int ret;
1441
1442 BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1443 BUG_ON(seqno == 0);
1444
1445 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1446 if (ret)
1447 return ret;
1448
1449 ret = i915_gem_check_olr(ring, seqno);
1450 if (ret)
1451 return ret;
1452
1453 return __wait_seqno(ring, seqno,
1454 atomic_read(&dev_priv->gpu_error.reset_counter),
1455 interruptible, NULL, NULL);
1456 }
1457
1458 static int
1459 i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
1460 struct intel_ring_buffer *ring)
1461 {
1462 i915_gem_retire_requests_ring(ring);
1463
1464 /* Manually manage the write flush as we may have not yet
1465 * retired the buffer.
1466 *
1467 * Note that the last_write_seqno is always the earlier of
1468 * the two (read/write) seqno, so if we have successfully waited,
1469 * we know we have passed the last write.
1470 */
1471 obj->last_write_seqno = 0;
1472 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
1473
1474 return 0;
1475 }
1476
1477 /**
1478 * Ensures that all rendering to the object has completed and the object is
1479 * safe to unbind from the GTT or access from the CPU.
1480 */
1481 static __must_check int
1482 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
1483 bool readonly)
1484 {
1485 struct intel_ring_buffer *ring = obj->ring;
1486 u32 seqno;
1487 int ret;
1488
1489 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
1490 if (seqno == 0)
1491 return 0;
1492
1493 ret = i915_wait_seqno(ring, seqno);
1494 if (ret)
1495 return ret;
1496
1497 return i915_gem_object_wait_rendering__tail(obj, ring);
1498 }
1499
1500 /* A nonblocking variant of the above wait. This is a highly dangerous routine
1501 * as the object state may change during this call.
1502 */
1503 static __must_check int
1504 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
1505 struct drm_i915_file_private *file_priv,
1506 bool readonly)
1507 {
1508 struct drm_device *dev = obj->base.dev;
1509 struct drm_i915_private *dev_priv = dev->dev_private;
1510 struct intel_ring_buffer *ring = obj->ring;
1511 unsigned reset_counter;
1512 u32 seqno;
1513 int ret;
1514
1515 BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1516 BUG_ON(!dev_priv->mm.interruptible);
1517
1518 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
1519 if (seqno == 0)
1520 return 0;
1521
1522 ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
1523 if (ret)
1524 return ret;
1525
1526 ret = i915_gem_check_olr(ring, seqno);
1527 if (ret)
1528 return ret;
1529
1530 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
1531 mutex_unlock(&dev->struct_mutex);
1532 ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file_priv);
1533 mutex_lock(&dev->struct_mutex);
1534 if (ret)
1535 return ret;
1536
1537 return i915_gem_object_wait_rendering__tail(obj, ring);
1538 }
1539
1540 /**
1541 * Called when user space prepares to use an object with the CPU, either
1542 * through the mmap ioctl's mapping or a GTT mapping.
1543 */
1544 int
1545 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1546 struct drm_file *file)
1547 {
1548 struct drm_i915_gem_set_domain *args = data;
1549 struct drm_i915_gem_object *obj;
1550 uint32_t read_domains = args->read_domains;
1551 uint32_t write_domain = args->write_domain;
1552 int ret;
1553
1554 /* Only handle setting domains to types used by the CPU. */
1555 if (write_domain & I915_GEM_GPU_DOMAINS)
1556 return -EINVAL;
1557
1558 if (read_domains & I915_GEM_GPU_DOMAINS)
1559 return -EINVAL;
1560
1561 /* Having something in the write domain implies it's in the read
1562 * domain, and only that read domain. Enforce that in the request.
1563 */
1564 if (write_domain != 0 && read_domains != write_domain)
1565 return -EINVAL;
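/*
 * For example, read_domains = write_domain = I915_GEM_DOMAIN_GTT is
 * accepted, while read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU
 * with write_domain = I915_GEM_DOMAIN_GTT is rejected here, since a
 * written buffer may only be read back through that same domain.
 */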
1566
1567 ret = i915_mutex_lock_interruptible(dev);
1568 if (ret)
1569 return ret;
1570
1571 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1572 if (&obj->base == NULL) {
1573 ret = -ENOENT;
1574 goto unlock;
1575 }
1576
1577 /* Try to flush the object off the GPU without holding the lock.
1578 * We will repeat the flush holding the lock in the normal manner
1579 * to catch cases where we are gazumped.
1580 */
1581 ret = i915_gem_object_wait_rendering__nonblocking(obj,
1582 file->driver_priv,
1583 !write_domain);
1584 if (ret)
1585 goto unref;
1586
1587 if (read_domains & I915_GEM_DOMAIN_GTT) {
1588 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1589
1590 /* Silently promote "you're not bound, there was nothing to do"
1591 * to success, since the client was just asking us to
1592 * make sure everything was done.
1593 */
1594 if (ret == -EINVAL)
1595 ret = 0;
1596 } else {
1597 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1598 }
1599
1600 unref:
1601 drm_gem_object_unreference(&obj->base);
1602 unlock:
1603 mutex_unlock(&dev->struct_mutex);
1604 return ret;
1605 }
1606
1607 /**
1608 * Called when user space has done writes to this buffer
1609 */
1610 int
1611 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1612 struct drm_file *file)
1613 {
1614 struct drm_i915_gem_sw_finish *args = data;
1615 struct drm_i915_gem_object *obj;
1616 int ret = 0;
1617
1618 ret = i915_mutex_lock_interruptible(dev);
1619 if (ret)
1620 return ret;
1621
1622 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1623 if (&obj->base == NULL) {
1624 ret = -ENOENT;
1625 goto unlock;
1626 }
1627
1628 /* Pinned buffers may be scanout, so flush the cache */
1629 if (obj->pin_display)
1630 i915_gem_object_flush_cpu_write_domain(obj, true);
1631
1632 drm_gem_object_unreference(&obj->base);
1633 unlock:
1634 mutex_unlock(&dev->struct_mutex);
1635 return ret;
1636 }
1637
1638 /**
1639 * Maps the contents of an object, returning the address it is mapped
1640 * into.
1641 *
1642 * While the mapping holds a reference on the contents of the object, it doesn't
1643 * imply a ref on the object itself.
1644 */
1645 int
1646 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1647 struct drm_file *file)
1648 {
1649 struct drm_i915_gem_mmap *args = data;
1650 struct drm_gem_object *obj;
1651 unsigned long addr;
1652 #ifdef __NetBSD__
1653 int ret;
1654 #endif
1655
1656 obj = drm_gem_object_lookup(dev, file, args->handle);
1657 if (obj == NULL)
1658 return -ENOENT;
1659
1660 #ifndef __NetBSD__ /* XXX drm prime */
1661 /* prime objects have no backing filp to GEM mmap
1662 * pages from.
1663 */
1664 if (!obj->filp) {
1665 drm_gem_object_unreference_unlocked(obj);
1666 return -EINVAL;
1667 }
1668 #endif
1669
1670 #ifdef __NetBSD__
1671 addr = (*curproc->p_emul->e_vm_default_addr)(curproc,
1672 (vaddr_t)curproc->p_vmspace->vm_daddr, args->size);
1673 /* XXX errno NetBSD->Linux */
1674 ret = -uvm_map(&curproc->p_vmspace->vm_map, &addr, args->size,
1675 obj->gemo_shm_uao, args->offset, 0,
1676 UVM_MAPFLAG((VM_PROT_READ | VM_PROT_WRITE),
1677 (VM_PROT_READ | VM_PROT_WRITE), UVM_INH_COPY, UVM_ADV_NORMAL,
1678 0));
1679 if (ret) {
1680 drm_gem_object_unreference_unlocked(obj);
1681 return ret;
1682 }
1683 uao_reference(obj->gemo_shm_uao);
1684 drm_gem_object_unreference_unlocked(obj);
1685 #else
1686 addr = vm_mmap(obj->filp, 0, args->size,
1687 PROT_READ | PROT_WRITE, MAP_SHARED,
1688 args->offset);
1689 drm_gem_object_unreference_unlocked(obj);
1690 if (IS_ERR((void *)addr))
1691 return addr;
1692 #endif
1693
1694 args->addr_ptr = (uint64_t) addr;
1695
1696 return 0;
1697 }
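/*
 * Illustrative sketch only (compiled out, not driver code): how userspace
 * typically reaches this ioctl through libdrm's <xf86drm.h> and the UAPI
 * header <i915_drm.h>.  "fd" is assumed to be an open DRM file descriptor
 * and "handle" a GEM handle from the create ioctl; error handling is
 * omitted.
 */
#if 0
	struct drm_i915_gem_mmap mmap_arg;
	void *ptr = NULL;

	memset(&mmap_arg, 0, sizeof(mmap_arg));
	mmap_arg.handle = handle;	/* GEM handle to map */
	mmap_arg.offset = 0;		/* byte offset into the object */
	mmap_arg.size = size;		/* length of the mapping */
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg) == 0)
		ptr = (void *)(uintptr_t)mmap_arg.addr_ptr;
#endif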
1698
1699 #ifdef __NetBSD__ /* XXX gem gtt fault */
1700 static int i915_udv_fault(struct uvm_faultinfo *, vaddr_t,
1701 struct vm_page **, int, int, vm_prot_t, int, paddr_t);
1702
1703 int
1704 i915_gem_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps,
1705 int npages, int centeridx, vm_prot_t access_type, int flags)
1706 {
1707 struct uvm_object *uobj = ufi->entry->object.uvm_obj;
1708 struct drm_gem_object *gem_obj =
1709 container_of(uobj, struct drm_gem_object, gemo_uvmobj);
1710 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
1711 struct drm_device *dev = obj->base.dev;
1712 struct drm_i915_private *dev_priv = dev->dev_private;
1713 voff_t byte_offset;
1714 pgoff_t page_offset;
1715 int ret = 0;
1716 bool write = ISSET(access_type, VM_PROT_WRITE)? 1 : 0;
1717
1718 byte_offset = (ufi->entry->offset + (vaddr - ufi->entry->start));
1719 KASSERT(byte_offset <= obj->base.size);
1720 page_offset = (byte_offset >> PAGE_SHIFT);
1721
1722 ret = i915_mutex_lock_interruptible(dev);
1723 if (ret)
1724 goto out;
1725
1726 trace_i915_gem_object_fault(obj, page_offset, true, write);
1727
1728 /* Now bind it into the GTT if needed */
1729 ret = i915_gem_object_pin(obj, 0, true, false);
1730 if (ret)
1731 goto unlock;
1732
1733 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1734 if (ret)
1735 goto unpin;
1736
1737 ret = i915_gem_object_get_fence(obj);
1738 if (ret)
1739 goto unpin;
1740
1741 obj->fault_mappable = true;
1742
1743 /* Finally, remap it using the new GTT offset */
1744 /* XXX errno NetBSD->Linux */
1745 ret = -i915_udv_fault(ufi, vaddr, pps, npages, centeridx, access_type,
1746 flags, (dev_priv->mm.gtt_base_addr + obj->gtt_offset));
1747 unpin:
1748 i915_gem_object_unpin(obj);
1749 unlock:
1750 mutex_unlock(&dev->struct_mutex);
1751 out:
1752 uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj);
1753 if (ret == -ERESTART)
1754 uvm_wait("i915flt");
1755 return ret;
1756 }
1757
1758 /*
1759 * XXX i915_udv_fault is copypasta of udv_fault from uvm_device.c.
1760 *
1761 * XXX pmap_enter_default instead of pmap_enter because of a problem
1762 * with using weak aliases in kernel modules or something.
1763 */
1764 int pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, unsigned);
1765
1766 static int
1767 i915_udv_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps,
1768 int npages, int centeridx, vm_prot_t access_type, int flags,
1769 paddr_t gtt_paddr)
1770 {
1771 struct vm_map_entry *entry = ufi->entry;
1772 vaddr_t curr_va;
1773 off_t curr_offset;
1774 paddr_t paddr;
1775 u_int mmapflags;
1776 int lcv, retval;
1777 vm_prot_t mapprot;
1778 UVMHIST_FUNC("i915_udv_fault"); UVMHIST_CALLED(maphist);
1779 UVMHIST_LOG(maphist," flags=%d", flags,0,0,0);
1780
1781 /*
1782 * we do not allow device mappings to be mapped copy-on-write
1783 * so we kill any attempt to do so here.
1784 */
1785
1786 if (UVM_ET_ISCOPYONWRITE(entry)) {
1787 UVMHIST_LOG(maphist, "<- failed -- COW entry (etype=0x%x)",
1788 entry->etype, 0,0,0);
1789 return(EIO);
1790 }
1791
1792 /*
1793 * now we must determine the offset in udv to use and the VA to
1794 * use for pmap_enter. note that we always use orig_map's pmap
1795 * for pmap_enter (even if we have a submap). since virtual
1796 * addresses in a submap must match the main map, this is ok.
1797 */
1798
1799 /* udv offset = (offset from start of entry) + entry's offset */
1800 curr_offset = entry->offset + (vaddr - entry->start);
1801 /* pmap va = vaddr (virtual address of pps[0]) */
1802 curr_va = vaddr;
1803
1804 /*
1805 * loop over the page range entering in as needed
1806 */
1807
1808 retval = 0;
1809 for (lcv = 0 ; lcv < npages ; lcv++, curr_offset += PAGE_SIZE,
1810 curr_va += PAGE_SIZE) {
1811 if ((flags & PGO_ALLPAGES) == 0 && lcv != centeridx)
1812 continue;
1813
1814 if (pps[lcv] == PGO_DONTCARE)
1815 continue;
1816
1817 paddr = (gtt_paddr + curr_offset);
1818 mmapflags = 0;
1819 mapprot = ufi->entry->protection;
1820 UVMHIST_LOG(maphist,
1821 " MAPPING: device: pm=0x%x, va=0x%x, pa=0x%lx, at=%d",
1822 ufi->orig_map->pmap, curr_va, paddr, mapprot);
1823 if (pmap_enter_default(ufi->orig_map->pmap, curr_va, paddr, mapprot,
1824 PMAP_CANFAIL | mapprot | mmapflags) != 0) {
1825 /*
1826 * pmap_enter() didn't have the resource to
1827 * enter this mapping. Unlock everything,
1828 * wait for the pagedaemon to free up some
1829 * pages, and then tell uvm_fault() to start
1830 * the fault again.
1831 *
1832 * XXX Needs some rethinking for the PGO_ALLPAGES
1833 * XXX case.
1834 */
1835 pmap_update(ufi->orig_map->pmap); /* sync what we have so far */
1836 return (ERESTART);
1837 }
1838 }
1839
1840 pmap_update(ufi->orig_map->pmap);
1841 return (retval);
1842 }
1843 #else
1844 /**
1845 * i915_gem_fault - fault a page into the GTT
1846 * vma: VMA in question
1847 * vmf: fault info
1848 *
1849 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1850 * from userspace. The fault handler takes care of binding the object to
1851 * the GTT (if needed), allocating and programming a fence register (again,
1852 * only if needed based on whether the old reg is still valid or the object
1853 * is tiled) and inserting a new PTE into the faulting process.
1854 *
1855 * Note that the faulting process may involve evicting existing objects
1856 * from the GTT and/or fence registers to make room. So performance may
1857 * suffer if the GTT working set is large or there are few fence registers
1858 * left.
1859 */
1860 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1861 {
1862 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
1863 struct drm_device *dev = obj->base.dev;
1864 struct drm_i915_private *dev_priv = dev->dev_private;
1865 pgoff_t page_offset;
1866 unsigned long pfn;
1867 int ret = 0;
1868 bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1869
1870 intel_runtime_pm_get(dev_priv);
1871
1872 /* We don't use vmf->pgoff since that has the fake offset */
1873 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1874 PAGE_SHIFT;
1875
1876 ret = i915_mutex_lock_interruptible(dev);
1877 if (ret)
1878 goto out;
1879
1880 trace_i915_gem_object_fault(obj, page_offset, true, write);
1881
1882 /* Try to flush the object off the GPU first without holding the lock.
1883 * Upon reacquiring the lock, we will perform our sanity checks and then
1884 * repeat the flush holding the lock in the normal manner to catch cases
1885 * where we are gazumped.
1886 */
1887 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
1888 if (ret)
1889 goto unlock;
1890
1891 /* Access to snoopable pages through the GTT is incoherent. */
1892 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
1893 ret = -EINVAL;
1894 goto unlock;
1895 }
1896
1897 /* Now bind it into the GTT if needed */
1898 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
1899 if (ret)
1900 goto unlock;
1901
1902 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1903 if (ret)
1904 goto unpin;
1905
1906 ret = i915_gem_object_get_fence(obj);
1907 if (ret)
1908 goto unpin;
1909
1910 obj->fault_mappable = true;
1911
1912 pfn = dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj);
1913 pfn >>= PAGE_SHIFT;
1914 pfn += page_offset;
1915
1916 /* Finally, remap it using the new GTT offset */
1917 ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
1918 unpin:
1919 i915_gem_object_ggtt_unpin(obj);
1920 unlock:
1921 mutex_unlock(&dev->struct_mutex);
1922 out:
1923 switch (ret) {
1924 case -EIO:
1925 /* If this -EIO is due to a gpu hang, give the reset code a
1926 * chance to clean up the mess. Otherwise return the proper
1927 * SIGBUS. */
1928 if (i915_terminally_wedged(&dev_priv->gpu_error)) {
1929 ret = VM_FAULT_SIGBUS;
1930 break;
1931 }
1932 case -EAGAIN:
1933 /*
1934 * EAGAIN means the gpu is hung and we'll wait for the error
1935 * handler to reset everything when re-faulting in
1936 * i915_mutex_lock_interruptible.
1937 */
1938 case 0:
1939 case -ERESTARTSYS:
1940 case -EINTR:
1941 case -EBUSY:
1942 /*
1943 * EBUSY is ok: this just means that another thread
1944 * already did the job.
1945 */
1946 ret = VM_FAULT_NOPAGE;
1947 break;
1948 case -ENOMEM:
1949 ret = VM_FAULT_OOM;
1950 break;
1951 case -ENOSPC:
1952 case -EFAULT:
1953 ret = VM_FAULT_SIGBUS;
1954 break;
1955 default:
1956 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
1957 ret = VM_FAULT_SIGBUS;
1958 break;
1959 }
1960
1961 intel_runtime_pm_put(dev_priv);
1962 return ret;
1963 }
1964
1965 void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
1966 {
1967 struct i915_vma *vma;
1968
1969 /*
1970 * Only the global gtt is relevant for gtt memory mappings, so restrict
1971 * list traversal to objects bound into the global address space. Note
1972 * that the active list should be empty, but better safe than sorry.
1973 */
1974 WARN_ON(!list_empty(&dev_priv->gtt.base.active_list));
1975 list_for_each_entry(vma, &dev_priv->gtt.base.active_list, mm_list)
1976 i915_gem_release_mmap(vma->obj);
1977 list_for_each_entry(vma, &dev_priv->gtt.base.inactive_list, mm_list)
1978 i915_gem_release_mmap(vma->obj);
1979 }
1980 #endif
1981
1982 /**
1983 * i915_gem_release_mmap - remove physical page mappings
1984 * @obj: obj in question
1985 *
1986 * Preserve the reservation of the mmapping with the DRM core code, but
1987 * relinquish ownership of the pages back to the system.
1988 *
1989 * It is vital that we remove the page mapping if we have mapped a tiled
1990 * object through the GTT and then lose the fence register due to
1991 * resource pressure. Similarly if the object has been moved out of the
1992 * aperture, then pages mapped into userspace must be revoked. Removing the
1993 * mapping will then trigger a page fault on the next user access, allowing
1994 * fixup by i915_gem_fault().
1995 */
1996 void
1997 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1998 {
1999 if (!obj->fault_mappable)
2000 return;
2001
2002 #ifdef __NetBSD__ /* XXX gem gtt fault */
2003 {
2004 struct vm_page *page;
2005
2006 mutex_enter(obj->base.gemo_shm_uao->vmobjlock);
2007 KASSERT(obj->pages != NULL);
2008 /* Force a fresh fault for each page. */
2009 TAILQ_FOREACH(page, &obj->igo_pageq, pageq.queue)
2010 pmap_page_protect(page, VM_PROT_NONE);
2011 mutex_exit(obj->base.gemo_shm_uao->vmobjlock);
2012 }
2013 #else
2014 drm_vma_node_unmap(&obj->base.vma_node,
2015 obj->base.dev->anon_inode->i_mapping);
2016 #endif
2017 obj->fault_mappable = false;
2018 }
2019
2020 uint32_t
2021 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
2022 {
2023 uint32_t gtt_size;
2024
2025 if (INTEL_INFO(dev)->gen >= 4 ||
2026 tiling_mode == I915_TILING_NONE)
2027 return size;
2028
2029 /* Previous chips need a power-of-two fence region when tiling */
2030 if (INTEL_INFO(dev)->gen == 3)
2031 gtt_size = 1024*1024;
2032 else
2033 gtt_size = 512*1024;
2034
2035 while (gtt_size < size)
2036 gtt_size <<= 1;
2037
2038 return gtt_size;
2039 }
2040
2041 /**
2042 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
2043 * @obj: object to check
2044 *
2045 * Return the required GTT alignment for an object, taking into account
2046 * potential fence register mapping.
2047 */
2048 uint32_t
2049 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
2050 int tiling_mode, bool fenced)
2051 {
2052 /*
2053 * Minimum alignment is 4k (GTT page size), but might be greater
2054 * if a fence register is needed for the object.
2055 */
2056 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
2057 tiling_mode == I915_TILING_NONE)
2058 return 4096;
2059
2060 /*
2061 * Previous chips need to be aligned to the size of the smallest
2062 * fence register that can contain the object.
2063 */
2064 return i915_gem_get_gtt_size(dev, size, tiling_mode);
2065 }
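/*
 * Worked example of the arithmetic above (a sketch, not new behaviour):
 * a 1.5 MiB X-tiled object on gen3 starts from the 1 MiB minimum and is
 * doubled once, so i915_gem_get_gtt_size() returns 2 MiB, and the fenced
 * alignment from i915_gem_get_gtt_alignment() is that same 2 MiB.  On
 * gen4+ the two simply collapse to the object size and 4 KiB respectively.
 */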
2066
2067 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2068 {
2069 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2070 int ret;
2071
2072 if (drm_vma_node_has_offset(&obj->base.vma_node))
2073 return 0;
2074
2075 dev_priv->mm.shrinker_no_lock_stealing = true;
2076
2077 ret = drm_gem_create_mmap_offset(&obj->base);
2078 if (ret != -ENOSPC)
2079 goto out;
2080
2081 /* Badly fragmented mmap space? The only way we can recover
2082 * space is by destroying unwanted objects. We can't randomly release
2083 * mmap_offsets as userspace expects them to be persistent for the
2084 * lifetime of the objects. The closest we can do is to release the
2085 * offsets on purgeable objects by truncating them and marking them purged,
2086 * which prevents userspace from ever using those objects again.
2087 */
2088 i915_gem_purge(dev_priv, obj->base.size >> PAGE_SHIFT);
2089 ret = drm_gem_create_mmap_offset(&obj->base);
2090 if (ret != -ENOSPC)
2091 goto out;
2092
2093 i915_gem_shrink_all(dev_priv);
2094 ret = drm_gem_create_mmap_offset(&obj->base);
2095 out:
2096 dev_priv->mm.shrinker_no_lock_stealing = false;
2097
2098 return ret;
2099 }
2100
2101 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2102 {
2103 drm_gem_free_mmap_offset(&obj->base);
2104 }
2105
2106 int
2107 i915_gem_mmap_gtt(struct drm_file *file,
2108 struct drm_device *dev,
2109 uint32_t handle,
2110 uint64_t *offset)
2111 {
2112 struct drm_i915_private *dev_priv = dev->dev_private;
2113 struct drm_i915_gem_object *obj;
2114 int ret;
2115
2116 ret = i915_mutex_lock_interruptible(dev);
2117 if (ret)
2118 return ret;
2119
2120 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
2121 if (&obj->base == NULL) {
2122 ret = -ENOENT;
2123 goto unlock;
2124 }
2125
2126 if (obj->base.size > dev_priv->gtt.mappable_end) {
2127 ret = -E2BIG;
2128 goto out;
2129 }
2130
2131 if (obj->madv != I915_MADV_WILLNEED) {
2132 DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
2133 ret = -EFAULT;
2134 goto out;
2135 }
2136
2137 ret = i915_gem_object_create_mmap_offset(obj);
2138 if (ret)
2139 goto out;
2140
2141 *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
2142
2143 out:
2144 drm_gem_object_unreference(&obj->base);
2145 unlock:
2146 mutex_unlock(&dev->struct_mutex);
2147 return ret;
2148 }
2149
2150 /**
2151 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2152 * @dev: DRM device
2153 * @data: GTT mapping ioctl data
2154 * @file: GEM object info
2155 *
2156 * Simply returns the fake offset to userspace so it can mmap it.
2157 * The mmap call will end up in drm_gem_mmap(), which will set things
2158 * up so we can get faults in the handler above.
2159 *
2160 * The fault handler will take care of binding the object into the GTT
2161 * (since it may have been evicted to make room for something), allocating
2162 * a fence register, and mapping the appropriate aperture address into
2163 * userspace.
2164 */
2165 int
2166 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2167 struct drm_file *file)
2168 {
2169 struct drm_i915_gem_mmap_gtt *args = data;
2170
2171 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
2172 }
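/*
 * For reference, the userspace side of this path is roughly the
 * following (a minimal sketch using the libdrm drmIoctl() wrapper and
 * <sys/mman.h>; "fd", "handle" and "size" are assumed to be an open DRM
 * device fd, a GEM handle and the object size):
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	void *ptr;
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg))
 *		return -errno;
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *	    fd, arg.offset);
 *
 * The mmap() itself only installs the fake offset; the first access to
 * "ptr" faults through i915_gem_fault() above, which binds the object
 * into the aperture and inserts the real PTEs.
 */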
2173
2174 /* Immediately discard the backing storage */
2175 static void
2176 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2177 {
2178 #ifndef __NetBSD__
2179 struct inode *inode;
2180 #endif
2181
2182 i915_gem_object_free_mmap_offset(obj);
2183
2184 #ifdef __NetBSD__
2185 {
2186 struct uvm_object *const uobj = obj->base.gemo_shm_uao;
2187
2188 if (uobj != NULL) {
2189 /* XXX Calling pgo_put like this is bogus. */
2190 mutex_enter(uobj->vmobjlock);
2191 (*uobj->pgops->pgo_put)(uobj, 0, obj->base.size,
2192 (PGO_ALLPAGES | PGO_FREE));
2193 }
2194 }
2195 #else
2196 if (obj->base.filp == NULL)
2197 return;
2198
2199 /* Our goal here is to return as much of the memory as
2200 * possible back to the system as we are called from OOM.
2201 * To do this we must instruct the shmfs to drop all of its
2202 * backing pages, *now*.
2203 */
2204 inode = file_inode(obj->base.filp);
2205 shmem_truncate_range(inode, 0, (loff_t)-1);
2206 #endif
2207
2208 obj->madv = __I915_MADV_PURGED;
2209 }
2210
2211 static inline int
2212 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
2213 {
2214 return obj->madv == I915_MADV_DONTNEED;
2215 }
2216
2217 #ifdef __NetBSD__
2218 static void
2219 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
2220 {
2221 struct drm_device *const dev = obj->base.dev;
2222 int ret;
2223
2224 /* XXX Cargo-culted from the Linux code. */
2225 BUG_ON(obj->madv == __I915_MADV_PURGED);
2226
2227 ret = i915_gem_object_set_to_cpu_domain(obj, true);
2228 if (ret) {
2229 WARN_ON(ret != -EIO);
2230 i915_gem_clflush_object(obj);
2231 obj->base.read_domains = obj->base.write_domain =
2232 I915_GEM_DOMAIN_CPU;
2233 }
2234
2235 if (i915_gem_object_needs_bit17_swizzle(obj))
2236 i915_gem_object_save_bit_17_swizzle(obj);
2237
2238 /* XXX Maintain dirty flag? */
2239
2240 bus_dmamap_destroy(dev->dmat, obj->igo_dmamap);
2241 bus_dmamem_unwire_uvm_object(dev->dmat, obj->base.gemo_shm_uao, 0,
2242 obj->base.size, obj->pages, obj->igo_nsegs);
2243
2244 kfree(obj->pages);
2245 }
2246 #else
2247 static void
2248 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
2249 {
2250 struct sg_page_iter sg_iter;
2251 int ret;
2252
2253 BUG_ON(obj->madv == __I915_MADV_PURGED);
2254
2255 ret = i915_gem_object_set_to_cpu_domain(obj, true);
2256 if (ret) {
2257 /* In the event of a disaster, abandon all caches and
2258 * hope for the best.
2259 */
2260 WARN_ON(ret != -EIO);
2261 i915_gem_clflush_object(obj, true);
2262 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2263 }
2264
2265 if (i915_gem_object_needs_bit17_swizzle(obj))
2266 i915_gem_object_save_bit_17_swizzle(obj);
2267
2268 if (obj->madv == I915_MADV_DONTNEED)
2269 obj->dirty = 0;
2270
2271 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
2272 struct page *page = sg_page_iter_page(&sg_iter);
2273
2274 if (obj->dirty)
2275 set_page_dirty(page);
2276
2277 if (obj->madv == I915_MADV_WILLNEED)
2278 mark_page_accessed(page);
2279
2280 page_cache_release(page);
2281 }
2282 obj->dirty = 0;
2283
2284 sg_free_table(obj->pages);
2285 kfree(obj->pages);
2286 }
2287 #endif
2288
2289 int
2290 i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
2291 {
2292 const struct drm_i915_gem_object_ops *ops = obj->ops;
2293
2294 if (obj->pages == NULL)
2295 return 0;
2296
2297 if (obj->pages_pin_count)
2298 return -EBUSY;
2299
2300 BUG_ON(i915_gem_obj_bound_any(obj));
2301
2302 /* ->put_pages might need to allocate memory for the bit17 swizzle
2303 * array, hence protect them from being reaped by removing them from gtt
2304 * lists early. */
2305 list_del(&obj->global_list);
2306
2307 ops->put_pages(obj);
2308 obj->pages = NULL;
2309
2310 if (i915_gem_object_is_purgeable(obj))
2311 i915_gem_object_truncate(obj);
2312
2313 return 0;
2314 }
2315
2316 static unsigned long
2317 __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
2318 bool purgeable_only)
2319 {
2320 struct list_head still_bound_list;
2321 struct drm_i915_gem_object *obj, *next;
2322 unsigned long count = 0;
2323
2324 list_for_each_entry_safe(obj, next,
2325 &dev_priv->mm.unbound_list,
2326 global_list) {
2327 if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
2328 i915_gem_object_put_pages(obj) == 0) {
2329 count += obj->base.size >> PAGE_SHIFT;
2330 if (count >= target)
2331 return count;
2332 }
2333 }
2334
2335 /*
2336 * As we may completely rewrite the bound list whilst unbinding
2337 * (due to retiring requests) we have to strictly process only
2338 * one element of the list at the time, and recheck the list
2339 * on every iteration.
2340 */
2341 INIT_LIST_HEAD(&still_bound_list);
2342 while (count < target && !list_empty(&dev_priv->mm.bound_list)) {
2343 struct i915_vma *vma, *v;
2344
2345 obj = list_first_entry(&dev_priv->mm.bound_list,
2346 typeof(*obj), global_list);
2347 list_move_tail(&obj->global_list, &still_bound_list);
2348
2349 if (!i915_gem_object_is_purgeable(obj) && purgeable_only)
2350 continue;
2351
2352 /*
2353 * Hold a reference whilst we unbind this object, as we may
2354 * end up waiting for and retiring requests. This might
2355 * release the final reference (held by the active list)
2356 * and result in the object being freed from under us.
2358 *
2359 * Note 1: Shrinking the bound list is special since only active
2360 * (and hence bound objects) can contain such limbo objects, so
2361 * we don't need special tricks for shrinking the unbound list.
2362 * The only other place where we have to be careful with active
2363 * objects suddenly disappearing due to retiring requests is the
2364 * eviction code.
2365 *
2366 * Note 2: Even though the bound list doesn't hold a reference
2367 * to the object we can safely grab one here: The final object
2368 * unreferencing and the bound_list are both protected by the
2369 * dev->struct_mutex and so we won't ever be able to observe an
2370 * object on the bound_list with a reference count equals 0.
2371 */
2372 drm_gem_object_reference(&obj->base);
2373
2374 list_for_each_entry_safe(vma, v, &obj->vma_list, vma_link)
2375 if (i915_vma_unbind(vma))
2376 break;
2377
2378 if (i915_gem_object_put_pages(obj) == 0)
2379 count += obj->base.size >> PAGE_SHIFT;
2380
2381 drm_gem_object_unreference(&obj->base);
2382 }
2383 list_splice(&still_bound_list, &dev_priv->mm.bound_list);
2384
2385 return count;
2386 }
2387
2388 static unsigned long
2389 i915_gem_purge(struct drm_i915_private *dev_priv, long target)
2390 {
2391 return __i915_gem_shrink(dev_priv, target, true);
2392 }
2393
2394 static unsigned long
2395 i915_gem_shrink_all(struct drm_i915_private *dev_priv)
2396 {
2397 struct drm_i915_gem_object *obj, *next;
2398 long freed = 0;
2399
2400 i915_gem_evict_everything(dev_priv->dev);
2401
2402 list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list,
2403 global_list) {
2404 if (i915_gem_object_put_pages(obj) == 0)
2405 freed += obj->base.size >> PAGE_SHIFT;
2406 }
2407 return freed;
2408 }
2409
2410 #ifdef __NetBSD__
2411 static int
2412 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2413 {
2414 struct drm_device *const dev = obj->base.dev;
2415 struct vm_page *page;
2416 int error;
2417
2418 /* XXX Cargo-culted from the Linux code. */
2419 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2420 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2421
2422 KASSERT(obj->pages == NULL);
2423 TAILQ_INIT(&obj->igo_pageq);
2424 obj->pages = kcalloc((obj->base.size / PAGE_SIZE),
2425 sizeof(obj->pages[0]), GFP_KERNEL);
2426 if (obj->pages == NULL) {
2427 error = -ENOMEM;
2428 goto fail0;
2429 }
2430
2431 /* XXX errno NetBSD->Linux */
2432 error = -bus_dmamem_wire_uvm_object(dev->dmat, obj->base.gemo_shm_uao,
2433 0, obj->base.size, &obj->igo_pageq, PAGE_SIZE, 0, obj->pages,
2434 (obj->base.size / PAGE_SIZE), &obj->igo_nsegs, BUS_DMA_NOWAIT);
2435 if (error)
2436 /* XXX Try i915_gem_purge, i915_gem_shrink_all. */
2437 goto fail1;
2438 KASSERT(0 < obj->igo_nsegs);
2439 KASSERT(obj->igo_nsegs <= (obj->base.size / PAGE_SIZE));
2440
2441 /*
2442 * Check that the paddrs will fit in 40 bits, or 32 bits on i965.
2443 *
2444 * XXX This is wrong; we ought to pass this constraint to
2445 * bus_dmamem_wire_uvm_object instead.
2446 */
2447 TAILQ_FOREACH(page, &obj->igo_pageq, pageq.queue) {
2448 const uint64_t mask =
2449 (IS_BROADWATER(dev) || IS_CRESTLINE(dev)?
2450 0xffffffffULL : 0xffffffffffULL);
2451 if (VM_PAGE_TO_PHYS(page) & ~mask) {
2452 DRM_ERROR("GEM physical address exceeds %u bits"
2453 ": %"PRIxMAX"\n",
2454 popcount64(mask),
2455 (uintmax_t)VM_PAGE_TO_PHYS(page));
2456 error = -EIO;
2457 goto fail2;
2458 }
2459 }
2460
2461 /* XXX errno NetBSD->Linux */
2462 error = -bus_dmamap_create(dev->dmat, obj->base.size, obj->igo_nsegs,
2463 PAGE_SIZE, 0, BUS_DMA_NOWAIT, &obj->igo_dmamap);
2464 if (error)
2465 goto fail2;
2466
2467 /* XXX Cargo-culted from the Linux code. */
2468 if (i915_gem_object_needs_bit17_swizzle(obj))
2469 i915_gem_object_do_bit_17_swizzle(obj);
2470
2471 /* Success! */
2472 return 0;
2473
2474 fail2: bus_dmamem_unwire_uvm_object(dev->dmat, obj->base.gemo_shm_uao, 0,
2475 obj->base.size, obj->pages, (obj->base.size / PAGE_SIZE));
2476 fail1: kfree(obj->pages);
2477 obj->pages = NULL;
2478 fail0: KASSERT(error);
2479 return error;
2480 }
2481 #else
2482 static int
2483 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2484 {
2485 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2486 int page_count, i;
2487 struct address_space *mapping;
2488 struct sg_table *st;
2489 struct scatterlist *sg;
2490 struct sg_page_iter sg_iter;
2491 struct page *page;
2492 unsigned long last_pfn = 0; /* suppress gcc warning */
2493 gfp_t gfp;
2494
2495 /* Assert that the object is not currently in any GPU domain. As it
2496 * wasn't in the GTT, there shouldn't be any way it could have been in
2497 * a GPU cache
2498 */
2499 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2500 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2501
2502 st = kmalloc(sizeof(*st), GFP_KERNEL);
2503 if (st == NULL)
2504 return -ENOMEM;
2505
2506 page_count = obj->base.size / PAGE_SIZE;
2507 if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2508 kfree(st);
2509 return -ENOMEM;
2510 }
2511
2512 /* Get the list of pages out of our struct file. They'll be pinned
2513 * at this point until we release them.
2514 *
2515 * Fail silently without starting the shrinker
2516 */
2517 mapping = file_inode(obj->base.filp)->i_mapping;
2518 gfp = mapping_gfp_mask(mapping);
2519 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
2520 gfp &= ~(__GFP_IO | __GFP_WAIT);
2521 sg = st->sgl;
2522 st->nents = 0;
2523 for (i = 0; i < page_count; i++) {
2524 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2525 if (IS_ERR(page)) {
2526 i915_gem_purge(dev_priv, page_count);
2527 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2528 }
2529 if (IS_ERR(page)) {
2530 /* We've tried hard to allocate the memory by reaping
2531 * our own buffer, now let the real VM do its job and
2532 * go down in flames if truly OOM.
2533 */
2534 gfp &= ~(__GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD);
2535 gfp |= __GFP_IO | __GFP_WAIT;
2536
2537 i915_gem_shrink_all(dev_priv);
2538 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2539 if (IS_ERR(page))
2540 goto err_pages;
2541
2542 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
2543 gfp &= ~(__GFP_IO | __GFP_WAIT);
2544 }
2545 #ifdef CONFIG_SWIOTLB
2546 if (swiotlb_nr_tbl()) {
2547 st->nents++;
2548 sg_set_page(sg, page, PAGE_SIZE, 0);
2549 sg = sg_next(sg);
2550 continue;
2551 }
2552 #endif
2553 if (!i || page_to_pfn(page) != last_pfn + 1) {
2554 if (i)
2555 sg = sg_next(sg);
2556 st->nents++;
2557 sg_set_page(sg, page, PAGE_SIZE, 0);
2558 } else {
2559 sg->length += PAGE_SIZE;
2560 }
2561 last_pfn = page_to_pfn(page);
2562
2563 /* Check that the i965g/gm workaround works. */
2564 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
2565 }
2566 #ifdef CONFIG_SWIOTLB
2567 if (!swiotlb_nr_tbl())
2568 #endif
2569 sg_mark_end(sg);
2570 obj->pages = st;
2571
2572 if (i915_gem_object_needs_bit17_swizzle(obj))
2573 i915_gem_object_do_bit_17_swizzle(obj);
2574
2575 return 0;
2576
2577 err_pages:
2578 sg_mark_end(sg);
2579 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
2580 page_cache_release(sg_page_iter_page(&sg_iter));
2581 sg_free_table(st);
2582 kfree(st);
2583 return PTR_ERR(page);
2584 }
2585 #endif
2586
2587 /* Ensure that the associated pages are gathered from the backing storage
2588 * and pinned into our object. i915_gem_object_get_pages() may be called
2589 * multiple times before they are released by a single call to
2590 * i915_gem_object_put_pages() - once the pages are no longer referenced
2591 * either as a result of memory pressure (reaping pages under the shrinker)
2592 * or as the object is itself released.
2593 */
2594 int
2595 i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2596 {
2597 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2598 const struct drm_i915_gem_object_ops *ops = obj->ops;
2599 int ret;
2600
2601 if (obj->pages)
2602 return 0;
2603
2604 if (obj->madv != I915_MADV_WILLNEED) {
2605 DRM_DEBUG("Attempting to obtain a purgeable object\n");
2606 return -EFAULT;
2607 }
2608
2609 BUG_ON(obj->pages_pin_count);
2610
2611 ret = ops->get_pages(obj);
2612 if (ret)
2613 return ret;
2614
2615 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
2616 return 0;
2617 }
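/*
 * A sketch of the usage pattern, based on the callers in this file (see
 * i915_gem_object_bind_to_vm() and i915_vma_unbind() below): callers
 * that map the pages somewhere pin them for the duration,
 *
 *	ret = i915_gem_object_get_pages(obj);
 *	if (ret)
 *		return ret;
 *	i915_gem_object_pin_pages(obj);
 *	...	(build GTT / DMA mappings that reference obj->pages)
 *	i915_gem_object_unpin_pages(obj);
 *
 * and i915_gem_object_put_pages() only succeeds (and the shrinker can
 * only reap the backing storage) once pages_pin_count drops to zero.
 */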
2618
2619 static void
2620 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
2621 struct intel_ring_buffer *ring)
2622 {
2623 struct drm_device *dev = obj->base.dev;
2624 struct drm_i915_private *dev_priv = dev->dev_private;
2625 u32 seqno = intel_ring_get_seqno(ring);
2626
2627 BUG_ON(ring == NULL);
2628 if (obj->ring != ring && obj->last_write_seqno) {
2629 /* Keep the seqno relative to the current ring */
2630 obj->last_write_seqno = seqno;
2631 }
2632 obj->ring = ring;
2633
2634 /* Add a reference if we're newly entering the active list. */
2635 if (!obj->active) {
2636 drm_gem_object_reference(&obj->base);
2637 obj->active = 1;
2638 }
2639
2640 list_move_tail(&obj->ring_list, &ring->active_list);
2641
2642 obj->last_read_seqno = seqno;
2643
2644 if (obj->fenced_gpu_access) {
2645 obj->last_fenced_seqno = seqno;
2646
2647 /* Bump MRU to take account of the delayed flush */
2648 if (obj->fence_reg != I915_FENCE_REG_NONE) {
2649 struct drm_i915_fence_reg *reg;
2650
2651 reg = &dev_priv->fence_regs[obj->fence_reg];
2652 list_move_tail(&reg->lru_list,
2653 &dev_priv->mm.fence_list);
2654 }
2655 }
2656 }
2657
2658 void i915_vma_move_to_active(struct i915_vma *vma,
2659 struct intel_ring_buffer *ring)
2660 {
2661 list_move_tail(&vma->mm_list, &vma->vm->active_list);
2662 return i915_gem_object_move_to_active(vma->obj, ring);
2663 }
2664
2665 static void
2666 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
2667 {
2668 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2669 struct i915_address_space *vm;
2670 struct i915_vma *vma;
2671
2672 BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
2673 BUG_ON(!obj->active);
2674
2675 list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
2676 vma = i915_gem_obj_to_vma(obj, vm);
2677 if (vma && !list_empty(&vma->mm_list))
2678 list_move_tail(&vma->mm_list, &vm->inactive_list);
2679 }
2680
2681 list_del_init(&obj->ring_list);
2682 obj->ring = NULL;
2683
2684 obj->last_read_seqno = 0;
2685 obj->last_write_seqno = 0;
2686 obj->base.write_domain = 0;
2687
2688 obj->last_fenced_seqno = 0;
2689 obj->fenced_gpu_access = false;
2690
2691 obj->active = 0;
2692 drm_gem_object_unreference(&obj->base);
2693
2694 WARN_ON(i915_verify_lists(dev));
2695 }
2696
2697 static int
2698 i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
2699 {
2700 struct drm_i915_private *dev_priv = dev->dev_private;
2701 struct intel_ring_buffer *ring;
2702 int ret, i, j;
2703
2704 /* Carefully retire all requests without writing to the rings */
2705 for_each_ring(ring, dev_priv, i) {
2706 ret = intel_ring_idle(ring);
2707 if (ret)
2708 return ret;
2709 }
2710 i915_gem_retire_requests(dev);
2711
2712 /* Finally reset hw state */
2713 for_each_ring(ring, dev_priv, i) {
2714 intel_ring_init_seqno(ring, seqno);
2715
2716 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
2717 ring->sync_seqno[j] = 0;
2718 }
2719
2720 return 0;
2721 }
2722
2723 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
2724 {
2725 struct drm_i915_private *dev_priv = dev->dev_private;
2726 int ret;
2727
2728 if (seqno == 0)
2729 return -EINVAL;
2730
2731 /* The HWS page needs to be set to a seqno less than what we
2732 * will inject into the ring
2733 */
2734 ret = i915_gem_init_seqno(dev, seqno - 1);
2735 if (ret)
2736 return ret;
2737
2738 /* Carefully set the last_seqno value so that wrap
2739 * detection still works
2740 */
2741 dev_priv->next_seqno = seqno;
2742 dev_priv->last_seqno = seqno - 1;
2743 if (dev_priv->last_seqno == 0)
2744 dev_priv->last_seqno--;
2745
2746 return 0;
2747 }
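/*
 * Worked example of the wrap handling above: i915_gem_set_seqno(dev, 1)
 * leaves next_seqno = 1 and last_seqno = 0xffffffff (the 0 that 1 - 1
 * would give is pushed back to the top of the seqno space, since 0 is
 * reserved).  The next request then gets seqno 1, and because seqno
 * comparisons are done on the signed 32-bit difference, that still
 * reads as "after" last_seqno.
 */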
2748
2749 int
2750 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
2751 {
2752 struct drm_i915_private *dev_priv = dev->dev_private;
2753
2754 /* reserve 0 for non-seqno */
2755 if (dev_priv->next_seqno == 0) {
2756 int ret = i915_gem_init_seqno(dev, 0);
2757 if (ret)
2758 return ret;
2759
2760 dev_priv->next_seqno = 1;
2761 }
2762
2763 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
2764 return 0;
2765 }
2766
2767 int __i915_add_request(struct intel_ring_buffer *ring,
2768 struct drm_file *file,
2769 struct drm_i915_gem_object *obj,
2770 u32 *out_seqno)
2771 {
2772 struct drm_i915_private *dev_priv = ring->dev->dev_private;
2773 struct drm_i915_gem_request *request;
2774 u32 request_ring_position, request_start;
2775 int ret;
2776
2777 request_start = intel_ring_get_tail(ring);
2778 /*
2779 * Emit any outstanding flushes - execbuf can fail to emit the flush
2780 * after having emitted the batchbuffer command. Hence we need to fix
2781 * things up similar to emitting the lazy request. The difference here
2782 * is that the flush _must_ happen before the next request, no matter
2783 * what.
2784 */
2785 ret = intel_ring_flush_all_caches(ring);
2786 if (ret)
2787 return ret;
2788
2789 request = ring->preallocated_lazy_request;
2790 if (WARN_ON(request == NULL))
2791 return -ENOMEM;
2792
2793 /* Record the position of the start of the request so that
2794 * should we detect the updated seqno part-way through the
2795 * GPU processing the request, we never over-estimate the
2796 * position of the head.
2797 */
2798 request_ring_position = intel_ring_get_tail(ring);
2799
2800 ret = ring->add_request(ring);
2801 if (ret)
2802 return ret;
2803
2804 request->seqno = intel_ring_get_seqno(ring);
2805 request->ring = ring;
2806 request->head = request_start;
2807 request->tail = request_ring_position;
2808
2809 /* Whilst this request exists, batch_obj will be on the
2810 * active_list, and so will hold the active reference. Only when this
2811 * request is retired will the batch_obj be moved onto the
2812 * inactive_list and lose its active reference. Hence we do not need
2813 * to explicitly hold another reference here.
2814 */
2815 request->batch_obj = obj;
2816
2817 /* Hold a reference to the current context so that we can inspect
2818 * it later in case a hangcheck error event fires.
2819 */
2820 request->ctx = ring->last_context;
2821 if (request->ctx)
2822 i915_gem_context_reference(request->ctx);
2823
2824 request->emitted_jiffies = jiffies;
2825 list_add_tail(&request->list, &ring->request_list);
2826 request->file_priv = NULL;
2827
2828 if (file) {
2829 struct drm_i915_file_private *file_priv = file->driver_priv;
2830
2831 spin_lock(&file_priv->mm.lock);
2832 request->file_priv = file_priv;
2833 list_add_tail(&request->client_list,
2834 &file_priv->mm.request_list);
2835 spin_unlock(&file_priv->mm.lock);
2836 }
2837
2838 trace_i915_gem_request_add(ring, request->seqno);
2839 ring->outstanding_lazy_seqno = 0;
2840 ring->preallocated_lazy_request = NULL;
2841
2842 if (!dev_priv->ums.mm_suspended) {
2843 i915_queue_hangcheck(ring->dev);
2844
2845 cancel_delayed_work_sync(&dev_priv->mm.idle_work);
2846 queue_delayed_work(dev_priv->wq,
2847 &dev_priv->mm.retire_work,
2848 round_jiffies_up_relative(HZ));
2849 intel_mark_busy(dev_priv->dev);
2850 }
2851
2852 if (out_seqno)
2853 *out_seqno = request->seqno;
2854 return 0;
2855 }
2856
2857 static inline void
2858 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
2859 {
2860 struct drm_i915_file_private *file_priv = request->file_priv;
2861
2862 if (!file_priv)
2863 return;
2864
2865 spin_lock(&file_priv->mm.lock);
2866 list_del(&request->client_list);
2867 request->file_priv = NULL;
2868 spin_unlock(&file_priv->mm.lock);
2869 }
2870
2871 static bool i915_context_is_banned(struct drm_i915_private *dev_priv,
2872 const struct i915_hw_context *ctx)
2873 {
2874 unsigned long elapsed;
2875
2876 elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
2877
2878 if (ctx->hang_stats.banned)
2879 return true;
2880
2881 if (elapsed <= DRM_I915_CTX_BAN_PERIOD) {
2882 if (!i915_gem_context_is_default(ctx)) {
2883 DRM_DEBUG("context hanging too fast, banning!\n");
2884 return true;
2885 } else if (dev_priv->gpu_error.stop_rings == 0) {
2886 DRM_ERROR("gpu hanging too fast, banning!\n");
2887 return true;
2888 }
2889 }
2890
2891 return false;
2892 }
2893
2894 static void i915_set_reset_status(struct drm_i915_private *dev_priv,
2895 struct i915_hw_context *ctx,
2896 const bool guilty)
2897 {
2898 struct i915_ctx_hang_stats *hs;
2899
2900 if (WARN_ON(!ctx))
2901 return;
2902
2903 hs = &ctx->hang_stats;
2904
2905 if (guilty) {
2906 hs->banned = i915_context_is_banned(dev_priv, ctx);
2907 hs->batch_active++;
2908 hs->guilty_ts = get_seconds();
2909 } else {
2910 hs->batch_pending++;
2911 }
2912 }
2913
2914 static void i915_gem_free_request(struct drm_i915_gem_request *request)
2915 {
2916 list_del(&request->list);
2917 i915_gem_request_remove_from_client(request);
2918
2919 if (request->ctx)
2920 i915_gem_context_unreference(request->ctx);
2921
2922 kfree(request);
2923 }
2924
2925 struct drm_i915_gem_request *
2926 i915_gem_find_active_request(struct intel_ring_buffer *ring)
2927 {
2928 struct drm_i915_gem_request *request;
2929 u32 completed_seqno;
2930
2931 completed_seqno = ring->get_seqno(ring, false);
2932
2933 list_for_each_entry(request, &ring->request_list, list) {
2934 if (i915_seqno_passed(completed_seqno, request->seqno))
2935 continue;
2936
2937 return request;
2938 }
2939
2940 return NULL;
2941 }
2942
2943 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
2944 struct intel_ring_buffer *ring)
2945 {
2946 struct drm_i915_gem_request *request;
2947 bool ring_hung;
2948
2949 request = i915_gem_find_active_request(ring);
2950
2951 if (request == NULL)
2952 return;
2953
2954 ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
2955
2956 i915_set_reset_status(dev_priv, request->ctx, ring_hung);
2957
2958 list_for_each_entry_continue(request, &ring->request_list, list)
2959 i915_set_reset_status(dev_priv, request->ctx, false);
2960 }
2961
2962 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
2963 struct intel_ring_buffer *ring)
2964 {
2965 while (!list_empty(&ring->active_list)) {
2966 struct drm_i915_gem_object *obj;
2967
2968 obj = list_first_entry(&ring->active_list,
2969 struct drm_i915_gem_object,
2970 ring_list);
2971
2972 i915_gem_object_move_to_inactive(obj);
2973 }
2974
2975 /*
2976 * We must free the requests after all the corresponding objects have
2977 * been moved off active lists, which is the same order as the normal
2978 * retire_requests function uses. This is important if objects hold
2979 * implicit references on things like e.g. ppgtt address spaces through
2980 * the request.
2981 */
2982 while (!list_empty(&ring->request_list)) {
2983 struct drm_i915_gem_request *request;
2984
2985 request = list_first_entry(&ring->request_list,
2986 struct drm_i915_gem_request,
2987 list);
2988
2989 i915_gem_free_request(request);
2990 }
2991 }
2992
2993 void i915_gem_restore_fences(struct drm_device *dev)
2994 {
2995 struct drm_i915_private *dev_priv = dev->dev_private;
2996 int i;
2997
2998 for (i = 0; i < dev_priv->num_fence_regs; i++) {
2999 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
3000
3001 /*
3002 * Commit delayed tiling changes if we have an object still
3003 * attached to the fence, otherwise just clear the fence.
3004 */
3005 if (reg->obj) {
3006 i915_gem_object_update_fence(reg->obj, reg,
3007 reg->obj->tiling_mode);
3008 } else {
3009 i915_gem_write_fence(dev, i, NULL);
3010 }
3011 }
3012 }
3013
3014 void i915_gem_reset(struct drm_device *dev)
3015 {
3016 struct drm_i915_private *dev_priv = dev->dev_private;
3017 struct intel_ring_buffer *ring;
3018 int i;
3019
3020 /*
3021 * Before we free the objects from the requests, we need to inspect
3022 * them to find the guilty party. As the requests only borrow
3023 * their reference to the objects, the inspection must be done first.
3024 */
3025 for_each_ring(ring, dev_priv, i)
3026 i915_gem_reset_ring_status(dev_priv, ring);
3027
3028 for_each_ring(ring, dev_priv, i)
3029 i915_gem_reset_ring_cleanup(dev_priv, ring);
3030
3031 i915_gem_cleanup_ringbuffer(dev);
3032
3033 i915_gem_context_reset(dev);
3034
3035 i915_gem_restore_fences(dev);
3036 }
3037
3038 /**
3039 * This function clears the request list as sequence numbers are passed.
3040 */
3041 static void
3042 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
3043 {
3044 uint32_t seqno;
3045
3046 if (list_empty(&ring->request_list))
3047 return;
3048
3049 WARN_ON(i915_verify_lists(ring->dev));
3050
3051 seqno = ring->get_seqno(ring, true);
3052
3053 /* Move any buffers on the active list that are no longer referenced
3054 * by the ringbuffer to the flushing/inactive lists as appropriate,
3055 * before we free the context associated with the requests.
3056 */
3057 while (!list_empty(&ring->active_list)) {
3058 struct drm_i915_gem_object *obj;
3059
3060 obj = list_first_entry(&ring->active_list,
3061 struct drm_i915_gem_object,
3062 ring_list);
3063
3064 if (!i915_seqno_passed(seqno, obj->last_read_seqno))
3065 break;
3066
3067 i915_gem_object_move_to_inactive(obj);
3068 }
3069
3071 while (!list_empty(&ring->request_list)) {
3072 struct drm_i915_gem_request *request;
3073
3074 request = list_first_entry(&ring->request_list,
3075 struct drm_i915_gem_request,
3076 list);
3077
3078 if (!i915_seqno_passed(seqno, request->seqno))
3079 break;
3080
3081 trace_i915_gem_request_retire(ring, request->seqno);
3082 /* We know the GPU must have read the request to have
3083 * sent us the seqno + interrupt, so use the position
3084 * of the tail of the request to update the last known position
3085 * of the GPU head.
3086 */
3087 ring->last_retired_head = request->tail;
3088
3089 i915_gem_free_request(request);
3090 }
3091
3092 if (unlikely(ring->trace_irq_seqno &&
3093 i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
3094 ring->irq_put(ring);
3095 ring->trace_irq_seqno = 0;
3096 }
3097
3098 WARN_ON(i915_verify_lists(ring->dev));
3099 }
3100
3101 bool
3102 i915_gem_retire_requests(struct drm_device *dev)
3103 {
3104 struct drm_i915_private *dev_priv = dev->dev_private;
3105 struct intel_ring_buffer *ring;
3106 bool idle = true;
3107 int i;
3108
3109 for_each_ring(ring, dev_priv, i) {
3110 i915_gem_retire_requests_ring(ring);
3111 idle &= list_empty(&ring->request_list);
3112 }
3113
3114 if (idle)
3115 mod_delayed_work(dev_priv->wq,
3116 &dev_priv->mm.idle_work,
3117 msecs_to_jiffies(100));
3118
3119 return idle;
3120 }
3121
3122 static void
3123 i915_gem_retire_work_handler(struct work_struct *work)
3124 {
3125 struct drm_i915_private *dev_priv =
3126 container_of(work, typeof(*dev_priv), mm.retire_work.work);
3127 struct drm_device *dev = dev_priv->dev;
3128 bool idle;
3129
3130 /* Come back later if the device is busy... */
3131 idle = false;
3132 if (mutex_trylock(&dev->struct_mutex)) {
3133 idle = i915_gem_retire_requests(dev);
3134 mutex_unlock(&dev->struct_mutex);
3135 }
3136 if (!idle)
3137 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
3138 round_jiffies_up_relative(HZ));
3139 }
3140
3141 static void
3142 i915_gem_idle_work_handler(struct work_struct *work)
3143 {
3144 struct drm_i915_private *dev_priv =
3145 container_of(work, typeof(*dev_priv), mm.idle_work.work);
3146
3147 intel_mark_idle(dev_priv->dev);
3148 }
3149
3150 /**
3151 * Ensures that an object will eventually get non-busy by flushing any required
3152 * write domains, emitting any outstanding lazy request and retiring any
3153 * completed requests.
3154 */
3155 static int
3156 i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
3157 {
3158 int ret;
3159
3160 if (obj->active) {
3161 ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
3162 if (ret)
3163 return ret;
3164
3165 i915_gem_retire_requests_ring(obj->ring);
3166 }
3167
3168 return 0;
3169 }
3170
3171 /**
3172 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
3173 * @DRM_IOCTL_ARGS: standard ioctl arguments
3174 *
3175 * Returns 0 if successful, else an error is returned with the remaining time in
3176 * the timeout parameter.
3177 * -ETIME: object is still busy after timeout
3178 * -ERESTARTSYS: signal interrupted the wait
3179 * -ENOENT: object doesn't exist
3180 * Also possible, but rare:
3181 * -EAGAIN: GPU wedged
3182 * -ENOMEM: damn
3183 * -ENODEV: Internal IRQ fail
3184 * -E?: The add request failed
3185 *
3186 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
3187 * non-zero timeout parameter the wait ioctl will wait for the given number of
3188 * nanoseconds on an object becoming unbusy. Since the wait itself does so
3189 * without holding struct_mutex the object may become re-busied before this
3190 * function completes. A similar but shorter race condition exists in the busy
3191 * ioctl.
3192 */
3193 int
3194 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3195 {
3196 struct drm_i915_private *dev_priv = dev->dev_private;
3197 struct drm_i915_gem_wait *args = data;
3198 struct drm_i915_gem_object *obj;
3199 struct intel_ring_buffer *ring = NULL;
3200 struct timespec timeout_stack, *timeout = NULL;
3201 unsigned reset_counter;
3202 u32 seqno = 0;
3203 int ret = 0;
3204
3205 if (args->timeout_ns >= 0) {
3206 timeout_stack = ns_to_timespec(args->timeout_ns);
3207 timeout = &timeout_stack;
3208 }
3209
3210 ret = i915_mutex_lock_interruptible(dev);
3211 if (ret)
3212 return ret;
3213
3214 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
3215 if (&obj->base == NULL) {
3216 mutex_unlock(&dev->struct_mutex);
3217 return -ENOENT;
3218 }
3219
3220 /* Need to make sure the object gets inactive eventually. */
3221 ret = i915_gem_object_flush_active(obj);
3222 if (ret)
3223 goto out;
3224
3225 if (obj->active) {
3226 seqno = obj->last_read_seqno;
3227 ring = obj->ring;
3228 }
3229
3230 if (seqno == 0)
3231 goto out;
3232
3233 /* Do this after OLR check to make sure we make forward progress polling
3234 * on this IOCTL with a 0 timeout (like busy ioctl)
3235 */
3236 if (!args->timeout_ns) {
3237 ret = -ETIME;
3238 goto out;
3239 }
3240
3241 drm_gem_object_unreference(&obj->base);
3242 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
3243 mutex_unlock(&dev->struct_mutex);
3244
3245 ret = __wait_seqno(ring, seqno, reset_counter, true, timeout, file->driver_priv);
3246 if (timeout)
3247 args->timeout_ns = timespec_to_ns(timeout);
3248 return ret;
3249
3250 out:
3251 drm_gem_object_unreference(&obj->base);
3252 mutex_unlock(&dev->struct_mutex);
3253 return ret;
3254 }
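/*
 * A minimal userspace sketch of the ioctl documented above (assumes an
 * open DRM fd, a valid GEM handle and the libdrm drmIoctl() wrapper):
 *
 *	struct drm_i915_gem_wait wait = {
 *		.bo_handle = handle,
 *		.timeout_ns = 1000000000,
 *	};
 *	int ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
 *
 * ret == 0 means the object went idle and wait.timeout_ns now holds the
 * time remaining; ret == -1 with errno == ETIME means it was still busy
 * when the timeout expired.  A timeout_ns of 0 gives the busy-ioctl
 * style poll mentioned in the comment above.
 */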
3255
3256 /**
3257 * i915_gem_object_sync - sync an object to a ring.
3258 *
3259 * @obj: object which may be in use on another ring.
3260 * @to: ring we wish to use the object on. May be NULL.
3261 *
3262 * This code is meant to abstract object synchronization with the GPU.
3263 * Calling with NULL implies synchronizing the object with the CPU
3264 * rather than a particular GPU ring.
3265 *
3266 * Returns 0 if successful, else propagates up the lower layer error.
3267 */
3268 int
3269 i915_gem_object_sync(struct drm_i915_gem_object *obj,
3270 struct intel_ring_buffer *to)
3271 {
3272 struct intel_ring_buffer *from = obj->ring;
3273 u32 seqno;
3274 int ret, idx;
3275
3276 if (from == NULL || to == from)
3277 return 0;
3278
3279 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
3280 return i915_gem_object_wait_rendering(obj, false);
3281
3282 idx = intel_ring_sync_index(from, to);
3283
3284 seqno = obj->last_read_seqno;
3285 if (seqno <= from->sync_seqno[idx])
3286 return 0;
3287
3288 ret = i915_gem_check_olr(obj->ring, seqno);
3289 if (ret)
3290 return ret;
3291
3292 trace_i915_gem_ring_sync_to(from, to, seqno);
3293 ret = to->sync_to(to, from, seqno);
3294 if (!ret)
3295 /* We use last_read_seqno because sync_to()
3296 * might have just caused seqno wrap under
3297 * the radar.
3298 */
3299 from->sync_seqno[idx] = obj->last_read_seqno;
3300
3301 return ret;
3302 }
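/*
 * Usage sketch, mirroring how the execbuffer path hands objects between
 * rings: before commands on ring "to" may read "obj", the caller does
 *
 *	ret = i915_gem_object_sync(obj, to);
 *	if (ret)
 *		return ret;
 *
 * which either queues a semaphore wait against obj->ring or, when
 * semaphores are unavailable (or to == NULL), falls back to waiting for
 * the rendering on the CPU.
 */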
3303
3304 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
3305 {
3306 u32 old_write_domain, old_read_domains;
3307
3308 /* Force a pagefault for domain tracking on next user access */
3309 i915_gem_release_mmap(obj);
3310
3311 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3312 return;
3313
3314 /* Wait for any direct GTT access to complete */
3315 mb();
3316
3317 old_read_domains = obj->base.read_domains;
3318 old_write_domain = obj->base.write_domain;
3319
3320 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
3321 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
3322
3323 trace_i915_gem_object_change_domain(obj,
3324 old_read_domains,
3325 old_write_domain);
3326 }
3327
3328 int i915_vma_unbind(struct i915_vma *vma)
3329 {
3330 struct drm_i915_gem_object *obj = vma->obj;
3331 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3332 int ret;
3333
3334 if (list_empty(&vma->vma_link))
3335 return 0;
3336
3337 if (!drm_mm_node_allocated(&vma->node)) {
3338 i915_gem_vma_destroy(vma);
3339 return 0;
3340 }
3341
3342 if (vma->pin_count)
3343 return -EBUSY;
3344
3345 BUG_ON(obj->pages == NULL);
3346
3347 ret = i915_gem_object_finish_gpu(obj);
3348 if (ret)
3349 return ret;
3350 /* Continue on if we fail due to EIO, the GPU is hung so we
3351 * should be safe and we need to cleanup or else we might
3352 * cause memory corruption through use-after-free.
3353 */
3354
3355 i915_gem_object_finish_gtt(obj);
3356
3357 /* release the fence reg _after_ flushing */
3358 ret = i915_gem_object_put_fence(obj);
3359 if (ret)
3360 return ret;
3361
3362 trace_i915_vma_unbind(vma);
3363
3364 vma->unbind_vma(vma);
3365
3366 i915_gem_gtt_finish_object(obj);
3367
3368 list_del_init(&vma->mm_list);
3369 /* Avoid an unnecessary call to unbind on rebind. */
3370 if (i915_is_ggtt(vma->vm))
3371 obj->map_and_fenceable = true;
3372
3373 drm_mm_remove_node(&vma->node);
3374 i915_gem_vma_destroy(vma);
3375
3376 /* Since the unbound list is global, only move to that list if
3377 * no more VMAs exist. */
3378 if (list_empty(&obj->vma_list))
3379 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
3380
3381 /* And finally now the object is completely decoupled from this vma,
3382 * we can drop its hold on the backing storage and allow it to be
3383 * reaped by the shrinker.
3384 */
3385 i915_gem_object_unpin_pages(obj);
3386
3387 return 0;
3388 }
3389
3390 int i915_gpu_idle(struct drm_device *dev)
3391 {
3392 struct drm_i915_private *dev_priv = dev->dev_private;
3393 struct intel_ring_buffer *ring;
3394 int ret, i;
3395
3396 /* Flush everything onto the inactive list. */
3397 for_each_ring(ring, dev_priv, i) {
3398 ret = i915_switch_context(ring, ring->default_context);
3399 if (ret)
3400 return ret;
3401
3402 ret = intel_ring_idle(ring);
3403 if (ret)
3404 return ret;
3405 }
3406
3407 return 0;
3408 }
3409
3410 static void i965_write_fence_reg(struct drm_device *dev, int reg,
3411 struct drm_i915_gem_object *obj)
3412 {
3413 struct drm_i915_private *dev_priv = dev->dev_private;
3414 int fence_reg;
3415 int fence_pitch_shift;
3416
3417 if (INTEL_INFO(dev)->gen >= 6) {
3418 fence_reg = FENCE_REG_SANDYBRIDGE_0;
3419 fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT;
3420 } else {
3421 fence_reg = FENCE_REG_965_0;
3422 fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
3423 }
3424
3425 fence_reg += reg * 8;
3426
3427 /* To w/a incoherency with non-atomic 64-bit register updates,
3428 * we split the 64-bit update into two 32-bit writes. In order
3429 * for a partial fence not to be evaluated between writes, we
3430 * precede the update with write to turn off the fence register,
3431 * and only enable the fence as the last step.
3432 *
3433 * For extra levels of paranoia, we make sure each step lands
3434 * before applying the next step.
3435 */
3436 I915_WRITE(fence_reg, 0);
3437 POSTING_READ(fence_reg);
3438
3439 if (obj) {
3440 u32 size = i915_gem_obj_ggtt_size(obj);
3441 uint64_t val;
3442
3443 val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
3444 0xfffff000) << 32;
3445 val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
3446 val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
3447 if (obj->tiling_mode == I915_TILING_Y)
3448 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
3449 val |= I965_FENCE_REG_VALID;
3450
3451 I915_WRITE(fence_reg + 4, val >> 32);
3452 POSTING_READ(fence_reg + 4);
3453
3454 I915_WRITE(fence_reg + 0, val);
3455 POSTING_READ(fence_reg);
3456 } else {
3457 I915_WRITE(fence_reg + 4, 0);
3458 POSTING_READ(fence_reg + 4);
3459 }
3460 }
3461
3462 static void i915_write_fence_reg(struct drm_device *dev, int reg,
3463 struct drm_i915_gem_object *obj)
3464 {
3465 struct drm_i915_private *dev_priv = dev->dev_private;
3466 u32 val;
3467
3468 if (obj) {
3469 u32 size = i915_gem_obj_ggtt_size(obj);
3470 int pitch_val;
3471 int tile_width;
3472
3473 WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) ||
3474 (size & -size) != size ||
3475 (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
3476 "object 0x%08lx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
3477 i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);
3478
3479 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
3480 tile_width = 128;
3481 else
3482 tile_width = 512;
3483
3484 /* Note: pitch better be a power of two tile widths */
3485 pitch_val = obj->stride / tile_width;
3486 pitch_val = ffs(pitch_val) - 1;
3487
3488 val = i915_gem_obj_ggtt_offset(obj);
3489 if (obj->tiling_mode == I915_TILING_Y)
3490 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
3491 val |= I915_FENCE_SIZE_BITS(size);
3492 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
3493 val |= I830_FENCE_REG_VALID;
3494 } else
3495 val = 0;
3496
3497 if (reg < 8)
3498 reg = FENCE_REG_830_0 + reg * 4;
3499 else
3500 reg = FENCE_REG_945_8 + (reg - 8) * 4;
3501
3502 I915_WRITE(reg, val);
3503 POSTING_READ(reg);
3504 }
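/*
 * Worked example for the pitch encoding above: an X-tiled surface on
 * gen3 uses 512-byte wide tiles, so a 2048-byte stride gives
 * pitch_val = ffs(2048 / 512) - 1 = 2; the register field therefore
 * stores log2 of the pitch measured in tile widths.
 */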
3505
3506 static void i830_write_fence_reg(struct drm_device *dev, int reg,
3507 struct drm_i915_gem_object *obj)
3508 {
3509 struct drm_i915_private *dev_priv = dev->dev_private;
3510 uint32_t val;
3511
3512 if (obj) {
3513 u32 size = i915_gem_obj_ggtt_size(obj);
3514 uint32_t pitch_val;
3515
3516 WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) ||
3517 (size & -size) != size ||
3518 (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
3519 "object 0x%08lx not 512K or pot-size 0x%08x aligned\n",
3520 i915_gem_obj_ggtt_offset(obj), size);
3521
3522 pitch_val = obj->stride / 128;
3523 pitch_val = ffs(pitch_val) - 1;
3524
3525 val = i915_gem_obj_ggtt_offset(obj);
3526 if (obj->tiling_mode == I915_TILING_Y)
3527 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
3528 val |= I830_FENCE_SIZE_BITS(size);
3529 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
3530 val |= I830_FENCE_REG_VALID;
3531 } else
3532 val = 0;
3533
3534 I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
3535 POSTING_READ(FENCE_REG_830_0 + reg * 4);
3536 }
3537
3538 inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
3539 {
3540 return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
3541 }
3542
3543 static void i915_gem_write_fence(struct drm_device *dev, int reg,
3544 struct drm_i915_gem_object *obj)
3545 {
3546 struct drm_i915_private *dev_priv = dev->dev_private;
3547
3548 /* Ensure that all CPU reads are completed before installing a fence
3549 * and all writes before removing the fence.
3550 */
3551 if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
3552 mb();
3553
3554 WARN(obj && (!obj->stride || !obj->tiling_mode),
3555 "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
3556 obj->stride, obj->tiling_mode);
3557
3558 switch (INTEL_INFO(dev)->gen) {
3559 case 8:
3560 case 7:
3561 case 6:
3562 case 5:
3563 case 4: i965_write_fence_reg(dev, reg, obj); break;
3564 case 3: i915_write_fence_reg(dev, reg, obj); break;
3565 case 2: i830_write_fence_reg(dev, reg, obj); break;
3566 default: BUG();
3567 }
3568
3569 /* And similarly be paranoid that no direct access to this region
3570 * is reordered to before the fence is installed.
3571 */
3572 if (i915_gem_object_needs_mb(obj))
3573 mb();
3574 }
3575
3576 static inline int fence_number(struct drm_i915_private *dev_priv,
3577 struct drm_i915_fence_reg *fence)
3578 {
3579 return fence - dev_priv->fence_regs;
3580 }
3581
3582 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
3583 struct drm_i915_fence_reg *fence,
3584 bool enable)
3585 {
3586 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3587 int reg = fence_number(dev_priv, fence);
3588
3589 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
3590
3591 if (enable) {
3592 obj->fence_reg = reg;
3593 fence->obj = obj;
3594 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
3595 } else {
3596 obj->fence_reg = I915_FENCE_REG_NONE;
3597 fence->obj = NULL;
3598 list_del_init(&fence->lru_list);
3599 }
3600 obj->fence_dirty = false;
3601 }
3602
3603 static int
3604 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
3605 {
3606 if (obj->last_fenced_seqno) {
3607 int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
3608 if (ret)
3609 return ret;
3610
3611 obj->last_fenced_seqno = 0;
3612 }
3613
3614 obj->fenced_gpu_access = false;
3615 return 0;
3616 }
3617
3618 int
3619 i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
3620 {
3621 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3622 struct drm_i915_fence_reg *fence;
3623 int ret;
3624
3625 ret = i915_gem_object_wait_fence(obj);
3626 if (ret)
3627 return ret;
3628
3629 if (obj->fence_reg == I915_FENCE_REG_NONE)
3630 return 0;
3631
3632 fence = &dev_priv->fence_regs[obj->fence_reg];
3633
3634 i915_gem_object_fence_lost(obj);
3635 i915_gem_object_update_fence(obj, fence, false);
3636
3637 return 0;
3638 }
3639
3640 static struct drm_i915_fence_reg *
3641 i915_find_fence_reg(struct drm_device *dev)
3642 {
3643 struct drm_i915_private *dev_priv = dev->dev_private;
3644 struct drm_i915_fence_reg *reg, *avail;
3645 int i;
3646
3647 /* First try to find a free reg */
3648 avail = NULL;
3649 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
3650 reg = &dev_priv->fence_regs[i];
3651 if (!reg->obj)
3652 return reg;
3653
3654 if (!reg->pin_count)
3655 avail = reg;
3656 }
3657
3658 if (avail == NULL)
3659 goto deadlock;
3660
3661 /* None available, try to steal one or wait for a user to finish */
3662 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
3663 if (reg->pin_count)
3664 continue;
3665
3666 return reg;
3667 }
3668
3669 deadlock:
3670 /* Wait for completion of pending flips which consume fences */
3671 if (intel_has_pending_fb_unpin(dev))
3672 return ERR_PTR(-EAGAIN);
3673
3674 return ERR_PTR(-EDEADLK);
3675 }
3676
3677 /**
3678 * i915_gem_object_get_fence - set up fencing for an object
3679 * @obj: object to map through a fence reg
3680 *
3681 * When mapping objects through the GTT, userspace wants to be able to write
3682 * to them without having to worry about swizzling if the object is tiled.
3683 * This function walks the fence regs looking for a free one for @obj,
3684 * stealing one if it can't find any.
3685 *
3686 * It then sets up the reg based on the object's properties: address, pitch
3687 * and tiling format.
3688 *
3689 * For an untiled surface, this removes any existing fence.
3690 */
3691 int
3692 i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
3693 {
3694 struct drm_device *dev = obj->base.dev;
3695 struct drm_i915_private *dev_priv = dev->dev_private;
3696 bool enable = obj->tiling_mode != I915_TILING_NONE;
3697 struct drm_i915_fence_reg *reg;
3698 int ret;
3699
3700 /* Have we updated the tiling parameters upon the object and so
3701 * will need to serialise the write to the associated fence register?
3702 */
3703 if (obj->fence_dirty) {
3704 ret = i915_gem_object_wait_fence(obj);
3705 if (ret)
3706 return ret;
3707 }
3708
3709 /* Just update our place in the LRU if our fence is getting reused. */
3710 if (obj->fence_reg != I915_FENCE_REG_NONE) {
3711 reg = &dev_priv->fence_regs[obj->fence_reg];
3712 if (!obj->fence_dirty) {
3713 list_move_tail(&reg->lru_list,
3714 &dev_priv->mm.fence_list);
3715 return 0;
3716 }
3717 } else if (enable) {
3718 reg = i915_find_fence_reg(dev);
3719 if (IS_ERR(reg))
3720 return PTR_ERR(reg);
3721
3722 if (reg->obj) {
3723 struct drm_i915_gem_object *old = reg->obj;
3724
3725 ret = i915_gem_object_wait_fence(old);
3726 if (ret)
3727 return ret;
3728
3729 i915_gem_object_fence_lost(old);
3730 }
3731 } else
3732 return 0;
3733
3734 i915_gem_object_update_fence(obj, reg, enable);
3735
3736 return 0;
3737 }
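/*
 * The userspace trigger for this is simply declaring the tiling before
 * GTT-mapping the object (a hypothetical sketch; "fd", "handle" and
 * "stride" are assumed, and stride must satisfy the usual tiling
 * constraints):
 *
 *	struct drm_i915_gem_set_tiling tiling = {
 *		.handle = handle,
 *		.tiling_mode = I915_TILING_X,
 *		.stride = stride,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &tiling);
 *
 * Subsequent GTT faults on the object then come through
 * i915_gem_object_get_fence() and have a matching fence programmed, so
 * the CPU can address the mapping linearly without worrying about the
 * tile layout.
 */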
3738
3739 static bool i915_gem_valid_gtt_space(struct drm_device *dev,
3740 struct drm_mm_node *gtt_space,
3741 unsigned long cache_level)
3742 {
3743 struct drm_mm_node *other;
3744
3745 /* On non-LLC machines we have to be careful when putting differing
3746 * types of snoopable memory together to avoid the prefetcher
3747 * crossing memory domains and dying.
3748 */
3749 if (HAS_LLC(dev))
3750 return true;
3751
3752 if (!drm_mm_node_allocated(gtt_space))
3753 return true;
3754
3755 if (list_empty(&gtt_space->node_list))
3756 return true;
3757
3758 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
3759 if (other->allocated && !other->hole_follows && other->color != cache_level)
3760 return false;
3761
3762 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
3763 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
3764 return false;
3765
3766 return true;
3767 }
3768
3769 static void i915_gem_verify_gtt(struct drm_device *dev)
3770 {
3771 #if WATCH_GTT
3772 struct drm_i915_private *dev_priv = dev->dev_private;
3773 struct drm_i915_gem_object *obj;
3774 int err = 0;
3775
3776 list_for_each_entry(obj, &dev_priv->mm.gtt_list, global_list) {
3777 if (obj->gtt_space == NULL) {
3778 printk(KERN_ERR "object found on GTT list with no space reserved\n");
3779 err++;
3780 continue;
3781 }
3782
3783 if (obj->cache_level != obj->gtt_space->color) {
3784 printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n",
3785 i915_gem_obj_ggtt_offset(obj),
3786 i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj),
3787 obj->cache_level,
3788 obj->gtt_space->color);
3789 err++;
3790 continue;
3791 }
3792
3793 if (!i915_gem_valid_gtt_space(dev,
3794 obj->gtt_space,
3795 obj->cache_level)) {
3796 printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n",
3797 i915_gem_obj_ggtt_offset(obj),
3798 i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj),
3799 obj->cache_level);
3800 err++;
3801 continue;
3802 }
3803 }
3804
3805 WARN_ON(err);
3806 #endif
3807 }
3808
3809 /**
3810 * Finds free space in the GTT aperture and binds the object there.
3811 */
3812 static struct i915_vma *
3813 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
3814 struct i915_address_space *vm,
3815 unsigned alignment,
3816 uint64_t flags)
3817 {
3818 struct drm_device *dev = obj->base.dev;
3819 struct drm_i915_private *dev_priv = dev->dev_private;
3820 u32 size, fence_size, fence_alignment, unfenced_alignment;
3821 unsigned long start =
3822 flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
3823 unsigned long end =
3824 flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total;
3825 struct i915_vma *vma;
3826 int ret;
3827
3828 fence_size = i915_gem_get_gtt_size(dev,
3829 obj->base.size,
3830 obj->tiling_mode);
3831 fence_alignment = i915_gem_get_gtt_alignment(dev,
3832 obj->base.size,
3833 obj->tiling_mode, true);
3834 unfenced_alignment =
3835 i915_gem_get_gtt_alignment(dev,
3836 obj->base.size,
3837 obj->tiling_mode, false);
3838
3839 if (alignment == 0)
3840 alignment = flags & PIN_MAPPABLE ? fence_alignment :
3841 unfenced_alignment;
3842 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
3843 DRM_DEBUG("Invalid object alignment requested %u\n", alignment);
3844 return ERR_PTR(-EINVAL);
3845 }
3846
3847 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
3848
3849 /* If the object is bigger than the entire aperture, reject it early
3850 * before evicting everything in a vain attempt to find space.
3851 */
3852 if (obj->base.size > end) {
3853 DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%lu\n",
3854 obj->base.size,
3855 flags & PIN_MAPPABLE ? "mappable" : "total",
3856 end);
3857 return ERR_PTR(-E2BIG);
3858 }
3859
3860 ret = i915_gem_object_get_pages(obj);
3861 if (ret)
3862 return ERR_PTR(ret);
3863
3864 i915_gem_object_pin_pages(obj);
3865
3866 vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
3867 if (IS_ERR(vma))
3868 goto err_unpin;
3869
3870 search_free:
3871 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
3872 size, alignment,
3873 obj->cache_level,
3874 start, end,
3875 DRM_MM_SEARCH_DEFAULT,
3876 DRM_MM_CREATE_DEFAULT);
3877 if (ret) {
3878 ret = i915_gem_evict_something(dev, vm, size, alignment,
3879 obj->cache_level,
3880 start, end,
3881 flags);
3882 if (ret == 0)
3883 goto search_free;
3884
3885 goto err_free_vma;
3886 }
3887 if (WARN_ON(!i915_gem_valid_gtt_space(dev, &vma->node,
3888 obj->cache_level))) {
3889 ret = -EINVAL;
3890 goto err_remove_node;
3891 }
3892
3893 ret = i915_gem_gtt_prepare_object(obj);
3894 if (ret)
3895 goto err_remove_node;
3896
3897 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
3898 list_add_tail(&vma->mm_list, &vm->inactive_list);
3899
3900 if (i915_is_ggtt(vm)) {
3901 bool mappable, fenceable;
3902
3903 fenceable = (vma->node.size == fence_size &&
3904 (vma->node.start & (fence_alignment - 1)) == 0);
3905
3906 mappable = (vma->node.start + obj->base.size <=
3907 dev_priv->gtt.mappable_end);
3908
3909 obj->map_and_fenceable = mappable && fenceable;
3910 }
3911
3912 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
3913
3914 trace_i915_vma_bind(vma, flags);
3915 vma->bind_vma(vma, obj->cache_level,
3916 flags & (PIN_MAPPABLE | PIN_GLOBAL) ? GLOBAL_BIND : 0);
3917
3918 i915_gem_verify_gtt(dev);
3919 return vma;
3920
3921 err_remove_node:
3922 drm_mm_remove_node(&vma->node);
3923 err_free_vma:
3924 i915_gem_vma_destroy(vma);
3925 vma = ERR_PTR(ret);
3926 err_unpin:
3927 i915_gem_object_unpin_pages(obj);
3928 return vma;
3929 }
3930
3931 bool
3932 i915_gem_clflush_object(struct drm_i915_gem_object *obj,
3933 bool force)
3934 {
3935 /* If we don't have a page list set up, then we're not pinned
3936 * to GPU, and we can ignore the cache flush because it'll happen
3937 * again at bind time.
3938 */
3939 if (obj->pages == NULL)
3940 return false;
3941
3942 /*
3943 * Stolen memory is always coherent with the GPU as it is explicitly
3944 * marked as wc by the system, or the system is cache-coherent.
3945 */
3946 if (obj->stolen)
3947 return false;
3948
3949 /* If the GPU is snooping the contents of the CPU cache,
3950 * we do not need to manually clear the CPU cache lines. However,
3951 * the caches are only snooped when the render cache is
3952 * flushed/invalidated. As we always have to emit invalidations
3953 * and flushes when moving into and out of the RENDER domain, correct
3954 * snooping behaviour occurs naturally as the result of our domain
3955 * tracking.
3956 */
3957 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
3958 return false;
3959
3960 trace_i915_gem_object_clflush(obj);
3961 #ifdef __NetBSD__
3962 drm_clflush_pglist(&obj->igo_pageq);
3963 #else
3964 drm_clflush_sg(obj->pages);
3965 #endif
3966
3967 return true;
3968 }
3969
3970 /** Flushes the GTT write domain for the object if it's dirty. */
3971 static void
3972 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
3973 {
3974 uint32_t old_write_domain;
3975
3976 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
3977 return;
3978
3979 /* No actual flushing is required for the GTT write domain. Writes
3980 * to it immediately go to main memory as far as we know, so there's
3981 * no chipset flush. It also doesn't land in render cache.
3982 *
3983 * However, we do have to enforce the order so that all writes through
3984 * the GTT land before any writes to the device, such as updates to
3985 * the GATT itself.
3986 */
3987 wmb();
3988
3989 old_write_domain = obj->base.write_domain;
3990 obj->base.write_domain = 0;
3991
3992 trace_i915_gem_object_change_domain(obj,
3993 obj->base.read_domains,
3994 old_write_domain);
3995 }
3996
3997 /** Flushes the CPU write domain for the object if it's dirty. */
3998 static void
3999 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
4000 bool force)
4001 {
4002 uint32_t old_write_domain;
4003
4004 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
4005 return;
4006
4007 if (i915_gem_clflush_object(obj, force))
4008 i915_gem_chipset_flush(obj->base.dev);
4009
4010 old_write_domain = obj->base.write_domain;
4011 obj->base.write_domain = 0;
4012
4013 trace_i915_gem_object_change_domain(obj,
4014 obj->base.read_domains,
4015 old_write_domain);
4016 }
4017
4018 /**
4019 * Moves a single object to the GTT read, and possibly write domain.
4020 *
4021 * This function returns when the move is complete, including waiting on
4022 * flushes to occur.
4023 */
4024 int
4025 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
4026 {
4027 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
4028 uint32_t old_write_domain, old_read_domains;
4029 int ret;
4030
4031 /* Not valid to be called on unbound objects. */
4032 if (!i915_gem_obj_bound_any(obj))
4033 return -EINVAL;
4034
4035 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
4036 return 0;
4037
4038 ret = i915_gem_object_wait_rendering(obj, !write);
4039 if (ret)
4040 return ret;
4041
4042 i915_gem_object_flush_cpu_write_domain(obj, false);
4043
4044 /* Serialise direct access to this object with the barriers for
4045 * coherent writes from the GPU, by effectively invalidating the
4046 * GTT domain upon first access.
4047 */
4048 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
4049 mb();
4050
4051 old_write_domain = obj->base.write_domain;
4052 old_read_domains = obj->base.read_domains;
4053
4054 /* It should now be out of any other write domains, and we can update
4055 * the domain values for our changes.
4056 */
4057 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
4058 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
4059 if (write) {
4060 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
4061 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
4062 obj->dirty = 1;
4063 }
4064
4065 trace_i915_gem_object_change_domain(obj,
4066 old_read_domains,
4067 old_write_domain);
4068
4069 /* And bump the LRU for this access */
4070 if (i915_gem_object_is_inactive(obj)) {
4071 struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
4072 if (vma)
4073 list_move_tail(&vma->mm_list,
4074 &dev_priv->gtt.base.inactive_list);
4075
4076 }
4077
4078 return 0;
4079 }
4080
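/*
 * Change the caching mode of an object. Any VMA whose current placement
 * would be invalid for the new cache level (see i915_gem_valid_gtt_space())
 * is unbound first; the remaining bindings are refreshed with PTEs for the
 * new level, and the node colours are updated to match.
 */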
4081 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
4082 enum i915_cache_level cache_level)
4083 {
4084 struct drm_device *dev = obj->base.dev;
4085 struct i915_vma *vma, *next;
4086 int ret;
4087
4088 if (obj->cache_level == cache_level)
4089 return 0;
4090
4091 if (i915_gem_obj_is_pinned(obj)) {
4092 DRM_DEBUG("can not change the cache level of pinned objects\n");
4093 return -EBUSY;
4094 }
4095
4096 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
4097 if (!i915_gem_valid_gtt_space(dev, &vma->node, cache_level)) {
4098 ret = i915_vma_unbind(vma);
4099 if (ret)
4100 return ret;
4101 }
4102 }
4103
4104 if (i915_gem_obj_bound_any(obj)) {
4105 ret = i915_gem_object_finish_gpu(obj);
4106 if (ret)
4107 return ret;
4108
4109 i915_gem_object_finish_gtt(obj);
4110
4111 /* Before SandyBridge, you could not use tiling or fence
4112 * registers with snooped memory, so relinquish any fences
4113 * currently pointing to our region in the aperture.
4114 */
4115 if (INTEL_INFO(dev)->gen < 6) {
4116 ret = i915_gem_object_put_fence(obj);
4117 if (ret)
4118 return ret;
4119 }
4120
4121 list_for_each_entry(vma, &obj->vma_list, vma_link)
4122 if (drm_mm_node_allocated(&vma->node))
4123 vma->bind_vma(vma, cache_level,
4124 obj->has_global_gtt_mapping ? GLOBAL_BIND : 0);
4125 }
4126
4127 list_for_each_entry(vma, &obj->vma_list, vma_link)
4128 vma->node.color = cache_level;
4129 obj->cache_level = cache_level;
4130
4131 if (cpu_write_needs_clflush(obj)) {
4132 u32 old_read_domains, old_write_domain;
4133
4134 /* If we're coming from LLC cached, then we haven't
4135 * actually been tracking whether the data is in the
4136 * CPU cache or not, since we only allow one bit set
4137 * in obj->write_domain and have been skipping the clflushes.
4138 * Just set it to the CPU cache for now.
4139 */
4140 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
4141
4142 old_read_domains = obj->base.read_domains;
4143 old_write_domain = obj->base.write_domain;
4144
4145 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4146 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4147
4148 trace_i915_gem_object_change_domain(obj,
4149 old_read_domains,
4150 old_write_domain);
4151 }
4152
4153 i915_gem_verify_gtt(dev);
4154 return 0;
4155 }
4156
4157 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
4158 struct drm_file *file)
4159 {
4160 struct drm_i915_gem_caching *args = data;
4161 struct drm_i915_gem_object *obj;
4162 int ret;
4163
4164 ret = i915_mutex_lock_interruptible(dev);
4165 if (ret)
4166 return ret;
4167
4168 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4169 if (&obj->base == NULL) {
4170 ret = -ENOENT;
4171 goto unlock;
4172 }
4173
4174 switch (obj->cache_level) {
4175 case I915_CACHE_LLC:
4176 case I915_CACHE_L3_LLC:
4177 args->caching = I915_CACHING_CACHED;
4178 break;
4179
4180 case I915_CACHE_WT:
4181 args->caching = I915_CACHING_DISPLAY;
4182 break;
4183
4184 default:
4185 args->caching = I915_CACHING_NONE;
4186 break;
4187 }
4188
4189 drm_gem_object_unreference(&obj->base);
4190 unlock:
4191 mutex_unlock(&dev->struct_mutex);
4192 return ret;
4193 }
4194
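/*
 * Illustrative userspace sketch (not part of this file; the ioctl and
 * struct names are assumed from the i915 UAPI and libdrm):
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = bo_handle,
 *		.caching = I915_CACHING_CACHED,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
 *
 * The handler below maps the requested caching mode onto an
 * i915_cache_level and applies it via i915_gem_object_set_cache_level().
 */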
4195 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
4196 struct drm_file *file)
4197 {
4198 struct drm_i915_gem_caching *args = data;
4199 struct drm_i915_gem_object *obj;
4200 enum i915_cache_level level;
4201 int ret;
4202
4203 switch (args->caching) {
4204 case I915_CACHING_NONE:
4205 level = I915_CACHE_NONE;
4206 break;
4207 case I915_CACHING_CACHED:
4208 level = I915_CACHE_LLC;
4209 break;
4210 case I915_CACHING_DISPLAY:
4211 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
4212 break;
4213 default:
4214 return -EINVAL;
4215 }
4216
4217 ret = i915_mutex_lock_interruptible(dev);
4218 if (ret)
4219 return ret;
4220
4221 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4222 if (&obj->base == NULL) {
4223 ret = -ENOENT;
4224 goto unlock;
4225 }
4226
4227 ret = i915_gem_object_set_cache_level(obj, level);
4228
4229 drm_gem_object_unreference(&obj->base);
4230 unlock:
4231 mutex_unlock(&dev->struct_mutex);
4232 return ret;
4233 }
4234
4235 static bool is_pin_display(struct drm_i915_gem_object *obj)
4236 {
4237 /* There are 3 sources that pin objects:
4238 * 1. The display engine (scanouts, sprites, cursors);
4239 * 2. Reservations for execbuffer;
4240 * 3. The user.
4241 *
4242 * We can ignore reservations as we hold the struct_mutex and
4243 * are only called outside of the reservation path. The user
4244 * can only increment pin_count once, and so if after
4245 * subtracting the potential reference by the user, any pin_count
4246 * remains, it must be due to another use by the display engine.
4247 */
4248 return i915_gem_obj_to_ggtt(obj)->pin_count - !!obj->user_pin_count;
4249 }
4250
4251 /*
4252 * Prepare buffer for display plane (scanout, cursors, etc).
4253 * Can be called from an uninterruptible phase (modesetting) and allows
4254 * any flushes to be pipelined (for pageflips).
4255 */
4256 int
4257 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
4258 u32 alignment,
4259 struct intel_ring_buffer *pipelined)
4260 {
4261 u32 old_read_domains, old_write_domain;
4262 int ret;
4263
4264 if (pipelined != obj->ring) {
4265 ret = i915_gem_object_sync(obj, pipelined);
4266 if (ret)
4267 return ret;
4268 }
4269
4270 /* Mark the pin_display early so that we account for the
4271 * display coherency whilst setting up the cache domains.
4272 */
4273 obj->pin_display = true;
4274
4275 /* The display engine is not coherent with the LLC cache on gen6. As
4276 * a result, we make sure that the pinning that is about to occur is
4277 * done with uncached PTEs. This is lowest common denominator for all
4278 * chipsets.
4279 *
4280 * However for gen6+, we could do better by using the GFDT bit instead
4281 * of uncaching, which would allow us to flush all the LLC-cached data
4282 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
4283 */
4284 ret = i915_gem_object_set_cache_level(obj,
4285 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
4286 if (ret)
4287 goto err_unpin_display;
4288
4289 /* As the user may map the buffer once pinned in the display plane
4290 * (e.g. libkms for the bootup splash), we have to ensure that we
4291 * always use map_and_fenceable for all scanout buffers.
4292 */
4293 ret = i915_gem_obj_ggtt_pin(obj, alignment, PIN_MAPPABLE);
4294 if (ret)
4295 goto err_unpin_display;
4296
4297 i915_gem_object_flush_cpu_write_domain(obj, true);
4298
4299 old_write_domain = obj->base.write_domain;
4300 old_read_domains = obj->base.read_domains;
4301
4302 /* It should now be out of any other write domains, and we can update
4303 * the domain values for our changes.
4304 */
4305 obj->base.write_domain = 0;
4306 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
4307
4308 trace_i915_gem_object_change_domain(obj,
4309 old_read_domains,
4310 old_write_domain);
4311
4312 return 0;
4313
4314 err_unpin_display:
4315 obj->pin_display = is_pin_display(obj);
4316 return ret;
4317 }
4318
4319 void
4320 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj)
4321 {
4322 i915_gem_object_ggtt_unpin(obj);
4323 obj->pin_display = is_pin_display(obj);
4324 }
4325
4326 int
4327 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
4328 {
4329 int ret;
4330
4331 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
4332 return 0;
4333
4334 ret = i915_gem_object_wait_rendering(obj, false);
4335 if (ret)
4336 return ret;
4337
4338 /* Ensure that we invalidate the GPU's caches and TLBs. */
4339 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
4340 return 0;
4341 }
4342
4343 /**
4344 * Moves a single object to the CPU read, and possibly write domain.
4345 *
4346 * This function returns when the move is complete, including waiting on
4347 * flushes to occur.
4348 */
4349 int
4350 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
4351 {
4352 uint32_t old_write_domain, old_read_domains;
4353 int ret;
4354
4355 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
4356 return 0;
4357
4358 ret = i915_gem_object_wait_rendering(obj, !write);
4359 if (ret)
4360 return ret;
4361
4362 i915_gem_object_flush_gtt_write_domain(obj);
4363
4364 old_write_domain = obj->base.write_domain;
4365 old_read_domains = obj->base.read_domains;
4366
4367 /* Flush the CPU cache if it's still invalid. */
4368 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
4369 i915_gem_clflush_object(obj, false);
4370
4371 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
4372 }
4373
4374 /* It should now be out of any other write domains, and we can update
4375 * the domain values for our changes.
4376 */
4377 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
4378
4379 /* If we're writing through the CPU, then the GPU read domains will
4380 * need to be invalidated at next use.
4381 */
4382 if (write) {
4383 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4384 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4385 }
4386
4387 trace_i915_gem_object_change_domain(obj,
4388 old_read_domains,
4389 old_write_domain);
4390
4391 return 0;
4392 }
4393
4394 /* Throttle our rendering by waiting until the ring has completed our requests
4395 * emitted over 20 msec ago.
4396 *
4397 * Note that if we were to use the current jiffies each time around the loop,
4398 * we wouldn't escape the function with any frames outstanding if the time to
4399 * render a frame was over 20ms.
4400 *
4401 * This should get us reasonable parallelism between CPU and GPU but also
4402 * relatively low latency when blocking on a particular request to finish.
4403 */
4404 static int
4405 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
4406 {
4407 struct drm_i915_private *dev_priv = dev->dev_private;
4408 struct drm_i915_file_private *file_priv = file->driver_priv;
4409 unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
4410 struct drm_i915_gem_request *request;
4411 struct intel_ring_buffer *ring = NULL;
4412 unsigned reset_counter;
4413 u32 seqno = 0;
4414 int ret;
4415
4416 ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
4417 if (ret)
4418 return ret;
4419
4420 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false);
4421 if (ret)
4422 return ret;
4423
4424 spin_lock(&file_priv->mm.lock);
4425 list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
4426 if (time_after_eq(request->emitted_jiffies, recent_enough))
4427 break;
4428
4429 ring = request->ring;
4430 seqno = request->seqno;
4431 }
4432 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
4433 spin_unlock(&file_priv->mm.lock);
4434
4435 if (seqno == 0)
4436 return 0;
4437
4438 ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, NULL);
4439 if (ret == 0)
4440 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
4441
4442 return ret;
4443 }
4444
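/*
 * A VMA is "misplaced" if its current binding cannot satisfy the requested
 * pin: the node violates the requested alignment, PIN_MAPPABLE was asked
 * for but the object is not map-and-fenceable, or the node sits below the
 * offset bias encoded in the pin flags.
 */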
4445 static bool
4446 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
4447 {
4448 struct drm_i915_gem_object *obj = vma->obj;
4449
4450 if (alignment &&
4451 vma->node.start & (alignment - 1))
4452 return true;
4453
4454 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
4455 return true;
4456
4457 if (flags & PIN_OFFSET_BIAS &&
4458 vma->node.start < (flags & PIN_OFFSET_MASK))
4459 return true;
4460
4461 return false;
4462 }
4463
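/*
 * Pin an object into the given address space: look up (or create) the VMA,
 * unbind and rebind it if the existing placement does not satisfy the
 * requested alignment and flags, and finally take a pin reference on it.
 */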
4464 int
4465 i915_gem_object_pin(struct drm_i915_gem_object *obj,
4466 struct i915_address_space *vm,
4467 uint32_t alignment,
4468 uint64_t flags)
4469 {
4470 struct i915_vma *vma;
4471 int ret;
4472
4473 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
4474 return -EINVAL;
4475
4476 vma = i915_gem_obj_to_vma(obj, vm);
4477 if (vma) {
4478 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
4479 return -EBUSY;
4480
4481 if (i915_vma_misplaced(vma, alignment, flags)) {
4482 WARN(vma->pin_count,
4483 "bo is already pinned with incorrect alignment:"
4484 " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d,"
4485 " obj->map_and_fenceable=%d\n",
4486 i915_gem_obj_offset(obj, vm), alignment,
4487 !!(flags & PIN_MAPPABLE),
4488 obj->map_and_fenceable);
4489 ret = i915_vma_unbind(vma);
4490 if (ret)
4491 return ret;
4492
4493 vma = NULL;
4494 }
4495 }
4496
4497 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
4498 vma = i915_gem_object_bind_to_vm(obj, vm, alignment, flags);
4499 if (IS_ERR(vma))
4500 return PTR_ERR(vma);
4501 }
4502
4503 if (flags & PIN_GLOBAL && !obj->has_global_gtt_mapping)
4504 vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
4505
4506 vma->pin_count++;
4507 if (flags & PIN_MAPPABLE)
4508 obj->pin_mappable |= true;
4509
4510 return 0;
4511 }
4512
4513 void
4514 i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj)
4515 {
4516 struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
4517
4518 BUG_ON(!vma);
4519 BUG_ON(vma->pin_count == 0);
4520 BUG_ON(!i915_gem_obj_ggtt_bound(obj));
4521
4522 if (--vma->pin_count == 0)
4523 obj->pin_mappable = false;
4524 }
4525
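/*
 * Legacy (pre-gen6, non-KMS) pin interface: user pins are refcounted per
 * object and translate into a mappable GGTT pin owned by the calling file.
 */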
4526 int
4527 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
4528 struct drm_file *file)
4529 {
4530 struct drm_i915_gem_pin *args = data;
4531 struct drm_i915_gem_object *obj;
4532 int ret;
4533
4534 if (INTEL_INFO(dev)->gen >= 6)
4535 return -ENODEV;
4536
4537 ret = i915_mutex_lock_interruptible(dev);
4538 if (ret)
4539 return ret;
4540
4541 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4542 if (&obj->base == NULL) {
4543 ret = -ENOENT;
4544 goto unlock;
4545 }
4546
4547 if (obj->madv != I915_MADV_WILLNEED) {
4548 DRM_DEBUG("Attempting to pin a purgeable buffer\n");
4549 ret = -EFAULT;
4550 goto out;
4551 }
4552
4553 if (obj->pin_filp != NULL && obj->pin_filp != file) {
4554 DRM_DEBUG("Already pinned in i915_gem_pin_ioctl(): %d\n",
4555 args->handle);
4556 ret = -EINVAL;
4557 goto out;
4558 }
4559
4560 if (obj->user_pin_count == ULONG_MAX) {
4561 ret = -EBUSY;
4562 goto out;
4563 }
4564
4565 if (obj->user_pin_count == 0) {
4566 ret = i915_gem_obj_ggtt_pin(obj, args->alignment, PIN_MAPPABLE);
4567 if (ret)
4568 goto out;
4569 }
4570
4571 obj->user_pin_count++;
4572 obj->pin_filp = file;
4573
4574 args->offset = i915_gem_obj_ggtt_offset(obj);
4575 out:
4576 drm_gem_object_unreference(&obj->base);
4577 unlock:
4578 mutex_unlock(&dev->struct_mutex);
4579 return ret;
4580 }
4581
4582 int
4583 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
4584 struct drm_file *file)
4585 {
4586 struct drm_i915_gem_pin *args = data;
4587 struct drm_i915_gem_object *obj;
4588 int ret;
4589
4590 ret = i915_mutex_lock_interruptible(dev);
4591 if (ret)
4592 return ret;
4593
4594 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4595 if (&obj->base == NULL) {
4596 ret = -ENOENT;
4597 goto unlock;
4598 }
4599
4600 if (obj->pin_filp != file) {
4601 DRM_DEBUG("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
4602 args->handle);
4603 ret = -EINVAL;
4604 goto out;
4605 }
4606 obj->user_pin_count--;
4607 if (obj->user_pin_count == 0) {
4608 obj->pin_filp = NULL;
4609 i915_gem_object_ggtt_unpin(obj);
4610 }
4611
4612 out:
4613 drm_gem_object_unreference(&obj->base);
4614 unlock:
4615 mutex_unlock(&dev->struct_mutex);
4616 return ret;
4617 }
4618
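/*
 * Report whether an object is still in use by the GPU. Bit 0 of args->busy
 * mirrors obj->active; if the object is active on a ring, that ring's flag
 * is additionally reported in bits 16 and up (hence the BUILD_BUG_ON below
 * limiting the encoding to 16 rings).
 */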
4619 int
4620 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4621 struct drm_file *file)
4622 {
4623 struct drm_i915_gem_busy *args = data;
4624 struct drm_i915_gem_object *obj;
4625 int ret;
4626
4627 ret = i915_mutex_lock_interruptible(dev);
4628 if (ret)
4629 return ret;
4630
4631 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4632 if (&obj->base == NULL) {
4633 ret = -ENOENT;
4634 goto unlock;
4635 }
4636
4637 /* Count all active objects as busy, even if they are currently not used
4638 * by the gpu. Users of this interface expect objects to eventually
4639 * become non-busy without any further actions, therefore emit any
4640 * necessary flushes here.
4641 */
4642 ret = i915_gem_object_flush_active(obj);
4643
4644 args->busy = obj->active;
4645 if (obj->ring) {
4646 BUILD_BUG_ON(I915_NUM_RINGS > 16);
4647 args->busy |= intel_ring_flag(obj->ring) << 16;
4648 }
4649
4650 drm_gem_object_unreference(&obj->base);
4651 unlock:
4652 mutex_unlock(&dev->struct_mutex);
4653 return ret;
4654 }
4655
4656 int
4657 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4658 struct drm_file *file_priv)
4659 {
4660 return i915_gem_ring_throttle(dev, file_priv);
4661 }
4662
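/*
 * Madvise lets userspace mark a buffer's backing storage as purgeable
 * (I915_MADV_DONTNEED) or needed again (I915_MADV_WILLNEED). A minimal
 * userspace sketch, with names assumed from the i915 UAPI and libdrm:
 *
 *	struct drm_i915_gem_madvise arg = {
 *		.handle = bo_handle,
 *		.madv = I915_MADV_DONTNEED,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &arg);
 *
 * If arg.retained comes back zero, the pages were already purged and the
 * buffer contents must be recreated before reuse.
 */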
4663 int
4664 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4665 struct drm_file *file_priv)
4666 {
4667 struct drm_i915_gem_madvise *args = data;
4668 struct drm_i915_gem_object *obj;
4669 int ret;
4670
4671 switch (args->madv) {
4672 case I915_MADV_DONTNEED:
4673 case I915_MADV_WILLNEED:
4674 break;
4675 default:
4676 return -EINVAL;
4677 }
4678
4679 ret = i915_mutex_lock_interruptible(dev);
4680 if (ret)
4681 return ret;
4682
4683 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
4684 if (&obj->base == NULL) {
4685 ret = -ENOENT;
4686 goto unlock;
4687 }
4688
4689 if (i915_gem_obj_is_pinned(obj)) {
4690 ret = -EINVAL;
4691 goto out;
4692 }
4693
4694 if (obj->madv != __I915_MADV_PURGED)
4695 obj->madv = args->madv;
4696
4697 /* if the object is no longer attached, discard its backing storage */
4698 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL)
4699 i915_gem_object_truncate(obj);
4700
4701 args->retained = obj->madv != __I915_MADV_PURGED;
4702
4703 out:
4704 drm_gem_object_unreference(&obj->base);
4705 unlock:
4706 mutex_unlock(&dev->struct_mutex);
4707 return ret;
4708 }
4709
4710 void i915_gem_object_init(struct drm_i915_gem_object *obj,
4711 const struct drm_i915_gem_object_ops *ops)
4712 {
4713 INIT_LIST_HEAD(&obj->global_list);
4714 INIT_LIST_HEAD(&obj->ring_list);
4715 INIT_LIST_HEAD(&obj->obj_exec_link);
4716 INIT_LIST_HEAD(&obj->vma_list);
4717
4718 obj->ops = ops;
4719
4720 obj->fence_reg = I915_FENCE_REG_NONE;
4721 obj->madv = I915_MADV_WILLNEED;
4722 /* Avoid an unnecessary call to unbind on the first bind. */
4723 obj->map_and_fenceable = true;
4724
4725 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
4726 }
4727
4728 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
4729 .get_pages = i915_gem_object_get_pages_gtt,
4730 .put_pages = i915_gem_object_put_pages_gtt,
4731 };
4732
4733 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
4734 size_t size)
4735 {
4736 struct drm_i915_gem_object *obj;
4737 #ifdef __NetBSD__
4738 uint64_t maxaddr;
4739 #else
4740 struct address_space *mapping;
4741 gfp_t mask;
4742 #endif
4743
4744 obj = i915_gem_object_alloc(dev);
4745 if (obj == NULL)
4746 return NULL;
4747
4748 if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4749 i915_gem_object_free(obj);
4750 return NULL;
4751 }
4752
4753 #ifdef __NetBSD__
4754 /*
4755 * 965GM can't handle >32-bit paddrs; all other models can't
4756 * handle >40-bit paddrs.
4757 *
4758 * XXX I think this table is incomplete. It should be
4759 * synchronized with the other DMA address constraints
4760 * scattered throughout DRM.
4761 *
4762 * XXX DMA limits
4763 */
4764 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev))
4765 maxaddr = 0xffffffffULL;
4766 else
4767 maxaddr = 0xffffffffffULL;
4768 uao_set_pgfl(obj->base.gemo_shm_uao, x86_select_freelist(maxaddr));
4769 #else
4770 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
4771 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
4772 /* 965gm cannot relocate objects above 4GiB. */
4773 mask &= ~__GFP_HIGHMEM;
4774 mask |= __GFP_DMA32;
4775 }
4776
4777 mapping = file_inode(obj->base.filp)->i_mapping;
4778 mapping_set_gfp_mask(mapping, mask);
4779 #endif
4780
4781 i915_gem_object_init(obj, &i915_gem_object_ops);
4782
4783 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4784 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4785
4786 if (HAS_LLC(dev)) {
4787 /* On some devices, we can have the GPU use the LLC (the CPU
4788 * cache) for about a 10% performance improvement
4789 * compared to uncached. Graphics requests other than
4790 * display scanout are coherent with the CPU in
4791 * accessing this cache. This means in this mode we
4792 * don't need to clflush on the CPU side, and on the
4793 * GPU side we only need to flush internal caches to
4794 * get data visible to the CPU.
4795 *
4796 * However, we maintain the display planes as UC, and so
4797 * need to rebind when first used as such.
4798 */
4799 obj->cache_level = I915_CACHE_LLC;
4800 } else
4801 obj->cache_level = I915_CACHE_NONE;
4802
4803 trace_i915_gem_object_create(obj);
4804
4805 return obj;
4806 }
4807
4808 void i915_gem_free_object(struct drm_gem_object *gem_obj)
4809 {
4810 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4811 struct drm_device *dev = obj->base.dev;
4812 struct drm_i915_private *dev_priv = dev->dev_private;
4813 struct i915_vma *vma, *next;
4814
4815 intel_runtime_pm_get(dev_priv);
4816
4817 trace_i915_gem_object_destroy(obj);
4818
4819 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
4820 int ret;
4821
4822 vma->pin_count = 0;
4823 ret = i915_vma_unbind(vma);
4824 if (WARN_ON(ret == -ERESTARTSYS)) {
4825 bool was_interruptible;
4826
4827 was_interruptible = dev_priv->mm.interruptible;
4828 dev_priv->mm.interruptible = false;
4829
4830 WARN_ON(i915_vma_unbind(vma));
4831
4832 dev_priv->mm.interruptible = was_interruptible;
4833 }
4834 }
4835
4836 i915_gem_object_detach_phys(obj);
4837
4838 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up
4839 * before progressing. */
4840 if (obj->stolen)
4841 i915_gem_object_unpin_pages(obj);
4842
4843 if (WARN_ON(obj->pages_pin_count))
4844 obj->pages_pin_count = 0;
4845 i915_gem_object_put_pages(obj);
4846 i915_gem_object_free_mmap_offset(obj);
4847 i915_gem_object_release_stolen(obj);
4848
4849 BUG_ON(obj->pages);
4850
4851 #ifndef __NetBSD__ /* XXX drm prime */
4852 if (obj->base.import_attach)
4853 drm_prime_gem_destroy(&obj->base, NULL);
4854 #endif
4855
4856 drm_gem_object_release(&obj->base);
4857 i915_gem_info_remove_obj(dev_priv, obj->base.size);
4858
4859 kfree(obj->bit_17);
4860 i915_gem_object_free(obj);
4861
4862 intel_runtime_pm_put(dev_priv);
4863 }
4864
4865 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
4866 struct i915_address_space *vm)
4867 {
4868 struct i915_vma *vma;
4869 list_for_each_entry(vma, &obj->vma_list, vma_link)
4870 if (vma->vm == vm)
4871 return vma;
4872
4873 return NULL;
4874 }
4875
4876 void i915_gem_vma_destroy(struct i915_vma *vma)
4877 {
4878 WARN_ON(vma->node.allocated);
4879
4880 /* Keep the vma as a placeholder in the execbuffer reservation lists */
4881 if (!list_empty(&vma->exec_list))
4882 return;
4883
4884 list_del(&vma->vma_link);
4885
4886 kfree(vma);
4887 }
4888
4889 int
4890 i915_gem_suspend(struct drm_device *dev)
4891 {
4892 struct drm_i915_private *dev_priv = dev->dev_private;
4893 int ret = 0;
4894
4895 mutex_lock(&dev->struct_mutex);
4896 if (dev_priv->ums.mm_suspended)
4897 goto err;
4898
4899 ret = i915_gpu_idle(dev);
4900 if (ret)
4901 goto err;
4902
4903 i915_gem_retire_requests(dev);
4904
4905 /* Under UMS, be paranoid and evict. */
4906 if (!drm_core_check_feature(dev, DRIVER_MODESET))
4907 i915_gem_evict_everything(dev);
4908
4909 i915_kernel_lost_context(dev);
4910 i915_gem_cleanup_ringbuffer(dev);
4911
4912 /* Hack! Don't let anybody do execbuf while we don't control the chip.
4913 * We need to replace this with a semaphore, or something.
4914 * And not confound ums.mm_suspended!
4915 */
4916 dev_priv->ums.mm_suspended = !drm_core_check_feature(dev,
4917 DRIVER_MODESET);
4918 mutex_unlock(&dev->struct_mutex);
4919
4920 del_timer_sync(&dev_priv->gpu_error.hangcheck_timer);
4921 cancel_delayed_work_sync(&dev_priv->mm.retire_work);
4922 cancel_delayed_work_sync(&dev_priv->mm.idle_work);
4923
4924 return 0;
4925
4926 err:
4927 mutex_unlock(&dev->struct_mutex);
4928 return ret;
4929 }
4930
4931 int i915_gem_l3_remap(struct intel_ring_buffer *ring, int slice)
4932 {
4933 struct drm_device *dev = ring->dev;
4934 struct drm_i915_private *dev_priv = dev->dev_private;
4935 u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
4936 u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
4937 int i, ret;
4938
4939 if (!HAS_L3_DPF(dev) || !remap_info)
4940 return 0;
4941
4942 ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3);
4943 if (ret)
4944 return ret;
4945
4946 /*
4947 * Note: We do not worry about the concurrent register cacheline hang
4948 * here because no other code should access these registers other than
4949 * at initialization time.
4950 */
4951 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
4952 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
4953 intel_ring_emit(ring, reg_base + i);
4954 intel_ring_emit(ring, remap_info[i/4]);
4955 }
4956
4957 intel_ring_advance(ring);
4958
4959 return ret;
4960 }
4961
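/*
 * Program the per-generation tiling/arbiter control registers so the
 * hardware applies the bit-6 swizzle pattern detected by
 * i915_gem_detect_bit_6_swizzle() to tiled surface accesses. This is a
 * no-op on gen4 and earlier, or when no swizzling is in use.
 */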
4962 void i915_gem_init_swizzling(struct drm_device *dev)
4963 {
4964 struct drm_i915_private *dev_priv = dev->dev_private;
4965
4966 if (INTEL_INFO(dev)->gen < 5 ||
4967 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4968 return;
4969
4970 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4971 DISP_TILE_SURFACE_SWIZZLING);
4972
4973 if (IS_GEN5(dev))
4974 return;
4975
4976 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4977 if (IS_GEN6(dev))
4978 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
4979 else if (IS_GEN7(dev))
4980 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
4981 else if (IS_GEN8(dev))
4982 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
4983 else
4984 BUG();
4985 }
4986
4987 static bool
4988 intel_enable_blt(struct drm_device *dev)
4989 {
4990 if (!HAS_BLT(dev))
4991 return false;
4992
4993 /* The blitter was dysfunctional on early prototypes */
4994 if (IS_GEN6(dev) && dev->pdev->revision < 8) {
4995 DRM_INFO("BLT not supported on this pre-production hardware;"
4996 " graphics performance will be degraded.\n");
4997 return false;
4998 }
4999
5000 return true;
5001 }
5002
5003 static int i915_gem_init_rings(struct drm_device *dev)
5004 {
5005 struct drm_i915_private *dev_priv = dev->dev_private;
5006 int ret;
5007
5008 ret = intel_init_render_ring_buffer(dev);
5009 if (ret)
5010 return ret;
5011
5012 if (HAS_BSD(dev)) {
5013 ret = intel_init_bsd_ring_buffer(dev);
5014 if (ret)
5015 goto cleanup_render_ring;
5016 }
5017
5018 if (intel_enable_blt(dev)) {
5019 ret = intel_init_blt_ring_buffer(dev);
5020 if (ret)
5021 goto cleanup_bsd_ring;
5022 }
5023
5024 if (HAS_VEBOX(dev)) {
5025 ret = intel_init_vebox_ring_buffer(dev);
5026 if (ret)
5027 goto cleanup_blt_ring;
5028 }
5029
5030
5031 ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
5032 if (ret)
5033 goto cleanup_vebox_ring;
5034
5035 return 0;
5036
5037 cleanup_vebox_ring:
5038 intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
5039 cleanup_blt_ring:
5040 intel_cleanup_ring_buffer(&dev_priv->ring[BCS]);
5041 cleanup_bsd_ring:
5042 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
5043 cleanup_render_ring:
5044 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
5045
5046 return ret;
5047 }
5048
5049 int
5050 i915_gem_init_hw(struct drm_device *dev)
5051 {
5052 struct drm_i915_private *dev_priv = dev->dev_private;
5053 int ret, i;
5054
5055 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
5056 return -EIO;
5057
5058 if (dev_priv->ellc_size)
5059 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
5060
5061 if (IS_HASWELL(dev))
5062 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
5063 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
5064
5065 if (HAS_PCH_NOP(dev)) {
5066 if (IS_IVYBRIDGE(dev)) {
5067 u32 temp = I915_READ(GEN7_MSG_CTL);
5068 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
5069 I915_WRITE(GEN7_MSG_CTL, temp);
5070 } else if (INTEL_INFO(dev)->gen >= 7) {
5071 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
5072 temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
5073 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
5074 }
5075 }
5076
5077 i915_gem_init_swizzling(dev);
5078
5079 ret = i915_gem_init_rings(dev);
5080 if (ret)
5081 return ret;
5082
5083 for (i = 0; i < NUM_L3_SLICES(dev); i++)
5084 i915_gem_l3_remap(&dev_priv->ring[RCS], i);
5085
5086 /*
5087 * XXX: Contexts should only be initialized once. Doing a switch to the
5088 * default context switch however is something we'd like to do after
5089 * reset or thaw (the latter may not actually be necessary for HW, but
5090 * goes with our code better). Context switching requires rings (for
5091 * the do_switch), but before enabling PPGTT. So don't move this.
5092 */
5093 ret = i915_gem_context_enable(dev_priv);
5094 if (ret) {
5095 DRM_ERROR("Context enable failed %d\n", ret);
5096 goto err_out;
5097 }
5098
5099 return 0;
5100
5101 err_out:
5102 i915_gem_cleanup_ringbuffer(dev);
5103 return ret;
5104 }
5105
5106 int i915_gem_init(struct drm_device *dev)
5107 {
5108 struct drm_i915_private *dev_priv = dev->dev_private;
5109 int ret;
5110
5111 mutex_lock(&dev->struct_mutex);
5112
5113 if (IS_VALLEYVIEW(dev)) {
5114 /* VLVA0 (potential hack), BIOS isn't actually waking us */
5115 I915_WRITE(VLV_GTLC_WAKE_CTRL, 1);
5116 if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 1) == 1, 10))
5117 DRM_DEBUG_DRIVER("allow wake ack timed out\n");
5118 }
5119 i915_gem_init_global_gtt(dev);
5120
5121 ret = i915_gem_context_init(dev);
5122 if (ret) {
5123 mutex_unlock(&dev->struct_mutex);
5124 return ret;
5125 }
5126
5127 ret = i915_gem_init_hw(dev);
5128 mutex_unlock(&dev->struct_mutex);
5129 if (ret) {
5130 WARN_ON(dev_priv->mm.aliasing_ppgtt);
5131 i915_gem_context_fini(dev);
5132 drm_mm_takedown(&dev_priv->gtt.base.mm);
5133 return ret;
5134 }
5135
5136 /* Allow hardware batchbuffers unless told otherwise, but not for KMS. */
5137 if (!drm_core_check_feature(dev, DRIVER_MODESET))
5138 dev_priv->dri1.allow_batchbuffer = 1;
5139 return 0;
5140 }
5141
5142 void
5143 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
5144 {
5145 struct drm_i915_private *dev_priv = dev->dev_private;
5146 struct intel_ring_buffer *ring;
5147 int i;
5148
5149 for_each_ring(ring, dev_priv, i)
5150 intel_cleanup_ring_buffer(ring);
5151 }
5152
5153 int
5154 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
5155 struct drm_file *file_priv)
5156 {
5157 struct drm_i915_private *dev_priv = dev->dev_private;
5158 int ret;
5159
5160 if (drm_core_check_feature(dev, DRIVER_MODESET))
5161 return 0;
5162
5163 if (i915_reset_in_progress(&dev_priv->gpu_error)) {
5164 DRM_ERROR("Reenabling wedged hardware, good luck\n");
5165 atomic_set(&dev_priv->gpu_error.reset_counter, 0);
5166 }
5167
5168 mutex_lock(&dev->struct_mutex);
5169 dev_priv->ums.mm_suspended = 0;
5170
5171 ret = i915_gem_init_hw(dev);
5172 if (ret != 0) {
5173 mutex_unlock(&dev->struct_mutex);
5174 return ret;
5175 }
5176
5177 BUG_ON(!list_empty(&dev_priv->gtt.base.active_list));
5178 mutex_unlock(&dev->struct_mutex);
5179
5180 ret = drm_irq_install(dev);
5181 if (ret)
5182 goto cleanup_ringbuffer;
5183
5184 return 0;
5185
5186 cleanup_ringbuffer:
5187 mutex_lock(&dev->struct_mutex);
5188 i915_gem_cleanup_ringbuffer(dev);
5189 dev_priv->ums.mm_suspended = 1;
5190 mutex_unlock(&dev->struct_mutex);
5191
5192 return ret;
5193 }
5194
5195 int
5196 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
5197 struct drm_file *file_priv)
5198 {
5199 if (drm_core_check_feature(dev, DRIVER_MODESET))
5200 return 0;
5201
5202 drm_irq_uninstall(dev);
5203
5204 return i915_gem_suspend(dev);
5205 }
5206
5207 void
5208 i915_gem_lastclose(struct drm_device *dev)
5209 {
5210 int ret;
5211
5212 if (drm_core_check_feature(dev, DRIVER_MODESET))
5213 return;
5214
5215 ret = i915_gem_suspend(dev);
5216 if (ret)
5217 DRM_ERROR("failed to idle hardware: %d\n", ret);
5218 }
5219
5220 static void
5221 init_ring_lists(struct intel_ring_buffer *ring)
5222 {
5223 INIT_LIST_HEAD(&ring->active_list);
5224 INIT_LIST_HEAD(&ring->request_list);
5225 }
5226
5227 void i915_init_vm(struct drm_i915_private *dev_priv,
5228 struct i915_address_space *vm)
5229 {
5230 if (!i915_is_ggtt(vm))
5231 drm_mm_init(&vm->mm, vm->start, vm->total);
5232 vm->dev = dev_priv->dev;
5233 INIT_LIST_HEAD(&vm->active_list);
5234 INIT_LIST_HEAD(&vm->inactive_list);
5235 INIT_LIST_HEAD(&vm->global_link);
5236 list_add_tail(&vm->global_link, &dev_priv->vm_list);
5237 }
5238
5239 void
5240 i915_gem_load(struct drm_device *dev)
5241 {
5242 struct drm_i915_private *dev_priv = dev->dev_private;
5243 int i;
5244
5245 dev_priv->slab =
5246 kmem_cache_create("i915_gem_object",
5247 sizeof(struct drm_i915_gem_object), 0,
5248 SLAB_HWCACHE_ALIGN,
5249 NULL);
5250
5251 INIT_LIST_HEAD(&dev_priv->vm_list);
5252 i915_init_vm(dev_priv, &dev_priv->gtt.base);
5253
5254 INIT_LIST_HEAD(&dev_priv->context_list);
5255 INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
5256 INIT_LIST_HEAD(&dev_priv->mm.bound_list);
5257 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
5258 for (i = 0; i < I915_NUM_RINGS; i++)
5259 init_ring_lists(&dev_priv->ring[i]);
5260 for (i = 0; i < I915_MAX_NUM_FENCES; i++)
5261 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
5262 INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
5263 i915_gem_retire_work_handler);
5264 INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
5265 i915_gem_idle_work_handler);
5266 init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
5267
5268 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
5269 if (IS_GEN3(dev)) {
5270 I915_WRITE(MI_ARB_STATE,
5271 _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
5272 }
5273
5274 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
5275
5276 /* Old X drivers will take 0-2 for front, back, depth buffers */
5277 if (!drm_core_check_feature(dev, DRIVER_MODESET))
5278 dev_priv->fence_reg_start = 3;
5279
5280 if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev))
5281 dev_priv->num_fence_regs = 32;
5282 else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
5283 dev_priv->num_fence_regs = 16;
5284 else
5285 dev_priv->num_fence_regs = 8;
5286
5287 /* Initialize fence registers to zero */
5288 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
5289 i915_gem_restore_fences(dev);
5290
5291 i915_gem_detect_bit_6_swizzle(dev);
5292 #ifdef __NetBSD__
5293 DRM_INIT_WAITQUEUE(&dev_priv->pending_flip_queue, "i915flip");
5294 spin_lock_init(&dev_priv->pending_flip_lock);
5295 #else
5296 init_waitqueue_head(&dev_priv->pending_flip_queue);
5297 #endif
5298
5299 dev_priv->mm.interruptible = true;
5300
5301 dev_priv->mm.inactive_shrinker.scan_objects = i915_gem_inactive_scan;
5302 dev_priv->mm.inactive_shrinker.count_objects = i915_gem_inactive_count;
5303 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
5304 register_shrinker(&dev_priv->mm.inactive_shrinker);
5305 }
5306
5307 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
5308 {
5309 struct drm_i915_file_private *file_priv = file->driver_priv;
5310
5311 cancel_delayed_work_sync(&file_priv->mm.idle_work);
5312
5313 /* Clean up our request list when the client is going away, so that
5314 * later retire_requests won't dereference our soon-to-be-gone
5315 * file_priv.
5316 */
5317 spin_lock(&file_priv->mm.lock);
5318 while (!list_empty(&file_priv->mm.request_list)) {
5319 struct drm_i915_gem_request *request;
5320
5321 request = list_first_entry(&file_priv->mm.request_list,
5322 struct drm_i915_gem_request,
5323 client_list);
5324 list_del(&request->client_list);
5325 request->file_priv = NULL;
5326 }
5327 spin_unlock(&file_priv->mm.lock);
5328 }
5329
5330 static void
5331 i915_gem_file_idle_work_handler(struct work_struct *work)
5332 {
5333 struct drm_i915_file_private *file_priv =
5334 container_of(work, typeof(*file_priv), mm.idle_work.work);
5335
5336 atomic_set(&file_priv->rps_wait_boost, false);
5337 }
5338
5339 int i915_gem_open(struct drm_device *dev, struct drm_file *file)
5340 {
5341 struct drm_i915_file_private *file_priv;
5342 int ret;
5343
5344 DRM_DEBUG_DRIVER("\n");
5345
5346 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
5347 if (!file_priv)
5348 return -ENOMEM;
5349
5350 file->driver_priv = file_priv;
5351 file_priv->dev_priv = dev->dev_private;
5352 file_priv->file = file;
5353
5354 spin_lock_init(&file_priv->mm.lock);
5355 INIT_LIST_HEAD(&file_priv->mm.request_list);
5356 INIT_DELAYED_WORK(&file_priv->mm.idle_work,
5357 i915_gem_file_idle_work_handler);
5358
5359 ret = i915_gem_context_open(dev, file);
5360 if (ret)
5361 kfree(file_priv);
5362
5363 return ret;
5364 }
5365
5366 #ifndef __NetBSD__
5367 static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
5368 {
5369 if (!mutex_is_locked(mutex))
5370 return false;
5371
5372 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)
5373 return mutex->owner == task;
5374 #else
5375 /* Since UP may be pre-empted, we cannot assume that we own the lock */
5376 return false;
5377 #endif
5378 }
5379 #endif
5380
5381 static unsigned long
5382 i915_gem_inactive_count(struct shrinker *shrinker, struct shrink_control *sc)
5383 {
5384 #ifdef __NetBSD__ /* XXX shrinkers */
5385 return 0;
5386 #else
5387 struct drm_i915_private *dev_priv =
5388 container_of(shrinker,
5389 struct drm_i915_private,
5390 mm.inactive_shrinker);
5391 struct drm_device *dev = dev_priv->dev;
5392 struct drm_i915_gem_object *obj;
5393 bool unlock = true;
5394 unsigned long count;
5395
5396 if (!mutex_trylock(&dev->struct_mutex)) {
5397 if (!mutex_is_locked_by(&dev->struct_mutex, current))
5398 return 0;
5399
5400 if (dev_priv->mm.shrinker_no_lock_stealing)
5401 return 0;
5402
5403 unlock = false;
5404 }
5405
5406 count = 0;
5407 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list)
5408 if (obj->pages_pin_count == 0)
5409 count += obj->base.size >> PAGE_SHIFT;
5410
5411 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
5412 if (obj->active)
5413 continue;
5414
5415 if (!i915_gem_obj_is_pinned(obj) && obj->pages_pin_count == 0)
5416 count += obj->base.size >> PAGE_SHIFT;
5417 }
5418
5419 if (unlock)
5420 mutex_unlock(&dev->struct_mutex);
5421
5422 return count;
5423 #endif
5424 }
5425
5426 /* All the new VM stuff */
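/*
 * Queries against the aliasing PPGTT are redirected to the global GTT
 * below, since the aliasing PPGTT mirrors the GGTT address layout.
 */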
5427 unsigned long i915_gem_obj_offset(struct drm_i915_gem_object *o,
5428 struct i915_address_space *vm)
5429 {
5430 struct drm_i915_private *dev_priv = o->base.dev->dev_private;
5431 struct i915_vma *vma;
5432
5433 if (!dev_priv->mm.aliasing_ppgtt ||
5434 vm == &dev_priv->mm.aliasing_ppgtt->base)
5435 vm = &dev_priv->gtt.base;
5436
5437 BUG_ON(list_empty(&o->vma_list));
5438 list_for_each_entry(vma, &o->vma_list, vma_link) {
5439 if (vma->vm == vm)
5440 return vma->node.start;
5441
5442 }
5443 return -1;
5444 }
5445
5446 bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
5447 struct i915_address_space *vm)
5448 {
5449 struct i915_vma *vma;
5450
5451 list_for_each_entry(vma, &o->vma_list, vma_link)
5452 if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
5453 return true;
5454
5455 return false;
5456 }
5457
5458 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
5459 {
5460 struct i915_vma *vma;
5461
5462 list_for_each_entry(vma, &o->vma_list, vma_link)
5463 if (drm_mm_node_allocated(&vma->node))
5464 return true;
5465
5466 return false;
5467 }
5468
5469 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
5470 struct i915_address_space *vm)
5471 {
5472 struct drm_i915_private *dev_priv = o->base.dev->dev_private;
5473 struct i915_vma *vma;
5474
5475 if (!dev_priv->mm.aliasing_ppgtt ||
5476 vm == &dev_priv->mm.aliasing_ppgtt->base)
5477 vm = &dev_priv->gtt.base;
5478
5479 BUG_ON(list_empty(&o->vma_list));
5480
5481 list_for_each_entry(vma, &o->vma_list, vma_link)
5482 if (vma->vm == vm)
5483 return vma->node.size;
5484
5485 return 0;
5486 }
5487
5488
5489 static unsigned long
5490 i915_gem_inactive_scan(struct shrinker *shrinker, struct shrink_control *sc)
5491 {
5492 #ifdef __NetBSD__ /* XXX shrinkers */
5493 return 0;
5494 #else
5495 struct drm_i915_private *dev_priv =
5496 container_of(shrinker,
5497 struct drm_i915_private,
5498 mm.inactive_shrinker);
5499 struct drm_device *dev = dev_priv->dev;
5500 unsigned long freed;
5501 bool unlock = true;
5502
5503 if (!mutex_trylock(&dev->struct_mutex)) {
5504 if (!mutex_is_locked_by(&dev->struct_mutex, current))
5505 return SHRINK_STOP;
5506
5507 if (dev_priv->mm.shrinker_no_lock_stealing)
5508 return SHRINK_STOP;
5509
5510 unlock = false;
5511 }
5512
5513 freed = i915_gem_purge(dev_priv, sc->nr_to_scan);
5514 if (freed < sc->nr_to_scan)
5515 freed += __i915_gem_shrink(dev_priv,
5516 sc->nr_to_scan - freed,
5517 false);
5518 if (freed < sc->nr_to_scan)
5519 freed += i915_gem_shrink_all(dev_priv);
5520
5521 if (unlock)
5522 mutex_unlock(&dev->struct_mutex);
5523
5524 return freed;
#endif
5525 }
5526
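/*
 * The GGTT VMA, when present, is kept at the head of obj->vma_list, so a
 * single list_first_entry() plus a vm check is sufficient here.
 */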
5527 struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
5528 {
5529 struct i915_vma *vma;
5530
5531 if (WARN_ON(list_empty(&obj->vma_list)))
5532 return NULL;
5533
5534 vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link);
5535 if (vma->vm != obj_to_ggtt(obj))
5536 return NULL;
5537
5538 return vma;
5539 }
5540