i915_gem.c revision 1.14.2.2 1 /*
2 * Copyright 2008 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric (at) anholt.net>
25 *
26 */
27
28 #ifdef __NetBSD__
29 #if 0 /* XXX uvmhist option? */
30 #include "opt_uvmhist.h"
31 #endif
32
33 #include <sys/types.h>
34 #include <sys/param.h>
35
36 #include <uvm/uvm.h>
37 #include <uvm/uvm_extern.h>
38 #include <uvm/uvm_fault.h>
39 #include <uvm/uvm_page.h>
40 #include <uvm/uvm_pmap.h>
41 #include <uvm/uvm_prot.h>
42
43 #include <drm/bus_dma_hacks.h>
44 #endif
45
46 #include <drm/drmP.h>
47 #include <drm/drm_vma_manager.h>
48 #include <drm/i915_drm.h>
49 #include "i915_drv.h"
50 #include "i915_trace.h"
51 #include "intel_drv.h"
52 #include <linux/shmem_fs.h>
53 #include <linux/slab.h>
54 #include <linux/swap.h>
55 #include <linux/pci.h>
56 #include <linux/dma-buf.h>
57 #include <linux/errno.h>
58 #include <linux/time.h>
59 #include <linux/err.h>
60 #include <linux/bitops.h>
61 #include <linux/printk.h>
62 #include <asm/param.h>
63 #include <asm/page.h>
64
65 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
66 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
67 bool force);
68 static __must_check int
69 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
70 bool readonly);
71
72 static void i915_gem_write_fence(struct drm_device *dev, int reg,
73 struct drm_i915_gem_object *obj);
74 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
75 struct drm_i915_fence_reg *fence,
76 bool enable);
77
78 static unsigned long i915_gem_inactive_count(struct shrinker *shrinker,
79 struct shrink_control *sc);
80 static unsigned long i915_gem_inactive_scan(struct shrinker *shrinker,
81 struct shrink_control *sc);
82 static unsigned long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
83 static unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv);
84 static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
85 static void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring);
86
87 static bool cpu_cache_is_coherent(struct drm_device *dev,
88 enum i915_cache_level level)
89 {
90 return HAS_LLC(dev) || level != I915_CACHE_NONE;
91 }
92
93 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
94 {
95 if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
96 return true;
97
98 return obj->pin_display;
99 }
100
101 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
102 {
103 if (obj->tiling_mode)
104 i915_gem_release_mmap(obj);
105
106 /* As we do not have an associated fence register, we will force
107 * a tiling change if we ever need to acquire one.
108 */
109 obj->fence_dirty = false;
110 obj->fence_reg = I915_FENCE_REG_NONE;
111 }
112
113 /* some bookkeeping */
114 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
115 size_t size)
116 {
117 spin_lock(&dev_priv->mm.object_stat_lock);
118 dev_priv->mm.object_count++;
119 dev_priv->mm.object_memory += size;
120 spin_unlock(&dev_priv->mm.object_stat_lock);
121 }
122
123 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
124 size_t size)
125 {
126 spin_lock(&dev_priv->mm.object_stat_lock);
127 dev_priv->mm.object_count--;
128 dev_priv->mm.object_memory -= size;
129 spin_unlock(&dev_priv->mm.object_stat_lock);
130 }
131
132 static int
133 i915_gem_wait_for_error(struct i915_gpu_error *error)
134 {
135 int ret;
136
137 #define EXIT_COND (!i915_reset_in_progress(error) || \
138 i915_terminally_wedged(error))
139 if (EXIT_COND)
140 return 0;
141
142 /*
143 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
144 * userspace. If it takes that long something really bad is going on and
145 * we should simply try to bail out and fail as gracefully as possible.
146 */
147 #ifdef __NetBSD__
148 spin_lock(&error->reset_lock);
149 DRM_SPIN_TIMED_WAIT_UNTIL(ret, &error->reset_queue, &error->reset_lock,
150 10*HZ, EXIT_COND);
151 spin_unlock(&error->reset_lock);
152 #else
153 ret = wait_event_interruptible_timeout(error->reset_queue,
154 EXIT_COND,
155 10*HZ);
156 #endif
157 if (ret == 0) {
158 DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
159 return -EIO;
160 } else if (ret < 0) {
161 return ret;
162 }
163 #undef EXIT_COND
164
165 return 0;
166 }
167
168 int i915_mutex_lock_interruptible(struct drm_device *dev)
169 {
170 struct drm_i915_private *dev_priv = dev->dev_private;
171 int ret;
172
173 ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
174 if (ret)
175 return ret;
176
177 ret = mutex_lock_interruptible(&dev->struct_mutex);
178 if (ret)
179 return ret;
180
181 WARN_ON(i915_verify_lists(dev));
182 return 0;
183 }
184
185 static inline bool
186 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
187 {
188 return i915_gem_obj_bound_any(obj) && !obj->active;
189 }
190
191 int
192 i915_gem_init_ioctl(struct drm_device *dev, void *data,
193 struct drm_file *file)
194 {
195 struct drm_i915_private *dev_priv = dev->dev_private;
196 struct drm_i915_gem_init *args = data;
197
198 if (drm_core_check_feature(dev, DRIVER_MODESET))
199 return -ENODEV;
200
201 if (args->gtt_start >= args->gtt_end ||
202 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
203 return -EINVAL;
204
205 /* GEM with user mode setting was never supported on ilk and later. */
206 if (INTEL_INFO(dev)->gen >= 5)
207 return -ENODEV;
208
209 mutex_lock(&dev->struct_mutex);
210 i915_gem_setup_global_gtt(dev, args->gtt_start, args->gtt_end,
211 args->gtt_end);
212 dev_priv->gtt.mappable_end = args->gtt_end;
213 mutex_unlock(&dev->struct_mutex);
214
215 return 0;
216 }
217
218 int
219 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
220 struct drm_file *file)
221 {
222 struct drm_i915_private *dev_priv = dev->dev_private;
223 struct drm_i915_gem_get_aperture *args = data;
224 struct drm_i915_gem_object *obj;
225 size_t pinned;
226
227 pinned = 0;
228 mutex_lock(&dev->struct_mutex);
229 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
230 if (i915_gem_obj_is_pinned(obj))
231 pinned += i915_gem_obj_ggtt_size(obj);
232 mutex_unlock(&dev->struct_mutex);
233
234 args->aper_size = dev_priv->gtt.base.total;
235 args->aper_available_size = args->aper_size - pinned;
236
237 return 0;
238 }
239
240 static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
241 {
242 drm_dma_handle_t *phys = obj->phys_handle;
243
244 if (!phys)
245 return;
246
247 if (obj->madv == I915_MADV_WILLNEED) {
248 #ifdef __NetBSD__
249 const char *vaddr = phys->vaddr;
250 unsigned i;
251
252 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
253 struct pglist pages;
254 int error;
255
256 TAILQ_INIT(&pages);
257 error = uvm_obj_wirepages(obj->base.gemo_shm_uao,
258 i*PAGE_SIZE, (i+1)*PAGE_SIZE, &pages);
259 if (error)
260 continue;
261
262 struct vm_page *const vm_page = TAILQ_FIRST(&pages);
263 struct page *const page = container_of(vm_page,
264 struct page, p_vmp);
265 char *const dst = kmap_atomic(page);
266 (void)memcpy(dst, vaddr + (i*PAGE_SIZE), PAGE_SIZE);
267 drm_clflush_virt_range(dst, PAGE_SIZE);
268 kunmap_atomic(dst);
269
270 vm_page->flags &= ~PG_CLEAN;
271 /* XXX mark page accessed */
272 uvm_obj_unwirepages(obj->base.gemo_shm_uao,
273 i*PAGE_SIZE, (i+1)*PAGE_SIZE);
274 }
275 #else
276 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
277 char *vaddr = phys->vaddr;
278 int i;
279
280 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
281 struct page *page = shmem_read_mapping_page(mapping, i);
282 if (!IS_ERR(page)) {
283 char *dst = kmap_atomic(page);
284 memcpy(dst, vaddr, PAGE_SIZE);
285 drm_clflush_virt_range(dst, PAGE_SIZE);
286 kunmap_atomic(dst);
287
288 set_page_dirty(page);
289 mark_page_accessed(page);
290 page_cache_release(page);
291 }
292 vaddr += PAGE_SIZE;
293 }
294 #endif
295 i915_gem_chipset_flush(obj->base.dev);
296 }
297
298 #ifndef __NetBSD__
299 #ifdef CONFIG_X86
300 set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
301 #endif
302 #endif
303 drm_pci_free(obj->base.dev, phys);
304 obj->phys_handle = NULL;
305 }
306
307 int
308 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
309 int align)
310 {
311 drm_dma_handle_t *phys;
312 #ifndef __NetBSD__
313 struct address_space *mapping;
314 #endif
315 char *vaddr;
316 int i;
317
318 if (obj->phys_handle) {
319 if ((unsigned long)obj->phys_handle->vaddr & (align -1))
320 return -EBUSY;
321
322 return 0;
323 }
324
325 if (obj->madv != I915_MADV_WILLNEED)
326 return -EFAULT;
327
328 #ifdef __NetBSD__
329 if (obj->base.gemo_shm_uao == NULL)
330 return -EINVAL;
331 #else
332 if (obj->base.filp == NULL)
333 return -EINVAL;
334 #endif
335
336 /* create a new object */
337 phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
338 if (!phys)
339 return -ENOMEM;
340
341 vaddr = phys->vaddr;
342 #ifndef __NetBSD__
343 #ifdef CONFIG_X86
344 set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
345 #endif
346 mapping = file_inode(obj->base.filp)->i_mapping;
347 #endif
348 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
349 struct page *page;
350 char *src;
351
352 #ifdef __NetBSD__
353 struct pglist pages;
354 int ret;
355
356 TAILQ_INIT(&pages);
357
358 /* XXX errno NetBSD->Linux */
359 ret = -uvm_obj_wirepages(obj->base.gemo_shm_uao, i*PAGE_SIZE,
360 (i+1)*PAGE_SIZE, &pages);
361 if (ret) {
362 drm_pci_free(obj->base.dev, phys);
363 return ret;
364 }
365 KASSERT(!TAILQ_EMPTY(&pages));
366 page = container_of(TAILQ_FIRST(&pages), struct page, p_vmp);
367 #else
368 page = shmem_read_mapping_page(mapping, i);
369 if (IS_ERR(page)) {
370 #ifdef CONFIG_X86
371 set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
372 #endif
373 drm_pci_free(obj->base.dev, phys);
374 return PTR_ERR(page);
375 }
376 #endif /* defined(__NetBSD__) */
377
378 src = kmap_atomic(page);
379 memcpy(vaddr, src, PAGE_SIZE);
380 kunmap_atomic(src);
381
382 #ifndef __NetBSD__
383 mark_page_accessed(page);
384 page_cache_release(page);
385 #endif
386
387 vaddr += PAGE_SIZE;
388 }
389
390 obj->phys_handle = phys;
391 return 0;
392 }
393
394 static int
395 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
396 struct drm_i915_gem_pwrite *args,
397 struct drm_file *file_priv)
398 {
399 struct drm_device *dev = obj->base.dev;
400 void *vaddr = (char *)obj->phys_handle->vaddr + args->offset;
401 char __user *user_data = to_user_ptr(args->data_ptr);
402
403 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
404 unsigned long unwritten;
405
406 /* The physical object once assigned is fixed for the lifetime
407 * of the obj, so we can safely drop the lock and continue
408 * to access vaddr.
409 */
410 mutex_unlock(&dev->struct_mutex);
411 unwritten = copy_from_user(vaddr, user_data, args->size);
412 mutex_lock(&dev->struct_mutex);
413 if (unwritten)
414 return -EFAULT;
415 }
416
417 i915_gem_chipset_flush(dev);
418 return 0;
419 }
420
421 void *i915_gem_object_alloc(struct drm_device *dev)
422 {
423 struct drm_i915_private *dev_priv = dev->dev_private;
424 return kmem_cache_zalloc(dev_priv->slab, GFP_KERNEL);
425 }
426
427 void i915_gem_object_free(struct drm_i915_gem_object *obj)
428 {
429 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
430 kmem_cache_free(dev_priv->slab, obj);
431 }
432
433 static int
434 i915_gem_create(struct drm_file *file,
435 struct drm_device *dev,
436 uint64_t size,
437 uint32_t *handle_p)
438 {
439 struct drm_i915_gem_object *obj;
440 int ret;
441 u32 handle;
442
443 size = roundup(size, PAGE_SIZE);
444 if (size == 0)
445 return -EINVAL;
446
447 /* Allocate the new object */
448 obj = i915_gem_alloc_object(dev, size);
449 if (obj == NULL)
450 return -ENOMEM;
451
452 ret = drm_gem_handle_create(file, &obj->base, &handle);
453 /* drop reference from allocate - handle holds it now */
454 drm_gem_object_unreference_unlocked(&obj->base);
455 if (ret)
456 return ret;
457
458 *handle_p = handle;
459 return 0;
460 }
461
462 int
463 i915_gem_dumb_create(struct drm_file *file,
464 struct drm_device *dev,
465 struct drm_mode_create_dumb *args)
466 {
467 /* have to work out size/pitch and return them */
468 #ifdef __NetBSD__ /* ALIGN means something else. */
469 args->pitch = round_up(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
470 #else
471 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
472 #endif
473 args->size = args->pitch * args->height;
474 return i915_gem_create(file, dev,
475 args->size, &args->handle);
476 }
477
478 /**
479 * Creates a new mm object and returns a handle to it.
480 */
481 int
482 i915_gem_create_ioctl(struct drm_device *dev, void *data,
483 struct drm_file *file)
484 {
485 struct drm_i915_gem_create *args = data;
486
487 return i915_gem_create(file, dev,
488 args->size, &args->handle);
489 }
490
491 static inline int
492 __copy_to_user_swizzled(char __user *cpu_vaddr,
493 const char *gpu_vaddr, int gpu_offset,
494 int length)
495 {
496 int ret, cpu_offset = 0;
497
498 while (length > 0) {
499 #ifdef __NetBSD__ /* XXX ALIGN means something else. */
500 int cacheline_end = round_up(gpu_offset + 1, 64);
501 #else
502 int cacheline_end = ALIGN(gpu_offset + 1, 64);
503 #endif
504 int this_length = min(cacheline_end - gpu_offset, length);
505 int swizzled_gpu_offset = gpu_offset ^ 64;
506
507 ret = __copy_to_user(cpu_vaddr + cpu_offset,
508 gpu_vaddr + swizzled_gpu_offset,
509 this_length);
510 if (ret)
511 return ret + length;
512
513 cpu_offset += this_length;
514 gpu_offset += this_length;
515 length -= this_length;
516 }
517
518 return 0;
519 }
520
521 static inline int
522 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
523 const char __user *cpu_vaddr,
524 int length)
525 {
526 int ret, cpu_offset = 0;
527
528 while (length > 0) {
529 #ifdef __NetBSD__ /* XXX ALIGN means something else. */
530 int cacheline_end = round_up(gpu_offset + 1, 64);
531 #else
532 int cacheline_end = ALIGN(gpu_offset + 1, 64);
533 #endif
534 int this_length = min(cacheline_end - gpu_offset, length);
535 int swizzled_gpu_offset = gpu_offset ^ 64;
536
537 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
538 cpu_vaddr + cpu_offset,
539 this_length);
540 if (ret)
541 return ret + length;
542
543 cpu_offset += this_length;
544 gpu_offset += this_length;
545 length -= this_length;
546 }
547
548 return 0;
549 }
550
551 /*
552 * Pins the specified object's pages and synchronizes the object with
553 * GPU accesses. Sets needs_clflush to non-zero if the caller should
554 * flush the object from the CPU cache.
555 */
556 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
557 int *needs_clflush)
558 {
559 int ret;
560
561 *needs_clflush = 0;
562
563 #ifdef __NetBSD__
564 if (obj->base.gemo_shm_uao == NULL)
565 return -EINVAL;
566 #else
567 if (!obj->base.filp)
568 return -EINVAL;
569 #endif
570
571 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
572 /* If we're not in the cpu read domain, set ourself into the gtt
573 * read domain and manually flush cachelines (if required). This
574 * optimizes for the case when the gpu will dirty the data
575 * anyway again before the next pread happens. */
576 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
577 obj->cache_level);
578 ret = i915_gem_object_wait_rendering(obj, true);
579 if (ret)
580 return ret;
581 }
582
583 ret = i915_gem_object_get_pages(obj);
584 if (ret)
585 return ret;
586
587 i915_gem_object_pin_pages(obj);
588
589 return ret;
590 }
591
592 /* Per-page copy function for the shmem pread fastpath.
593 * Flushes invalid cachelines before reading the target if
594 * needs_clflush is set. */
595 static int
596 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
597 char __user *user_data,
598 bool page_do_bit17_swizzling, bool needs_clflush)
599 {
600 #ifdef __NetBSD__ /* XXX atomic shmem fast path */
601 return -EFAULT;
602 #else
603 char *vaddr;
604 int ret;
605
606 if (unlikely(page_do_bit17_swizzling))
607 return -EINVAL;
608
609 vaddr = kmap_atomic(page);
610 if (needs_clflush)
611 drm_clflush_virt_range(vaddr + shmem_page_offset,
612 page_length);
613 ret = __copy_to_user_inatomic(user_data,
614 vaddr + shmem_page_offset,
615 page_length);
616 kunmap_atomic(vaddr);
617
618 return ret ? -EFAULT : 0;
619 #endif
620 }
621
622 static void
623 shmem_clflush_swizzled_range(char *addr, unsigned long length,
624 bool swizzled)
625 {
626 if (unlikely(swizzled)) {
627 unsigned long start = (unsigned long) addr;
628 unsigned long end = (unsigned long) addr + length;
629
630 /* For swizzling simply ensure that we always flush both
631 * channels. Lame, but simple and it works. Swizzled
632 * pwrite/pread is far from a hotpath - current userspace
633 * doesn't use it at all. */
634 start = round_down(start, 128);
635 end = round_up(end, 128);
636
637 drm_clflush_virt_range((void *)start, end - start);
638 } else {
639 drm_clflush_virt_range(addr, length);
640 }
641
642 }
643
644 /* Only difference to the fast-path function is that this can handle bit17
645 * and uses non-atomic copy and kmap functions. */
646 static int
647 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
648 char __user *user_data,
649 bool page_do_bit17_swizzling, bool needs_clflush)
650 {
651 char *vaddr;
652 int ret;
653
654 vaddr = kmap(page);
655 if (needs_clflush)
656 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
657 page_length,
658 page_do_bit17_swizzling);
659
660 if (page_do_bit17_swizzling)
661 ret = __copy_to_user_swizzled(user_data,
662 vaddr, shmem_page_offset,
663 page_length);
664 else
665 ret = __copy_to_user(user_data,
666 vaddr + shmem_page_offset,
667 page_length);
668 kunmap(page);
669
670 return ret ? - EFAULT : 0;
671 }
672
673 static int
674 i915_gem_shmem_pread(struct drm_device *dev,
675 struct drm_i915_gem_object *obj,
676 struct drm_i915_gem_pread *args,
677 struct drm_file *file)
678 {
679 char __user *user_data;
680 ssize_t remain;
681 loff_t offset;
682 int shmem_page_offset, page_length, ret = 0;
683 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
684 #ifndef __NetBSD__ /* XXX */
685 int prefaulted = 0;
686 #endif
687 int needs_clflush = 0;
688 #ifndef __NetBSD__
689 struct sg_page_iter sg_iter;
690 #endif
691
692 user_data = to_user_ptr(args->data_ptr);
693 remain = args->size;
694
695 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
696
697 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
698 if (ret)
699 return ret;
700
701 offset = args->offset;
702
703 #ifdef __NetBSD__
704 /*
705 * XXX This is a big #ifdef with a lot of duplicated code, but
706 * factoring out the loop head -- which is all that
707 * substantially differs -- is probably more trouble than it's
708 * worth at the moment.
709 */
710 while (0 < remain) {
711 /* Get the next page. */
712 shmem_page_offset = offset_in_page(offset);
713 KASSERT(shmem_page_offset < PAGE_SIZE);
714 page_length = MIN(remain, (PAGE_SIZE - shmem_page_offset));
715 struct page *const page = i915_gem_object_get_page(obj,
716 atop(offset));
717
718 /* Decide whether to swizzle bit 17. */
719 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
720 (page_to_phys(page) & (1 << 17)) != 0;
721
722 /* Try the fast path. */
723 ret = shmem_pread_fast(page, shmem_page_offset, page_length,
724 user_data, page_do_bit17_swizzling, needs_clflush);
725 if (ret == 0)
726 goto next_page;
727
728 /* Fast path failed. Try the slow path. */
729 mutex_unlock(&dev->struct_mutex);
730 /* XXX prefault */
731 ret = shmem_pread_slow(page, shmem_page_offset, page_length,
732 user_data, page_do_bit17_swizzling, needs_clflush);
733 mutex_lock(&dev->struct_mutex);
734 if (ret)
735 goto out;
736
737 next_page: KASSERT(page_length <= remain);
738 remain -= page_length;
739 user_data += page_length;
740 offset += page_length;
741 }
742 #else
743 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
744 offset >> PAGE_SHIFT) {
745 struct page *page = sg_page_iter_page(&sg_iter);
746
747 if (remain <= 0)
748 break;
749
750 /* Operation in this page
751 *
752 * shmem_page_offset = offset within page in shmem file
753 * page_length = bytes to copy for this page
754 */
755 shmem_page_offset = offset_in_page(offset);
756 page_length = remain;
757 if ((shmem_page_offset + page_length) > PAGE_SIZE)
758 page_length = PAGE_SIZE - shmem_page_offset;
759
760 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
761 (page_to_phys(page) & (1 << 17)) != 0;
762
763 ret = shmem_pread_fast(page, shmem_page_offset, page_length,
764 user_data, page_do_bit17_swizzling,
765 needs_clflush);
766 if (ret == 0)
767 goto next_page;
768
769 mutex_unlock(&dev->struct_mutex);
770
771 if (likely(!i915.prefault_disable) && !prefaulted) {
772 ret = fault_in_multipages_writeable(user_data, remain);
773 /* Userspace is tricking us, but we've already clobbered
774 * its pages with the prefault and promised to write the
775 * data up to the first fault. Hence ignore any errors
776 * and just continue. */
777 (void)ret;
778 prefaulted = 1;
779 }
780
781 ret = shmem_pread_slow(page, shmem_page_offset, page_length,
782 user_data, page_do_bit17_swizzling,
783 needs_clflush);
784
785 mutex_lock(&dev->struct_mutex);
786
787 if (ret)
788 goto out;
789
790 next_page:
791 remain -= page_length;
792 user_data += page_length;
793 offset += page_length;
794 }
795 #endif
796
797 out:
798 i915_gem_object_unpin_pages(obj);
799
800 return ret;
801 }
802
803 /**
804 * Reads data from the object referenced by handle.
805 *
806 * On error, the contents of *data are undefined.
807 */
808 int
809 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
810 struct drm_file *file)
811 {
812 struct drm_i915_gem_pread *args = data;
813 struct drm_i915_gem_object *obj;
814 int ret = 0;
815
816 if (args->size == 0)
817 return 0;
818
819 if (!access_ok(VERIFY_WRITE,
820 to_user_ptr(args->data_ptr),
821 args->size))
822 return -EFAULT;
823
824 ret = i915_mutex_lock_interruptible(dev);
825 if (ret)
826 return ret;
827
828 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
829 if (&obj->base == NULL) {
830 ret = -ENOENT;
831 goto unlock;
832 }
833
834 /* Bounds check source. */
835 if (args->offset > obj->base.size ||
836 args->size > obj->base.size - args->offset) {
837 ret = -EINVAL;
838 goto out;
839 }
840
841 /* prime objects have no backing filp to GEM pread/pwrite
842 * pages from.
843 */
844 #ifdef __NetBSD__
845 /* Also stolen objects. */
846 if (obj->base.gemo_shm_uao == NULL) {
847 ret = -EINVAL;
848 goto out;
849 }
850 #else
851 if (!obj->base.filp) {
852 ret = -EINVAL;
853 goto out;
854 }
855 #endif
856
857 trace_i915_gem_object_pread(obj, args->offset, args->size);
858
859 ret = i915_gem_shmem_pread(dev, obj, args, file);
860
861 out:
862 drm_gem_object_unreference(&obj->base);
863 unlock:
864 mutex_unlock(&dev->struct_mutex);
865 return ret;
866 }
867
868 /* This is the fast write path which cannot handle
869 * page faults in the source data
870 */
871
872 static inline int
873 fast_user_write(struct io_mapping *mapping,
874 loff_t page_base, int page_offset,
875 char __user *user_data,
876 int length)
877 {
878 #ifdef __NetBSD__ /* XXX atomic shmem fast path */
879 return -EFAULT;
880 #else
881 void __iomem *vaddr_atomic;
882 void *vaddr;
883 unsigned long unwritten;
884
885 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
886 /* We can use the cpu mem copy function because this is X86. */
887 vaddr = (void __force*)vaddr_atomic + page_offset;
888 unwritten = __copy_from_user_inatomic_nocache(vaddr,
889 user_data, length);
890 io_mapping_unmap_atomic(vaddr_atomic);
891 return unwritten;
892 #endif
893 }
894
895 /**
896 * This is the fast pwrite path, where we copy the data directly from the
897 * user into the GTT, uncached.
898 */
899 static int
900 i915_gem_gtt_pwrite_fast(struct drm_device *dev,
901 struct drm_i915_gem_object *obj,
902 struct drm_i915_gem_pwrite *args,
903 struct drm_file *file)
904 {
905 struct drm_i915_private *dev_priv = dev->dev_private;
906 ssize_t remain;
907 loff_t offset, page_base;
908 char __user *user_data;
909 int page_offset, page_length, ret;
910
911 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
912 if (ret)
913 goto out;
914
915 ret = i915_gem_object_set_to_gtt_domain(obj, true);
916 if (ret)
917 goto out_unpin;
918
919 ret = i915_gem_object_put_fence(obj);
920 if (ret)
921 goto out_unpin;
922
923 user_data = to_user_ptr(args->data_ptr);
924 remain = args->size;
925
926 offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
927
928 while (remain > 0) {
929 /* Operation in this page
930 *
931 * page_base = page offset within aperture
932 * page_offset = offset within page
933 * page_length = bytes to copy for this page
934 */
935 page_base = offset & PAGE_MASK;
936 page_offset = offset_in_page(offset);
937 page_length = remain;
938 if ((page_offset + remain) > PAGE_SIZE)
939 page_length = PAGE_SIZE - page_offset;
940
941 /* If we get a fault while copying data, then (presumably) our
942 * source page isn't available. Return the error and we'll
943 * retry in the slow path.
944 */
945 if (fast_user_write(dev_priv->gtt.mappable, page_base,
946 page_offset, user_data, page_length)) {
947 ret = -EFAULT;
948 goto out_unpin;
949 }
950
951 remain -= page_length;
952 user_data += page_length;
953 offset += page_length;
954 }
955
956 out_unpin:
957 i915_gem_object_ggtt_unpin(obj);
958 out:
959 return ret;
960 }
961
962 /* Per-page copy function for the shmem pwrite fastpath.
963 * Flushes invalid cachelines before writing to the target if
964 * needs_clflush_before is set and flushes out any written cachelines after
965 * writing if needs_clflush is set. */
966 static int
967 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
968 char __user *user_data,
969 bool page_do_bit17_swizzling,
970 bool needs_clflush_before,
971 bool needs_clflush_after)
972 {
973 #ifdef __NetBSD__
974 return -EFAULT;
975 #else
976 char *vaddr;
977 int ret;
978
979 if (unlikely(page_do_bit17_swizzling))
980 return -EINVAL;
981
982 vaddr = kmap_atomic(page);
983 if (needs_clflush_before)
984 drm_clflush_virt_range(vaddr + shmem_page_offset,
985 page_length);
986 ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
987 user_data, page_length);
988 if (needs_clflush_after)
989 drm_clflush_virt_range(vaddr + shmem_page_offset,
990 page_length);
991 kunmap_atomic(vaddr);
992
993 return ret ? -EFAULT : 0;
994 #endif
995 }
996
997 /* Only difference to the fast-path function is that this can handle bit17
998 * and uses non-atomic copy and kmap functions. */
999 static int
1000 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
1001 char __user *user_data,
1002 bool page_do_bit17_swizzling,
1003 bool needs_clflush_before,
1004 bool needs_clflush_after)
1005 {
1006 char *vaddr;
1007 int ret;
1008
1009 vaddr = kmap(page);
1010 if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
1011 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
1012 page_length,
1013 page_do_bit17_swizzling);
1014 if (page_do_bit17_swizzling)
1015 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
1016 user_data,
1017 page_length);
1018 else
1019 ret = __copy_from_user(vaddr + shmem_page_offset,
1020 user_data,
1021 page_length);
1022 if (needs_clflush_after)
1023 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
1024 page_length,
1025 page_do_bit17_swizzling);
1026 kunmap(page);
1027
1028 return ret ? -EFAULT : 0;
1029 }
1030
1031 static int
1032 i915_gem_shmem_pwrite(struct drm_device *dev,
1033 struct drm_i915_gem_object *obj,
1034 struct drm_i915_gem_pwrite *args,
1035 struct drm_file *file)
1036 {
1037 ssize_t remain;
1038 loff_t offset;
1039 char __user *user_data;
1040 int shmem_page_offset, page_length, ret = 0;
1041 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
1042 int hit_slowpath = 0;
1043 int needs_clflush_after = 0;
1044 int needs_clflush_before = 0;
1045 #ifndef __NetBSD__
1046 struct sg_page_iter sg_iter;
1047 #endif
1048
1049 user_data = to_user_ptr(args->data_ptr);
1050 remain = args->size;
1051
1052 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
1053
1054 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1055 /* If we're not in the cpu write domain, set ourself into the gtt
1056 * write domain and manually flush cachelines (if required). This
1057 * optimizes for the case when the gpu will use the data
1058 * right away and we therefore have to clflush anyway. */
1059 needs_clflush_after = cpu_write_needs_clflush(obj);
1060 ret = i915_gem_object_wait_rendering(obj, false);
1061 if (ret)
1062 return ret;
1063 }
1064 /* Same trick applies to invalidate partially written cachelines read
1065 * before writing. */
1066 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
1067 needs_clflush_before =
1068 !cpu_cache_is_coherent(dev, obj->cache_level);
1069
1070 ret = i915_gem_object_get_pages(obj);
1071 if (ret)
1072 return ret;
1073
1074 i915_gem_object_pin_pages(obj);
1075
1076 offset = args->offset;
1077 obj->dirty = 1;
1078
1079 #ifdef __NetBSD__
1080 while (0 < remain) {
1081 /* Get the next page. */
1082 shmem_page_offset = offset_in_page(offset);
1083 KASSERT(shmem_page_offset < PAGE_SIZE);
1084 page_length = MIN(remain, (PAGE_SIZE - shmem_page_offset));
1085 struct page *const page = i915_gem_object_get_page(obj,
1086 atop(offset));
1087
1088 /* Decide whether to flush the cache or swizzle bit 17. */
1089 const bool partial_cacheline_write = needs_clflush_before &&
1090 ((shmem_page_offset | page_length)
1091 & (cpu_info_primary.ci_cflush_lsize - 1));
1092 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
1093 (page_to_phys(page) & (1 << 17)) != 0;
1094
1095 /* Try the fast path. */
1096 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
1097 user_data, page_do_bit17_swizzling,
1098 partial_cacheline_write, needs_clflush_after);
1099 if (ret == 0)
1100 goto next_page;
1101
1102 /* Fast path failed. Try the slow path. */
1103 hit_slowpath = 1;
1104 mutex_unlock(&dev->struct_mutex);
1105 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
1106 user_data, page_do_bit17_swizzling,
1107 partial_cacheline_write, needs_clflush_after);
1108 mutex_lock(&dev->struct_mutex);
1109 if (ret)
1110 goto out;
1111
1112 next_page: KASSERT(page_length <= remain);
1113 remain -= page_length;
1114 user_data += page_length;
1115 offset += page_length;
1116 }
1117 #else
1118 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
1119 offset >> PAGE_SHIFT) {
1120 struct page *page = sg_page_iter_page(&sg_iter);
1121 int partial_cacheline_write;
1122
1123 if (remain <= 0)
1124 break;
1125
1126 /* Operation in this page
1127 *
1128 * shmem_page_offset = offset within page in shmem file
1129 * page_length = bytes to copy for this page
1130 */
1131 shmem_page_offset = offset_in_page(offset);
1132
1133 page_length = remain;
1134 if ((shmem_page_offset + page_length) > PAGE_SIZE)
1135 page_length = PAGE_SIZE - shmem_page_offset;
1136
1137 /* If we don't overwrite a cacheline completely we need to be
1138 * careful to have up-to-date data by first clflushing. Don't
1139 * overcomplicate things and flush the entire patch. */
1140 partial_cacheline_write = needs_clflush_before &&
1141 ((shmem_page_offset | page_length)
1142 & (boot_cpu_data.x86_clflush_size - 1));
1143
1144 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
1145 (page_to_phys(page) & (1 << 17)) != 0;
1146
1147 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
1148 user_data, page_do_bit17_swizzling,
1149 partial_cacheline_write,
1150 needs_clflush_after);
1151 if (ret == 0)
1152 goto next_page;
1153
1154 hit_slowpath = 1;
1155 mutex_unlock(&dev->struct_mutex);
1156 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
1157 user_data, page_do_bit17_swizzling,
1158 partial_cacheline_write,
1159 needs_clflush_after);
1160
1161 mutex_lock(&dev->struct_mutex);
1162
1163 if (ret)
1164 goto out;
1165
1166 next_page:
1167 remain -= page_length;
1168 user_data += page_length;
1169 offset += page_length;
1170 }
1171 #endif
1172
1173 out:
1174 i915_gem_object_unpin_pages(obj);
1175
1176 if (hit_slowpath) {
1177 /*
1178 * Fixup: Flush cpu caches in case we didn't flush the dirty
1179 * cachelines in-line while writing and the object moved
1180 * out of the cpu write domain while we've dropped the lock.
1181 */
1182 if (!needs_clflush_after &&
1183 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1184 if (i915_gem_clflush_object(obj, obj->pin_display))
1185 i915_gem_chipset_flush(dev);
1186 }
1187 }
1188
1189 if (needs_clflush_after)
1190 i915_gem_chipset_flush(dev);
1191
1192 return ret;
1193 }
1194
1195 /**
1196 * Writes data to the object referenced by handle.
1197 *
1198 * On error, the contents of the buffer that were to be modified are undefined.
1199 */
1200 int
1201 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1202 struct drm_file *file)
1203 {
1204 struct drm_i915_gem_pwrite *args = data;
1205 struct drm_i915_gem_object *obj;
1206 int ret;
1207
1208 if (args->size == 0)
1209 return 0;
1210
1211 if (!access_ok(VERIFY_READ,
1212 to_user_ptr(args->data_ptr),
1213 args->size))
1214 return -EFAULT;
1215
1216 #ifndef __NetBSD__ /* XXX prefault */
1217 if (likely(!i915.prefault_disable)) {
1218 ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
1219 args->size);
1220 if (ret)
1221 return -EFAULT;
1222 }
1223 #endif
1224
1225 ret = i915_mutex_lock_interruptible(dev);
1226 if (ret)
1227 return ret;
1228
1229 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1230 if (&obj->base == NULL) {
1231 ret = -ENOENT;
1232 goto unlock;
1233 }
1234
1235 /* Bounds check destination. */
1236 if (args->offset > obj->base.size ||
1237 args->size > obj->base.size - args->offset) {
1238 ret = -EINVAL;
1239 goto out;
1240 }
1241
1242 /* prime objects have no backing filp to GEM pread/pwrite
1243 * pages from.
1244 */
1245 #ifdef __NetBSD__
1246 /* Also stolen objects. */
1247 if (obj->base.gemo_shm_uao == NULL) {
1248 ret = -EINVAL;
1249 goto out;
1250 }
1251 #else
1252 if (!obj->base.filp) {
1253 ret = -EINVAL;
1254 goto out;
1255 }
1256 #endif
1257
1258 trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1259
1260 ret = -EFAULT;
1261 /* We can only do the GTT pwrite on untiled buffers, as otherwise
1262 * it would end up going through the fenced access, and we'll get
1263 * different detiling behavior between reading and writing.
1264 * pread/pwrite currently are reading and writing from the CPU
1265 * perspective, requiring manual detiling by the client.
1266 */
1267 if (obj->phys_handle) {
1268 ret = i915_gem_phys_pwrite(obj, args, file);
1269 goto out;
1270 }
1271
1272 if (obj->tiling_mode == I915_TILING_NONE &&
1273 obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
1274 cpu_write_needs_clflush(obj)) {
1275 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
1276 /* Note that the gtt paths might fail with non-page-backed user
1277 * pointers (e.g. gtt mappings when moving data between
1278 * textures). Fallback to the shmem path in that case. */
1279 }
1280
1281 if (ret == -EFAULT || ret == -ENOSPC)
1282 ret = i915_gem_shmem_pwrite(dev, obj, args, file);
1283
1284 out:
1285 drm_gem_object_unreference(&obj->base);
1286 unlock:
1287 mutex_unlock(&dev->struct_mutex);
1288 return ret;
1289 }
1290
1291 int
1292 i915_gem_check_wedge(struct i915_gpu_error *error,
1293 bool interruptible)
1294 {
1295 if (i915_reset_in_progress(error)) {
1296 /* Non-interruptible callers can't handle -EAGAIN, hence return
1297 * -EIO unconditionally for these. */
1298 if (!interruptible)
1299 return -EIO;
1300
1301 /* Recovery complete, but the reset failed ... */
1302 if (i915_terminally_wedged(error))
1303 return -EIO;
1304
1305 return -EAGAIN;
1306 }
1307
1308 return 0;
1309 }
1310
1311 /*
1312 * Compare seqno against outstanding lazy request. Emit a request if they are
1313 * equal.
1314 */
1315 static int
1316 i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
1317 {
1318 int ret;
1319
1320 BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
1321
1322 ret = 0;
1323 if (seqno == ring->outstanding_lazy_seqno)
1324 ret = i915_add_request(ring, NULL);
1325
1326 return ret;
1327 }
1328
1329 #ifndef __NetBSD__
1330 static void fake_irq(unsigned long data)
1331 {
1332 wake_up_process((struct task_struct *)data);
1333 }
1334 #endif
1335
1336 static bool missed_irq(struct drm_i915_private *dev_priv,
1337 struct intel_ring_buffer *ring)
1338 {
1339 return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
1340 }
1341
1342 static bool can_wait_boost(struct drm_i915_file_private *file_priv)
1343 {
1344 if (file_priv == NULL)
1345 return true;
1346
1347 return !atomic_xchg(&file_priv->rps_wait_boost, true);
1348 }
1349
1350 /**
1351 * __wait_seqno - wait until execution of seqno has finished
1352 * @ring: the ring expected to report seqno
1353 * @seqno: duh!
1354 * @reset_counter: reset sequence associated with the given seqno
1355 * @interruptible: do an interruptible wait (normally yes)
1356 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
1357 *
1358 * Note: It is of utmost importance that the passed in seqno and reset_counter
1359 * values have been read by the caller in an smp safe manner. Where read-side
1360 * locks are involved, it is sufficient to read the reset_counter before
1361 * unlocking the lock that protects the seqno. For lockless tricks, the
1362 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
1363 * inserted.
1364 *
1365 * Returns 0 if the seqno was found within the alloted time. Else returns the
1366 * errno with remaining time filled in timeout argument.
1367 */
1368 #ifdef __NetBSD__
1369 static int
1370 __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, unsigned reset_counter,
1371 bool interruptible, struct timespec *timeout,
1372 struct drm_i915_file_private *file_priv)
1373 {
1374 struct drm_device *dev = ring->dev;
1375 struct drm_i915_private *dev_priv = dev->dev_private;
1376 bool irq_test_in_progress;
1377 struct timespec before, after;
1378 int ticks;
1379 bool wedged;
1380 int ret;
1381
1382 irq_test_in_progress = (dev_priv->gpu_error.test_irq_rings &
1383 intel_ring_flag(ring));
1384 __insn_barrier();
1385
1386 if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
1387 return 0;
1388
1389 if (timeout)
1390 ticks = mstohz(timespec_to_ns(timeout) / 1000000);
1391
1392 if (INTEL_INFO(dev)->gen >= 6 && can_wait_boost(file_priv)) {
1393 gen6_rps_boost(dev_priv);
1394 if (file_priv)
1395 mod_delayed_work(dev_priv->wq,
1396 &file_priv->mm.idle_work,
1397 msecs_to_jiffies(100));
1398 }
1399
1400 if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring)))
1401 return -ENODEV;
1402
1403 nanotime(&before);
1404 spin_lock(&dev_priv->irq_lock);
1405 #define EXIT_COND \
1406 (((reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) \
1407 ? wedged = true : false) || \
1408 i915_seqno_passed(ring->get_seqno(ring, false), \
1409 seqno))
1410
1411 if (timeout) {
1412 /*
1413 * XXX This missed_irq business smells like unlocked
1414 * Linux waitqueue nonsense.
1415 */
1416 if (missed_irq(dev_priv, ring))
1417 ticks = 1;
1418 if (interruptible)
1419 DRM_SPIN_TIMED_WAIT_UNTIL(ret, &ring->irq_queue,
1420 &dev_priv->irq_lock, ticks, EXIT_COND);
1421 else
1422 DRM_SPIN_TIMED_WAIT_NOINTR_UNTIL(ret, &ring->irq_queue,
1423 &dev_priv->irq_lock, ticks, EXIT_COND);
1424 } else {
1425 if (interruptible)
1426 DRM_SPIN_WAIT_UNTIL(ret, &ring->irq_queue,
1427 &dev_priv->irq_lock, EXIT_COND);
1428 else
1429 DRM_SPIN_WAIT_NOINTR_UNTIL(ret, &ring->irq_queue,
1430 &dev_priv->irq_lock, EXIT_COND);
1431 }
1432 #undef EXIT_COND
1433 spin_unlock(&dev_priv->irq_lock);
1434 nanotime(&after);
1435
1436 if (!irq_test_in_progress)
1437 ring->irq_put(ring);
1438 if (timeout)
1439 timespecsub(&after, &before, timeout);
1440 return MAX(ret, 0); /* ignore remaining ticks */
1441 }
1442 #else
1443 static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
1444 unsigned reset_counter,
1445 bool interruptible,
1446 struct timespec *timeout,
1447 struct drm_i915_file_private *file_priv)
1448 {
1449 struct drm_device *dev = ring->dev;
1450 struct drm_i915_private *dev_priv = dev->dev_private;
1451 const bool irq_test_in_progress =
1452 ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
1453 struct timespec before, now;
1454 DEFINE_WAIT(wait);
1455 unsigned long timeout_expire;
1456 int ret;
1457
1458 WARN(dev_priv->pm.irqs_disabled, "IRQs disabled\n");
1459
1460 if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
1461 return 0;
1462
1463 timeout_expire = timeout ? jiffies + timespec_to_jiffies_timeout(timeout) : 0;
1464
1465 if (INTEL_INFO(dev)->gen >= 6 && can_wait_boost(file_priv)) {
1466 gen6_rps_boost(dev_priv);
1467 if (file_priv)
1468 mod_delayed_work(dev_priv->wq,
1469 &file_priv->mm.idle_work,
1470 msecs_to_jiffies(100));
1471 }
1472
1473 if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring)))
1474 return -ENODEV;
1475
1476 /* Record current time in case interrupted by signal, or wedged */
1477 trace_i915_gem_request_wait_begin(ring, seqno);
1478 getrawmonotonic(&before);
1479 for (;;) {
1480 struct timer_list timer;
1481
1482 prepare_to_wait(&ring->irq_queue, &wait,
1483 interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
1484
1485 /* We need to check whether any gpu reset happened in between
1486 * the caller grabbing the seqno and now ... */
1487 if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
1488 /* ... but upgrade the -EAGAIN to an -EIO if the gpu
1489 * is truely gone. */
1490 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1491 if (ret == 0)
1492 ret = -EAGAIN;
1493 break;
1494 }
1495
1496 if (i915_seqno_passed(ring->get_seqno(ring, false), seqno)) {
1497 ret = 0;
1498 break;
1499 }
1500
1501 if (interruptible && signal_pending(current)) {
1502 ret = -ERESTARTSYS;
1503 break;
1504 }
1505
1506 if (timeout && time_after_eq(jiffies, timeout_expire)) {
1507 ret = -ETIME;
1508 break;
1509 }
1510
1511 timer.function = NULL;
1512 if (timeout || missed_irq(dev_priv, ring)) {
1513 unsigned long expire;
1514
1515 setup_timer_on_stack(&timer, fake_irq, (unsigned long)current);
1516 expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire;
1517 mod_timer(&timer, expire);
1518 }
1519
1520 io_schedule();
1521
1522 if (timer.function) {
1523 del_singleshot_timer_sync(&timer);
1524 destroy_timer_on_stack(&timer);
1525 }
1526 }
1527 getrawmonotonic(&now);
1528 trace_i915_gem_request_wait_end(ring, seqno);
1529
1530 if (!irq_test_in_progress)
1531 ring->irq_put(ring);
1532
1533 finish_wait(&ring->irq_queue, &wait);
1534
1535 if (timeout) {
1536 struct timespec sleep_time = timespec_sub(now, before);
1537 *timeout = timespec_sub(*timeout, sleep_time);
1538 if (!timespec_valid(timeout)) /* i.e. negative time remains */
1539 set_normalized_timespec(timeout, 0, 0);
1540 }
1541
1542 return ret;
1543 }
1544 #endif
1545
1546 /**
1547 * Waits for a sequence number to be signaled, and cleans up the
1548 * request and object lists appropriately for that event.
1549 */
1550 int
1551 i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
1552 {
1553 struct drm_device *dev = ring->dev;
1554 struct drm_i915_private *dev_priv = dev->dev_private;
1555 bool interruptible = dev_priv->mm.interruptible;
1556 int ret;
1557
1558 BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1559 BUG_ON(seqno == 0);
1560
1561 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1562 if (ret)
1563 return ret;
1564
1565 ret = i915_gem_check_olr(ring, seqno);
1566 if (ret)
1567 return ret;
1568
1569 return __wait_seqno(ring, seqno,
1570 atomic_read(&dev_priv->gpu_error.reset_counter),
1571 interruptible, NULL, NULL);
1572 }
1573
1574 static int
1575 i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
1576 struct intel_ring_buffer *ring)
1577 {
1578 i915_gem_retire_requests_ring(ring);
1579
1580 /* Manually manage the write flush as we may have not yet
1581 * retired the buffer.
1582 *
1583 * Note that the last_write_seqno is always the earlier of
1584 * the two (read/write) seqno, so if we haved successfully waited,
1585 * we know we have passed the last write.
1586 */
1587 obj->last_write_seqno = 0;
1588 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
1589
1590 return 0;
1591 }
1592
1593 /**
1594 * Ensures that all rendering to the object has completed and the object is
1595 * safe to unbind from the GTT or access from the CPU.
1596 */
1597 static __must_check int
1598 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
1599 bool readonly)
1600 {
1601 struct intel_ring_buffer *ring = obj->ring;
1602 u32 seqno;
1603 int ret;
1604
1605 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
1606 if (seqno == 0)
1607 return 0;
1608
1609 ret = i915_wait_seqno(ring, seqno);
1610 if (ret)
1611 return ret;
1612
1613 return i915_gem_object_wait_rendering__tail(obj, ring);
1614 }
1615
1616 /* A nonblocking variant of the above wait. This is a highly dangerous routine
1617 * as the object state may change during this call.
1618 */
1619 static __must_check int
1620 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
1621 struct drm_i915_file_private *file_priv,
1622 bool readonly)
1623 {
1624 struct drm_device *dev = obj->base.dev;
1625 struct drm_i915_private *dev_priv = dev->dev_private;
1626 struct intel_ring_buffer *ring = obj->ring;
1627 unsigned reset_counter;
1628 u32 seqno;
1629 int ret;
1630
1631 BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1632 BUG_ON(!dev_priv->mm.interruptible);
1633
1634 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
1635 if (seqno == 0)
1636 return 0;
1637
1638 ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
1639 if (ret)
1640 return ret;
1641
1642 ret = i915_gem_check_olr(ring, seqno);
1643 if (ret)
1644 return ret;
1645
1646 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
1647 mutex_unlock(&dev->struct_mutex);
1648 ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file_priv);
1649 mutex_lock(&dev->struct_mutex);
1650 if (ret)
1651 return ret;
1652
1653 return i915_gem_object_wait_rendering__tail(obj, ring);
1654 }
1655
1656 /**
1657 * Called when user space prepares to use an object with the CPU, either
1658 * through the mmap ioctl's mapping or a GTT mapping.
1659 */
1660 int
1661 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1662 struct drm_file *file)
1663 {
1664 struct drm_i915_gem_set_domain *args = data;
1665 struct drm_i915_gem_object *obj;
1666 uint32_t read_domains = args->read_domains;
1667 uint32_t write_domain = args->write_domain;
1668 int ret;
1669
1670 /* Only handle setting domains to types used by the CPU. */
1671 if (write_domain & I915_GEM_GPU_DOMAINS)
1672 return -EINVAL;
1673
1674 if (read_domains & I915_GEM_GPU_DOMAINS)
1675 return -EINVAL;
1676
1677 /* Having something in the write domain implies it's in the read
1678 * domain, and only that read domain. Enforce that in the request.
1679 */
1680 if (write_domain != 0 && read_domains != write_domain)
1681 return -EINVAL;
1682
1683 ret = i915_mutex_lock_interruptible(dev);
1684 if (ret)
1685 return ret;
1686
1687 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1688 if (&obj->base == NULL) {
1689 ret = -ENOENT;
1690 goto unlock;
1691 }
1692
1693 /* Try to flush the object off the GPU without holding the lock.
1694 * We will repeat the flush holding the lock in the normal manner
1695 * to catch cases where we are gazumped.
1696 */
1697 ret = i915_gem_object_wait_rendering__nonblocking(obj,
1698 file->driver_priv,
1699 !write_domain);
1700 if (ret)
1701 goto unref;
1702
1703 if (read_domains & I915_GEM_DOMAIN_GTT) {
1704 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1705
1706 /* Silently promote "you're not bound, there was nothing to do"
1707 * to success, since the client was just asking us to
1708 * make sure everything was done.
1709 */
1710 if (ret == -EINVAL)
1711 ret = 0;
1712 } else {
1713 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1714 }
1715
1716 unref:
1717 drm_gem_object_unreference(&obj->base);
1718 unlock:
1719 mutex_unlock(&dev->struct_mutex);
1720 return ret;
1721 }
1722
1723 /**
1724 * Called when user space has done writes to this buffer
1725 */
1726 int
1727 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1728 struct drm_file *file)
1729 {
1730 struct drm_i915_gem_sw_finish *args = data;
1731 struct drm_i915_gem_object *obj;
1732 int ret = 0;
1733
1734 ret = i915_mutex_lock_interruptible(dev);
1735 if (ret)
1736 return ret;
1737
1738 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1739 if (&obj->base == NULL) {
1740 ret = -ENOENT;
1741 goto unlock;
1742 }
1743
1744 /* Pinned buffers may be scanout, so flush the cache */
1745 if (obj->pin_display)
1746 i915_gem_object_flush_cpu_write_domain(obj, true);
1747
1748 drm_gem_object_unreference(&obj->base);
1749 unlock:
1750 mutex_unlock(&dev->struct_mutex);
1751 return ret;
1752 }
1753
1754 /**
1755 * Maps the contents of an object, returning the address it is mapped
1756 * into.
1757 *
1758 * While the mapping holds a reference on the contents of the object, it doesn't
1759 * imply a ref on the object itself.
1760 */
1761 int
1762 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1763 struct drm_file *file)
1764 {
1765 struct drm_i915_gem_mmap *args = data;
1766 struct drm_gem_object *obj;
1767 unsigned long addr;
1768 #ifdef __NetBSD__
1769 int ret;
1770 #endif
1771
1772 obj = drm_gem_object_lookup(dev, file, args->handle);
1773 if (obj == NULL)
1774 return -ENOENT;
1775
1776 /* prime objects have no backing filp to GEM mmap
1777 * pages from.
1778 */
1779 #ifdef __NetBSD__
1780 /* Also stolen objects (XXX can we get them here?) */
1781 if (obj->gemo_shm_uao == NULL) {
1782 drm_gem_object_unreference_unlocked(obj);
1783 return -EINVAL;
1784 }
1785 #else
1786 if (!obj->filp) {
1787 drm_gem_object_unreference_unlocked(obj);
1788 return -EINVAL;
1789 }
1790 #endif
1791
1792 #ifdef __NetBSD__
1793 addr = (*curproc->p_emul->e_vm_default_addr)(curproc,
1794 (vaddr_t)curproc->p_vmspace->vm_daddr, args->size);
1795 /* XXX errno NetBSD->Linux */
1796 ret = -uvm_map(&curproc->p_vmspace->vm_map, &addr, args->size,
1797 obj->gemo_shm_uao, args->offset, 0,
1798 UVM_MAPFLAG((VM_PROT_READ | VM_PROT_WRITE),
1799 (VM_PROT_READ | VM_PROT_WRITE), UVM_INH_COPY, UVM_ADV_NORMAL,
1800 0));
1801 if (ret) {
1802 drm_gem_object_unreference_unlocked(obj);
1803 return ret;
1804 }
1805 uao_reference(obj->gemo_shm_uao);
1806 drm_gem_object_unreference_unlocked(obj);
1807 #else
1808 addr = vm_mmap(obj->filp, 0, args->size,
1809 PROT_READ | PROT_WRITE, MAP_SHARED,
1810 args->offset);
1811 drm_gem_object_unreference_unlocked(obj);
1812 if (IS_ERR((void *)addr))
1813 return addr;
1814 #endif
1815
1816 args->addr_ptr = (uint64_t) addr;
1817
1818 return 0;
1819 }
1820
1821 #ifdef __NetBSD__ /* XXX gem gtt fault */
1822 static int i915_udv_fault(struct uvm_faultinfo *, vaddr_t,
1823 struct vm_page **, int, int, vm_prot_t, int, paddr_t);
1824
1825 int
1826 i915_gem_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps,
1827 int npages, int centeridx, vm_prot_t access_type, int flags)
1828 {
1829 struct uvm_object *uobj = ufi->entry->object.uvm_obj;
1830 struct drm_gem_object *gem_obj =
1831 container_of(uobj, struct drm_gem_object, gemo_uvmobj);
1832 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
1833 struct drm_device *dev = obj->base.dev;
1834 struct drm_i915_private *dev_priv = dev->dev_private;
1835 voff_t byte_offset;
1836 pgoff_t page_offset;
1837 int ret = 0;
1838 bool write = ISSET(access_type, VM_PROT_WRITE)? 1 : 0;
1839
1840 byte_offset = (ufi->entry->offset + (vaddr - ufi->entry->start));
1841 KASSERT(byte_offset <= obj->base.size);
1842 page_offset = (byte_offset >> PAGE_SHIFT);
1843
1844 intel_runtime_pm_get(dev_priv);
1845
1846 /* Thanks, uvm, but we don't need this lock. */
1847 mutex_exit(uobj->vmobjlock);
1848
1849 ret = i915_mutex_lock_interruptible(dev);
1850 if (ret)
1851 goto out;
1852
1853 trace_i915_gem_object_fault(obj, page_offset, true, write);
1854
1855 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
1856 if (ret)
1857 goto unlock;
1858
1859 if ((obj->cache_level != I915_CACHE_NONE) && !HAS_LLC(dev)) {
1860 ret = -EINVAL;
1861 goto unlock;
1862 }
1863
1864 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
1865 if (ret)
1866 goto unlock;
1867
1868 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1869 if (ret)
1870 goto unpin;
1871
1872 ret = i915_gem_object_get_fence(obj);
1873 if (ret)
1874 goto unpin;
1875
1876 obj->fault_mappable = true;
1877
1878 /* XXX errno NetBSD->Linux */
1879 ret = -i915_udv_fault(ufi, vaddr, pps, npages, centeridx, access_type,
1880 flags,
1881 (dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj)));
1882 unpin:
1883 i915_gem_object_ggtt_unpin(obj);
1884 unlock:
1885 mutex_unlock(&dev->struct_mutex);
1886 out:
1887 mutex_enter(uobj->vmobjlock);
1888 uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj);
1889 if (ret == -ERESTART)
1890 uvm_wait("i915flt");
1891 /* XXX Deal with GPU hangs here... */
1892 intel_runtime_pm_put(dev_priv);
1893 /* XXX errno Linux->NetBSD */
1894 return -ret;
1895 }
1896
1897 /*
1898 * XXX i915_udv_fault is copypasta of udv_fault from uvm_device.c.
1899 *
1900 * XXX pmap_enter_default instead of pmap_enter because of a problem
1901 * with using weak aliases in kernel modules or something.
1902 */
1903 int pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, unsigned);
1904
1905 static int
1906 i915_udv_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps,
1907 int npages, int centeridx, vm_prot_t access_type, int flags,
1908 paddr_t gtt_paddr)
1909 {
1910 struct vm_map_entry *entry = ufi->entry;
1911 vaddr_t curr_va;
1912 off_t curr_offset;
1913 paddr_t paddr;
1914 u_int mmapflags;
1915 int lcv, retval;
1916 vm_prot_t mapprot;
1917 UVMHIST_FUNC("i915_udv_fault"); UVMHIST_CALLED(maphist);
1918 UVMHIST_LOG(maphist," flags=%d", flags,0,0,0);
1919
1920 /*
1921 * we do not allow device mappings to be mapped copy-on-write
1922 * so we kill any attempt to do so here.
1923 */
1924
1925 if (UVM_ET_ISCOPYONWRITE(entry)) {
1926 UVMHIST_LOG(maphist, "<- failed -- COW entry (etype=0x%x)",
1927 entry->etype, 0,0,0);
1928 return(EIO);
1929 }
1930
1931 /*
1932 * now we must determine the offset in udv to use and the VA to
1933 * use for pmap_enter. note that we always use orig_map's pmap
1934 * for pmap_enter (even if we have a submap). since virtual
1935 * addresses in a submap must match the main map, this is ok.
1936 */
1937
1938 /* udv offset = (offset from start of entry) + entry's offset */
1939 curr_offset = entry->offset + (vaddr - entry->start);
1940 /* pmap va = vaddr (virtual address of pps[0]) */
1941 curr_va = vaddr;
1942
1943 /*
1944 * loop over the page range entering in as needed
1945 */
1946
1947 retval = 0;
1948 for (lcv = 0 ; lcv < npages ; lcv++, curr_offset += PAGE_SIZE,
1949 curr_va += PAGE_SIZE) {
1950 if ((flags & PGO_ALLPAGES) == 0 && lcv != centeridx)
1951 continue;
1952
1953 if (pps[lcv] == PGO_DONTCARE)
1954 continue;
1955
1956 paddr = (gtt_paddr + curr_offset);
1957 mmapflags = 0;
1958 mapprot = ufi->entry->protection;
1959 UVMHIST_LOG(maphist,
1960 " MAPPING: device: pm=0x%x, va=0x%x, pa=0x%lx, at=%d",
1961 ufi->orig_map->pmap, curr_va, paddr, mapprot);
1962 if (pmap_enter_default(ufi->orig_map->pmap, curr_va, paddr, mapprot,
1963 PMAP_CANFAIL | mapprot | mmapflags) != 0) {
1964 /*
1965 * pmap_enter() didn't have the resource to
1966 * enter this mapping. Unlock everything,
1967 * wait for the pagedaemon to free up some
1968 * pages, and then tell uvm_fault() to start
1969 * the fault again.
1970 *
1971 * XXX Needs some rethinking for the PGO_ALLPAGES
1972 * XXX case.
1973 */
1974 pmap_update(ufi->orig_map->pmap); /* sync what we have so far */
1975 return (ERESTART);
1976 }
1977 }
1978
1979 pmap_update(ufi->orig_map->pmap);
1980 return (retval);
1981 }
1982 #else
1983 /**
1984 * i915_gem_fault - fault a page into the GTT
1985 * @vma: VMA in question
1986 * @vmf: fault info
1987 *
1988 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1989 * from userspace. The fault handler takes care of binding the object to
1990 * the GTT (if needed), allocating and programming a fence register (again,
1991 * only if needed based on whether the old reg is still valid or the object
1992 * is tiled) and inserting a new PTE into the faulting process.
1993 *
1994 * Note that the faulting process may involve evicting existing objects
1995 * from the GTT and/or fence registers to make room. So performance may
1996 * suffer if the GTT working set is large or there are few fence registers
1997 * left.
1998 */
1999 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
2000 {
2001 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
2002 struct drm_device *dev = obj->base.dev;
2003 struct drm_i915_private *dev_priv = dev->dev_private;
2004 pgoff_t page_offset;
2005 unsigned long pfn;
2006 int ret = 0;
2007 bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
2008
2009 intel_runtime_pm_get(dev_priv);
2010
2011 /* We don't use vmf->pgoff since that has the fake offset */
2012 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
2013 PAGE_SHIFT;
2014
2015 ret = i915_mutex_lock_interruptible(dev);
2016 if (ret)
2017 goto out;
2018
2019 trace_i915_gem_object_fault(obj, page_offset, true, write);
2020
2021 /* Try to flush the object off the GPU first without holding the lock.
2022 * Upon reacquiring the lock, we will perform our sanity checks and then
2023 * repeat the flush holding the lock in the normal manner to catch cases
2024 * where we are gazumped.
2025 */
2026 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
2027 if (ret)
2028 goto unlock;
2029
2030 /* Access to snoopable pages through the GTT is incoherent. */
2031 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
2032 ret = -EINVAL;
2033 goto unlock;
2034 }
2035
2036 /* Now bind it into the GTT if needed */
2037 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
2038 if (ret)
2039 goto unlock;
2040
2041 ret = i915_gem_object_set_to_gtt_domain(obj, write);
2042 if (ret)
2043 goto unpin;
2044
2045 ret = i915_gem_object_get_fence(obj);
2046 if (ret)
2047 goto unpin;
2048
2049 obj->fault_mappable = true;
2050
2051 pfn = dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj);
2052 pfn >>= PAGE_SHIFT;
2053 pfn += page_offset;
2054
2055 /* Finally, remap it using the new GTT offset */
2056 ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
2057 unpin:
2058 i915_gem_object_ggtt_unpin(obj);
2059 unlock:
2060 mutex_unlock(&dev->struct_mutex);
2061 out:
2062 switch (ret) {
2063 case -EIO:
2064 /* If this -EIO is due to a gpu hang, give the reset code a
2065 * chance to clean up the mess. Otherwise return the proper
2066 * SIGBUS. */
2067 if (i915_terminally_wedged(&dev_priv->gpu_error)) {
2068 ret = VM_FAULT_SIGBUS;
2069 break;
2070 }
2071 case -EAGAIN:
2072 /*
2073 * EAGAIN means the gpu is hung and we'll wait for the error
2074 * handler to reset everything when re-faulting in
2075 * i915_mutex_lock_interruptible.
2076 */
2077 case 0:
2078 case -ERESTARTSYS:
2079 case -EINTR:
2080 case -EBUSY:
2081 /*
2082 * EBUSY is ok: this just means that another thread
2083 * already did the job.
2084 */
2085 ret = VM_FAULT_NOPAGE;
2086 break;
2087 case -ENOMEM:
2088 ret = VM_FAULT_OOM;
2089 break;
2090 case -ENOSPC:
2091 case -EFAULT:
2092 ret = VM_FAULT_SIGBUS;
2093 break;
2094 default:
2095 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
2096 ret = VM_FAULT_SIGBUS;
2097 break;
2098 }
2099
2100 intel_runtime_pm_put(dev_priv);
2101 return ret;
2102 }
2103
2104 void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
2105 {
2106 struct i915_vma *vma;
2107
2108 /*
2109 * Only the global gtt is relevant for gtt memory mappings, so restrict
2110 * list traversal to objects bound into the global address space. Note
2111 * that the active list should be empty, but better safe than sorry.
2112 */
2113 WARN_ON(!list_empty(&dev_priv->gtt.base.active_list));
2114 list_for_each_entry(vma, &dev_priv->gtt.base.active_list, mm_list)
2115 i915_gem_release_mmap(vma->obj);
2116 list_for_each_entry(vma, &dev_priv->gtt.base.inactive_list, mm_list)
2117 i915_gem_release_mmap(vma->obj);
2118 }
2119 #endif
2120
2121 /**
2122 * i915_gem_release_mmap - remove physical page mappings
2123 * @obj: obj in question
2124 *
2125 * Preserve the reservation of the mmapping with the DRM core code, but
2126 * relinquish ownership of the pages back to the system.
2127 *
2128 * It is vital that we remove the page mapping if we have mapped a tiled
2129 * object through the GTT and then lose the fence register due to
2130 * resource pressure. Similarly if the object has been moved out of the
2131 * aperture, then pages mapped into userspace must be revoked. Removing the
2132 * mapping will then trigger a page fault on the next user access, allowing
2133 * fixup by i915_gem_fault().
2134 */
2135 void
2136 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
2137 {
2138 if (!obj->fault_mappable)
2139 return;
2140
2141 #ifdef __NetBSD__ /* XXX gem gtt fault */
2142 {
2143 struct vm_page *page;
2144
2145 mutex_enter(obj->base.gemo_shm_uao->vmobjlock);
2146 KASSERT(obj->pages != NULL);
2147 /* Force a fresh fault for each page. */
2148 /*
2149 * XXX OOPS! This doesn't actually do what we want.
2150 * This causes a fresh fault for access to the backing
2151 * pages -- but nothing accesses the backing pages
2152 * directly! What is actually entered into CPU page
2153 * table entries is aperture addresses which have been
2154 * programmed by the GTT to refer to those backing
2155 * pages.
2156 *
2157 * We need to clear those page table entries, but
2158 * there's no good way to do that at the moment: nobody
2159 * records for us a map from either uvm objects or
2160 * physical device addresses to a list of all virtual
2161 * pages where they have been mapped. pmap(9) records
2162 * a map only from physical RAM addresses to virtual
2163 * pages; it does nothing for physical device
2164 * addresses.
2165 */
2166 TAILQ_FOREACH(page, &obj->igo_pageq, pageq.queue)
2167 pmap_page_protect(page, VM_PROT_NONE);
2168 mutex_exit(obj->base.gemo_shm_uao->vmobjlock);
2169 }
2170 #else
2171 drm_vma_node_unmap(&obj->base.vma_node,
2172 obj->base.dev->anon_inode->i_mapping);
2173 #endif
2174 obj->fault_mappable = false;
2175 }
2176
2177 uint32_t
2178 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
2179 {
2180 uint32_t gtt_size;
2181
2182 if (INTEL_INFO(dev)->gen >= 4 ||
2183 tiling_mode == I915_TILING_NONE)
2184 return size;
2185
2186 /* Previous chips need a power-of-two fence region when tiling */
2187 if (INTEL_INFO(dev)->gen == 3)
2188 gtt_size = 1024*1024;
2189 else
2190 gtt_size = 512*1024;
2191
2192 while (gtt_size < size)
2193 gtt_size <<= 1;
2194
2195 return gtt_size;
2196 }
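
/*
 * Worked example (illustration only; the 700KiB figure is hypothetical):
 * a 700KiB tiled object on gen3 starts from the 1MiB minimum fence size,
 * which already covers it, so the fenced GTT size is 1MiB.  On gen2 the
 * loop starts at 512KiB and doubles once, also giving 1MiB.  On gen4+, or
 * for untiled objects, the object size is returned unchanged.
 */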
2197
2198 /**
2199 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
2200 * @obj: object to check
2201 *
2202 * Return the required GTT alignment for an object, taking into account
2203 * potential fence register mapping.
2204 */
2205 uint32_t
2206 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
2207 int tiling_mode, bool fenced)
2208 {
2209 /*
2210 * Minimum alignment is 4k (GTT page size), but might be greater
2211 * if a fence register is needed for the object.
2212 */
2213 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
2214 tiling_mode == I915_TILING_NONE)
2215 return 4096;
2216
2217 /*
2218 * Previous chips need to be aligned to the size of the smallest
2219 * fence register that can contain the object.
2220 */
2221 return i915_gem_get_gtt_size(dev, size, tiling_mode);
2222 }
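
/*
 * Continuing the example above (illustration only): the same 700KiB tiled
 * object must therefore be bound at a 1MiB-aligned GTT offset on gen2/gen3
 * whenever a fence will be used, while on gen4+ (or on G33 when no fence
 * is needed, or for untiled objects) the 4KiB GTT page alignment suffices.
 */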
2223
2224 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2225 {
2226 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2227 int ret;
2228
2229 if (drm_vma_node_has_offset(&obj->base.vma_node))
2230 return 0;
2231
2232 dev_priv->mm.shrinker_no_lock_stealing = true;
2233
2234 ret = drm_gem_create_mmap_offset(&obj->base);
2235 if (ret != -ENOSPC)
2236 goto out;
2237
2238 /* Badly fragmented mmap space? The only way we can recover
2239 * space is by destroying unwanted objects. We can't randomly release
2240 * mmap_offsets as userspace expects them to be persistent for the
2241 * lifetime of the objects. The closest we can do is to release the
2242 * offset on a purgeable object by truncating it and marking it purged,
2243 * which prevents userspace from ever using that object again.
2244 */
2245 i915_gem_purge(dev_priv, obj->base.size >> PAGE_SHIFT);
2246 ret = drm_gem_create_mmap_offset(&obj->base);
2247 if (ret != -ENOSPC)
2248 goto out;
2249
2250 i915_gem_shrink_all(dev_priv);
2251 ret = drm_gem_create_mmap_offset(&obj->base);
2252 out:
2253 dev_priv->mm.shrinker_no_lock_stealing = false;
2254
2255 return ret;
2256 }
2257
2258 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2259 {
2260 drm_gem_free_mmap_offset(&obj->base);
2261 }
2262
2263 int
2264 i915_gem_mmap_gtt(struct drm_file *file,
2265 struct drm_device *dev,
2266 uint32_t handle,
2267 uint64_t *offset)
2268 {
2269 struct drm_i915_private *dev_priv = dev->dev_private;
2270 struct drm_i915_gem_object *obj;
2271 int ret;
2272
2273 ret = i915_mutex_lock_interruptible(dev);
2274 if (ret)
2275 return ret;
2276
2277 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
2278 if (&obj->base == NULL) {
2279 ret = -ENOENT;
2280 goto unlock;
2281 }
2282
2283 if (obj->base.size > dev_priv->gtt.mappable_end) {
2284 ret = -E2BIG;
2285 goto out;
2286 }
2287
2288 if (obj->madv != I915_MADV_WILLNEED) {
2289 DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
2290 ret = -EFAULT;
2291 goto out;
2292 }
2293
2294 ret = i915_gem_object_create_mmap_offset(obj);
2295 if (ret)
2296 goto out;
2297
2298 *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
2299
2300 out:
2301 drm_gem_object_unreference(&obj->base);
2302 unlock:
2303 mutex_unlock(&dev->struct_mutex);
2304 return ret;
2305 }
2306
2307 /**
2308 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2309 * @dev: DRM device
2310 * @data: GTT mapping ioctl data
2311 * @file: GEM object info
2312 *
2313 * Simply returns the fake offset to userspace so it can mmap it.
2314 * The mmap call will end up in drm_gem_mmap(), which will set things
2315 * up so we can get faults in the handler above.
2316 *
2317 * The fault handler will take care of binding the object into the GTT
2318 * (since it may have been evicted to make room for something), allocating
2319 * a fence register, and mapping the appropriate aperture address into
2320 * userspace.
2321 */
2322 int
2323 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2324 struct drm_file *file)
2325 {
2326 struct drm_i915_gem_mmap_gtt *args = data;
2327
2328 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
2329 }
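
/*
 * Userspace usage sketch (not part of this file; "fd", "handle" and "size"
 * are assumed to come from the caller, error handling omitted):
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	void *ptr;
 *
 *	ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
 *	    arg.offset);
 *
 * The first CPU access through ptr then faults into the handler above,
 * which binds the object into the mappable aperture and inserts the PTEs.
 */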
2330
2331 /* Immediately discard the backing storage */
2332 static void
2333 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2334 {
2335 #ifndef __NetBSD__
2336 struct inode *inode;
2337 #endif
2338
2339 i915_gem_object_free_mmap_offset(obj);
2340
2341 #ifdef __NetBSD__
2342 if (obj->base.gemo_shm_uao == NULL)
2343 return;
2344
2345 {
2346 struct uvm_object *const uobj = obj->base.gemo_shm_uao;
2347
2348 if (uobj != NULL) {
2349 /* XXX Calling pgo_put like this is bogus. */
2350 mutex_enter(uobj->vmobjlock);
2351 (*uobj->pgops->pgo_put)(uobj, 0, obj->base.size,
2352 (PGO_ALLPAGES | PGO_FREE));
2353 }
2354 }
2355 #else
2356 if (obj->base.filp == NULL)
2357 return;
2358
2359 /* Our goal here is to return as much of the memory as
2360 * is possible back to the system as we are called from OOM.
2361 * To do this we must instruct the shmfs to drop all of its
2362 * backing pages, *now*.
2363 */
2364 inode = file_inode(obj->base.filp);
2365 shmem_truncate_range(inode, 0, (loff_t)-1);
2366 #endif
2367
2368 obj->madv = __I915_MADV_PURGED;
2369 }
2370
2371 static inline int
2372 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
2373 {
2374 return obj->madv == I915_MADV_DONTNEED;
2375 }
2376
2377 #ifdef __NetBSD__
2378 static void
2379 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
2380 {
2381 struct drm_device *const dev = obj->base.dev;
2382 int ret;
2383
2384 /* XXX Cargo-culted from the Linux code. */
2385 BUG_ON(obj->madv == __I915_MADV_PURGED);
2386
2387 ret = i915_gem_object_set_to_cpu_domain(obj, true);
2388 if (ret) {
2389 WARN_ON(ret != -EIO);
2390 i915_gem_clflush_object(obj, true);
2391 obj->base.read_domains = obj->base.write_domain =
2392 I915_GEM_DOMAIN_CPU;
2393 }
2394
2395 if (i915_gem_object_needs_bit17_swizzle(obj))
2396 i915_gem_object_save_bit_17_swizzle(obj);
2397
2398 /* XXX Maintain dirty flag? */
2399
2400 bus_dmamap_destroy(dev->dmat, obj->igo_dmamap);
2401 bus_dmamem_unwire_uvm_object(dev->dmat, obj->base.gemo_shm_uao, 0,
2402 obj->base.size, obj->pages, obj->igo_nsegs);
2403
2404 kfree(obj->pages);
2405 }
2406 #else
2407 static void
2408 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
2409 {
2410 struct sg_page_iter sg_iter;
2411 int ret;
2412
2413 BUG_ON(obj->madv == __I915_MADV_PURGED);
2414
2415 ret = i915_gem_object_set_to_cpu_domain(obj, true);
2416 if (ret) {
2417 /* In the event of a disaster, abandon all caches and
2418 * hope for the best.
2419 */
2420 WARN_ON(ret != -EIO);
2421 i915_gem_clflush_object(obj, true);
2422 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2423 }
2424
2425 if (i915_gem_object_needs_bit17_swizzle(obj))
2426 i915_gem_object_save_bit_17_swizzle(obj);
2427
2428 if (obj->madv == I915_MADV_DONTNEED)
2429 obj->dirty = 0;
2430
2431 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
2432 struct page *page = sg_page_iter_page(&sg_iter);
2433
2434 if (obj->dirty)
2435 set_page_dirty(page);
2436
2437 if (obj->madv == I915_MADV_WILLNEED)
2438 mark_page_accessed(page);
2439
2440 page_cache_release(page);
2441 }
2442 obj->dirty = 0;
2443
2444 sg_free_table(obj->pages);
2445 kfree(obj->pages);
2446 }
2447 #endif
2448
2449 int
2450 i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
2451 {
2452 const struct drm_i915_gem_object_ops *ops = obj->ops;
2453
2454 if (obj->pages == NULL)
2455 return 0;
2456
2457 if (obj->pages_pin_count)
2458 return -EBUSY;
2459
2460 BUG_ON(i915_gem_obj_bound_any(obj));
2461
2462 /* ->put_pages might need to allocate memory for the bit17 swizzle
2463 * array, hence protect them from being reaped by removing them from gtt
2464 * lists early. */
2465 list_del(&obj->global_list);
2466
2467 ops->put_pages(obj);
2468 obj->pages = NULL;
2469
2470 if (i915_gem_object_is_purgeable(obj))
2471 i915_gem_object_truncate(obj);
2472
2473 return 0;
2474 }
2475
2476 static unsigned long
2477 __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
2478 bool purgeable_only)
2479 {
2480 struct list_head still_bound_list;
2481 struct drm_i915_gem_object *obj, *next;
2482 unsigned long count = 0;
2483
2484 list_for_each_entry_safe(obj, next,
2485 &dev_priv->mm.unbound_list,
2486 global_list) {
2487 if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
2488 i915_gem_object_put_pages(obj) == 0) {
2489 count += obj->base.size >> PAGE_SHIFT;
2490 if (count >= target)
2491 return count;
2492 }
2493 }
2494
2495 /*
2496 * As we may completely rewrite the bound list whilst unbinding
2497 * (due to retiring requests) we have to strictly process only
2498 * one element of the list at a time, and recheck the list
2499 * on every iteration.
2500 */
2501 INIT_LIST_HEAD(&still_bound_list);
2502 while (count < target && !list_empty(&dev_priv->mm.bound_list)) {
2503 struct i915_vma *vma, *v;
2504
2505 obj = list_first_entry(&dev_priv->mm.bound_list,
2506 typeof(*obj), global_list);
2507 list_move_tail(&obj->global_list, &still_bound_list);
2508
2509 if (!i915_gem_object_is_purgeable(obj) && purgeable_only)
2510 continue;
2511
2512 /*
2513 * Hold a reference whilst we unbind this object, as we may
2514 * end up waiting for and retiring requests. This might
2515 * release the final reference (held by the active list)
2516 * and result in the object being freed from under us.
2517 * in this object being freed.
2518 *
2519 * Note 1: Shrinking the bound list is special since only active
2520 * (and hence bound) objects can contain such limbo objects, so
2521 * we don't need special tricks for shrinking the unbound list.
2522 * The only other place where we have to be careful with active
2523 * objects suddenly disappearing due to retiring requests is the
2524 * eviction code.
2525 *
2526 * Note 2: Even though the bound list doesn't hold a reference
2527 * to the object we can safely grab one here: The final object
2528 * unreferencing and the bound_list are both protected by the
2529 * dev->struct_mutex and so we won't ever be able to observe an
2530 * object on the bound_list with a reference count equals 0.
2531 */
2532 drm_gem_object_reference(&obj->base);
2533
2534 list_for_each_entry_safe(vma, v, &obj->vma_list, vma_link)
2535 if (i915_vma_unbind(vma))
2536 break;
2537
2538 if (i915_gem_object_put_pages(obj) == 0)
2539 count += obj->base.size >> PAGE_SHIFT;
2540
2541 drm_gem_object_unreference(&obj->base);
2542 }
2543 list_splice(&still_bound_list, &dev_priv->mm.bound_list);
2544
2545 return count;
2546 }
2547
2548 static unsigned long
2549 i915_gem_purge(struct drm_i915_private *dev_priv, long target)
2550 {
2551 return __i915_gem_shrink(dev_priv, target, true);
2552 }
2553
2554 static unsigned long
2555 i915_gem_shrink_all(struct drm_i915_private *dev_priv)
2556 {
2557 struct drm_i915_gem_object *obj, *next;
2558 long freed = 0;
2559
2560 i915_gem_evict_everything(dev_priv->dev);
2561
2562 list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list,
2563 global_list) {
2564 if (i915_gem_object_put_pages(obj) == 0)
2565 freed += obj->base.size >> PAGE_SHIFT;
2566 }
2567 return freed;
2568 }
2569
2570 #ifdef __NetBSD__
2571 static int
2572 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2573 {
2574 struct drm_device *const dev = obj->base.dev;
2575 struct vm_page *page;
2576 int error;
2577
2578 /* XXX Cargo-culted from the Linux code. */
2579 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2580 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2581
2582 KASSERT(obj->pages == NULL);
2583 TAILQ_INIT(&obj->igo_pageq);
2584 obj->pages = kcalloc((obj->base.size / PAGE_SIZE),
2585 sizeof(obj->pages[0]), GFP_KERNEL);
2586 if (obj->pages == NULL) {
2587 error = -ENOMEM;
2588 goto fail0;
2589 }
2590
2591 /* XXX errno NetBSD->Linux */
2592 error = -bus_dmamem_wire_uvm_object(dev->dmat, obj->base.gemo_shm_uao,
2593 0, obj->base.size, &obj->igo_pageq, PAGE_SIZE, 0, obj->pages,
2594 (obj->base.size / PAGE_SIZE), &obj->igo_nsegs, BUS_DMA_NOWAIT);
2595 if (error)
2596 /* XXX Try i915_gem_purge, i915_gem_shrink_all. */
2597 goto fail1;
2598 KASSERT(0 < obj->igo_nsegs);
2599 KASSERT(obj->igo_nsegs <= (obj->base.size / PAGE_SIZE));
2600
2601 /*
2602 * Check that the paddrs will fit in 40 bits, or 32 bits on i965.
2603 *
2604 * XXX This is wrong; we ought to pass this constraint to
2605 * bus_dmamem_wire_uvm_object instead.
2606 */
2607 TAILQ_FOREACH(page, &obj->igo_pageq, pageq.queue) {
2608 const uint64_t mask =
2609 (IS_BROADWATER(dev) || IS_CRESTLINE(dev)?
2610 0xffffffffULL : 0xffffffffffULL);
2611 if (VM_PAGE_TO_PHYS(page) & ~mask) {
2612 DRM_ERROR("GEM physical address exceeds %u bits"
2613 ": %"PRIxMAX"\n",
2614 popcount64(mask),
2615 (uintmax_t)VM_PAGE_TO_PHYS(page));
2616 error = -EIO;
2617 goto fail2;
2618 }
2619 }
2620
2621 /* XXX Should create the DMA map when creating the object. */
2622
2623 /* XXX errno NetBSD->Linux */
2624 error = -bus_dmamap_create(dev->dmat, obj->base.size, obj->igo_nsegs,
2625 PAGE_SIZE, 0, BUS_DMA_NOWAIT, &obj->igo_dmamap);
2626 if (error)
2627 goto fail2;
2628
2629 /* XXX Cargo-culted from the Linux code. */
2630 if (i915_gem_object_needs_bit17_swizzle(obj))
2631 i915_gem_object_do_bit_17_swizzle(obj);
2632
2633 /* Success! */
2634 return 0;
2635
2636 fail2: bus_dmamem_unwire_uvm_object(dev->dmat, obj->base.gemo_shm_uao, 0,
2637 obj->base.size, obj->pages, (obj->base.size / PAGE_SIZE));
2638 fail1: kfree(obj->pages);
2639 obj->pages = NULL;
2640 fail0: KASSERT(error);
2641 return error;
2642 }
2643 #else
2644 static int
2645 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2646 {
2647 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2648 int page_count, i;
2649 struct address_space *mapping;
2650 struct sg_table *st;
2651 struct scatterlist *sg;
2652 struct sg_page_iter sg_iter;
2653 struct page *page;
2654 unsigned long last_pfn = 0; /* suppress gcc warning */
2655 gfp_t gfp;
2656
2657 /* Assert that the object is not currently in any GPU domain. As it
2658 * wasn't in the GTT, there shouldn't be any way it could have been in
2659 * a GPU cache
2660 */
2661 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2662 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2663
2664 st = kmalloc(sizeof(*st), GFP_KERNEL);
2665 if (st == NULL)
2666 return -ENOMEM;
2667
2668 page_count = obj->base.size / PAGE_SIZE;
2669 if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2670 kfree(st);
2671 return -ENOMEM;
2672 }
2673
2674 /* Get the list of pages out of our struct file. They'll be pinned
2675 * at this point until we release them.
2676 *
2677 * Fail silently without starting the shrinker
2678 */
2679 mapping = file_inode(obj->base.filp)->i_mapping;
2680 gfp = mapping_gfp_mask(mapping);
2681 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
2682 gfp &= ~(__GFP_IO | __GFP_WAIT);
2683 sg = st->sgl;
2684 st->nents = 0;
2685 for (i = 0; i < page_count; i++) {
2686 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2687 if (IS_ERR(page)) {
2688 i915_gem_purge(dev_priv, page_count);
2689 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2690 }
2691 if (IS_ERR(page)) {
2692 /* We've tried hard to allocate the memory by reaping
2693 * our own buffer, now let the real VM do its job and
2694 * go down in flames if truly OOM.
2695 */
2696 gfp &= ~(__GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD);
2697 gfp |= __GFP_IO | __GFP_WAIT;
2698
2699 i915_gem_shrink_all(dev_priv);
2700 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2701 if (IS_ERR(page))
2702 goto err_pages;
2703
2704 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
2705 gfp &= ~(__GFP_IO | __GFP_WAIT);
2706 }
2707 #ifdef CONFIG_SWIOTLB
2708 if (swiotlb_nr_tbl()) {
2709 st->nents++;
2710 sg_set_page(sg, page, PAGE_SIZE, 0);
2711 sg = sg_next(sg);
2712 continue;
2713 }
2714 #endif
2715 if (!i || page_to_pfn(page) != last_pfn + 1) {
2716 if (i)
2717 sg = sg_next(sg);
2718 st->nents++;
2719 sg_set_page(sg, page, PAGE_SIZE, 0);
2720 } else {
2721 sg->length += PAGE_SIZE;
2722 }
2723 last_pfn = page_to_pfn(page);
2724
2725 /* Check that the i965g/gm workaround works. */
2726 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
2727 }
2728 #ifdef CONFIG_SWIOTLB
2729 if (!swiotlb_nr_tbl())
2730 #endif
2731 sg_mark_end(sg);
2732 obj->pages = st;
2733
2734 if (i915_gem_object_needs_bit17_swizzle(obj))
2735 i915_gem_object_do_bit_17_swizzle(obj);
2736
2737 return 0;
2738
2739 err_pages:
2740 sg_mark_end(sg);
2741 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
2742 page_cache_release(sg_page_iter_page(&sg_iter));
2743 sg_free_table(st);
2744 kfree(st);
2745 return PTR_ERR(page);
2746 }
2747 #endif
2748
2749 /* Ensure that the associated pages are gathered from the backing storage
2750 * and pinned into our object. i915_gem_object_get_pages() may be called
2751 * multiple times before they are released by a single call to
2752 * i915_gem_object_put_pages() - once the pages are no longer referenced
2753 * either as a result of memory pressure (reaping pages under the shrinker)
2754 * or as the object is itself released.
2755 */
2756 int
2757 i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2758 {
2759 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2760 const struct drm_i915_gem_object_ops *ops = obj->ops;
2761 int ret;
2762
2763 if (obj->pages)
2764 return 0;
2765
2766 if (obj->madv != I915_MADV_WILLNEED) {
2767 DRM_DEBUG("Attempting to obtain a purgeable object\n");
2768 return -EFAULT;
2769 }
2770
2771 BUG_ON(obj->pages_pin_count);
2772
2773 ret = ops->get_pages(obj);
2774 if (ret)
2775 return ret;
2776
2777 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
2778 return 0;
2779 }
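
/*
 * Typical caller pattern (informal sketch, error handling omitted): pin
 * the pages around any span in which they must not be reaped, using the
 * pin helpers from i915_drv.h:
 *
 *	ret = i915_gem_object_get_pages(obj);
 *	if (ret == 0) {
 *		i915_gem_object_pin_pages(obj);
 *		... use obj->pages ...
 *		i915_gem_object_unpin_pages(obj);
 *	}
 *
 * i915_gem_object_put_pages() above refuses to drop the backing store
 * while pages_pin_count is non-zero.
 */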
2780
2781 static void
2782 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
2783 struct intel_ring_buffer *ring)
2784 {
2785 struct drm_device *dev = obj->base.dev;
2786 struct drm_i915_private *dev_priv = dev->dev_private;
2787 u32 seqno = intel_ring_get_seqno(ring);
2788
2789 BUG_ON(ring == NULL);
2790 if (obj->ring != ring && obj->last_write_seqno) {
2791 /* Keep the seqno relative to the current ring */
2792 obj->last_write_seqno = seqno;
2793 }
2794 obj->ring = ring;
2795
2796 /* Add a reference if we're newly entering the active list. */
2797 if (!obj->active) {
2798 drm_gem_object_reference(&obj->base);
2799 obj->active = 1;
2800 }
2801
2802 list_move_tail(&obj->ring_list, &ring->active_list);
2803
2804 obj->last_read_seqno = seqno;
2805
2806 if (obj->fenced_gpu_access) {
2807 obj->last_fenced_seqno = seqno;
2808
2809 /* Bump MRU to take account of the delayed flush */
2810 if (obj->fence_reg != I915_FENCE_REG_NONE) {
2811 struct drm_i915_fence_reg *reg;
2812
2813 reg = &dev_priv->fence_regs[obj->fence_reg];
2814 list_move_tail(&reg->lru_list,
2815 &dev_priv->mm.fence_list);
2816 }
2817 }
2818 }
2819
2820 void i915_vma_move_to_active(struct i915_vma *vma,
2821 struct intel_ring_buffer *ring)
2822 {
2823 list_move_tail(&vma->mm_list, &vma->vm->active_list);
2824 return i915_gem_object_move_to_active(vma->obj, ring);
2825 }
2826
2827 static void
2828 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
2829 {
2830 struct drm_device *dev = obj->base.dev;
2831 struct drm_i915_private *dev_priv = dev->dev_private;
2832 struct i915_address_space *vm;
2833 struct i915_vma *vma;
2834
2835 if ((obj->base.write_domain & I915_GEM_DOMAIN_GTT) != 0) {
2836 #if 0
2837 printk(KERN_ERR "%s: %p 0x%x flushing gtt\n", __func__, obj,
2838 obj->base.write_domain);
2839 #endif
2840 i915_gem_object_flush_gtt_write_domain(obj);
2841 }
2842 BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
2843 BUG_ON(!obj->active);
2844
2845 list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
2846 vma = i915_gem_obj_to_vma(obj, vm);
2847 if (vma && !list_empty(&vma->mm_list))
2848 list_move_tail(&vma->mm_list, &vm->inactive_list);
2849 }
2850
2851 list_del_init(&obj->ring_list);
2852 obj->ring = NULL;
2853
2854 obj->last_read_seqno = 0;
2855 obj->last_write_seqno = 0;
2856 obj->base.write_domain = 0;
2857
2858 obj->last_fenced_seqno = 0;
2859 obj->fenced_gpu_access = false;
2860
2861 obj->active = 0;
2862 drm_gem_object_unreference(&obj->base);
2863
2864 WARN_ON(i915_verify_lists(dev));
2865 }
2866
2867 static int
2868 i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
2869 {
2870 struct drm_i915_private *dev_priv = dev->dev_private;
2871 struct intel_ring_buffer *ring;
2872 int ret, i, j;
2873
2874 /* Carefully retire all requests without writing to the rings */
2875 for_each_ring(ring, dev_priv, i) {
2876 ret = intel_ring_idle(ring);
2877 if (ret)
2878 return ret;
2879 }
2880 i915_gem_retire_requests(dev);
2881
2882 /* Finally reset hw state */
2883 for_each_ring(ring, dev_priv, i) {
2884 intel_ring_init_seqno(ring, seqno);
2885
2886 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
2887 ring->sync_seqno[j] = 0;
2888 }
2889
2890 return 0;
2891 }
2892
2893 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
2894 {
2895 struct drm_i915_private *dev_priv = dev->dev_private;
2896 int ret;
2897
2898 if (seqno == 0)
2899 return -EINVAL;
2900
2901 /* HWS page needs to be set less than what we
2902 * will inject to ring
2903 */
2904 ret = i915_gem_init_seqno(dev, seqno - 1);
2905 if (ret)
2906 return ret;
2907
2908 /* Carefully set the last_seqno value so that wrap
2909 * detection still works
2910 */
2911 dev_priv->next_seqno = seqno;
2912 dev_priv->last_seqno = seqno - 1;
2913 if (dev_priv->last_seqno == 0)
2914 dev_priv->last_seqno--;
2915
2916 return 0;
2917 }
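
/*
 * Wrap-detection example (illustration only): i915_gem_set_seqno(dev, 1)
 * leaves next_seqno = 1 and last_seqno = 0xffffffff (0 is reserved, so the
 * zero value is decremented).  Seqno comparisons elsewhere in the driver
 * are done with signed 32-bit subtraction, so the first seqno emitted
 * afterwards, 1, still orders after 0xffffffff:
 *
 *	(int32_t)(1u - 0xffffffffu) == 2, which is >= 0.
 */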
2918
2919 int
2920 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
2921 {
2922 struct drm_i915_private *dev_priv = dev->dev_private;
2923
2924 /* reserve 0 for non-seqno */
2925 if (dev_priv->next_seqno == 0) {
2926 int ret = i915_gem_init_seqno(dev, 0);
2927 if (ret)
2928 return ret;
2929
2930 dev_priv->next_seqno = 1;
2931 }
2932
2933 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
2934 return 0;
2935 }
2936
2937 int __i915_add_request(struct intel_ring_buffer *ring,
2938 struct drm_file *file,
2939 struct drm_i915_gem_object *obj,
2940 u32 *out_seqno)
2941 {
2942 struct drm_i915_private *dev_priv = ring->dev->dev_private;
2943 struct drm_i915_gem_request *request;
2944 u32 request_ring_position, request_start;
2945 int ret;
2946
2947 request_start = intel_ring_get_tail(ring);
2948 /*
2949 * Emit any outstanding flushes - execbuf can fail to emit the flush
2950 * after having emitted the batchbuffer command. Hence we need to fix
2951 * things up similar to emitting the lazy request. The difference here
2952 * is that the flush _must_ happen before the next request, no matter
2953 * what.
2954 */
2955 ret = intel_ring_flush_all_caches(ring);
2956 if (ret)
2957 return ret;
2958
2959 request = ring->preallocated_lazy_request;
2960 if (WARN_ON(request == NULL))
2961 return -ENOMEM;
2962
2963 /* Record the position of the start of the request so that
2964 * should we detect the updated seqno part-way through the
2965 * GPU processing the request, we never over-estimate the
2966 * position of the head.
2967 */
2968 request_ring_position = intel_ring_get_tail(ring);
2969
2970 ret = ring->add_request(ring);
2971 if (ret)
2972 return ret;
2973
2974 request->seqno = intel_ring_get_seqno(ring);
2975 request->ring = ring;
2976 request->head = request_start;
2977 request->tail = request_ring_position;
2978
2979 /* Whilst this request exists, batch_obj will be on the
2980 * active_list, and so will hold the active reference. Only when this
2981 * request is retired will the batch_obj be moved onto the
2982 * inactive_list and lose its active reference. Hence we do not need
2983 * to explicitly hold another reference here.
2984 */
2985 request->batch_obj = obj;
2986
2987 /* Hold a reference to the current context so that we can inspect
2988 * it later in case a hangcheck error event fires.
2989 */
2990 request->ctx = ring->last_context;
2991 if (request->ctx)
2992 i915_gem_context_reference(request->ctx);
2993
2994 request->emitted_jiffies = jiffies;
2995 list_add_tail(&request->list, &ring->request_list);
2996 request->file_priv = NULL;
2997
2998 if (file) {
2999 struct drm_i915_file_private *file_priv = file->driver_priv;
3000
3001 spin_lock(&file_priv->mm.lock);
3002 request->file_priv = file_priv;
3003 list_add_tail(&request->client_list,
3004 &file_priv->mm.request_list);
3005 spin_unlock(&file_priv->mm.lock);
3006 }
3007
3008 trace_i915_gem_request_add(ring, request->seqno);
3009 ring->outstanding_lazy_seqno = 0;
3010 ring->preallocated_lazy_request = NULL;
3011
3012 if (!dev_priv->ums.mm_suspended) {
3013 i915_queue_hangcheck(ring->dev);
3014
3015 cancel_delayed_work_sync(&dev_priv->mm.idle_work);
3016 queue_delayed_work(dev_priv->wq,
3017 &dev_priv->mm.retire_work,
3018 round_jiffies_up_relative(HZ));
3019 intel_mark_busy(dev_priv->dev);
3020 }
3021
3022 if (out_seqno)
3023 *out_seqno = request->seqno;
3024 return 0;
3025 }
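
/*
 * Usage note (informal): most callers have no file or batch object to
 * attach and go through the i915_add_request() wrapper in i915_drv.h,
 * which passes NULL for both; execbuffer is the main caller that supplies
 * them so the request can be tied back to the submitting client for hang
 * statistics.
 */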
3026
3027 static inline void
3028 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
3029 {
3030 struct drm_i915_file_private *file_priv = request->file_priv;
3031
3032 if (!file_priv)
3033 return;
3034
3035 spin_lock(&file_priv->mm.lock);
3036 list_del(&request->client_list);
3037 request->file_priv = NULL;
3038 spin_unlock(&file_priv->mm.lock);
3039 }
3040
3041 static bool i915_context_is_banned(struct drm_i915_private *dev_priv,
3042 const struct i915_hw_context *ctx)
3043 {
3044 unsigned long elapsed;
3045
3046 elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
3047
3048 if (ctx->hang_stats.banned)
3049 return true;
3050
3051 if (elapsed <= DRM_I915_CTX_BAN_PERIOD) {
3052 if (!i915_gem_context_is_default(ctx)) {
3053 DRM_DEBUG("context hanging too fast, banning!\n");
3054 return true;
3055 } else if (dev_priv->gpu_error.stop_rings == 0) {
3056 DRM_ERROR("gpu hanging too fast, banning!\n");
3057 return true;
3058 }
3059 }
3060
3061 return false;
3062 }
3063
3064 static void i915_set_reset_status(struct drm_i915_private *dev_priv,
3065 struct i915_hw_context *ctx,
3066 const bool guilty)
3067 {
3068 struct i915_ctx_hang_stats *hs;
3069
3070 if (WARN_ON(!ctx))
3071 return;
3072
3073 hs = &ctx->hang_stats;
3074
3075 if (guilty) {
3076 hs->banned = i915_context_is_banned(dev_priv, ctx);
3077 hs->batch_active++;
3078 hs->guilty_ts = get_seconds();
3079 } else {
3080 hs->batch_pending++;
3081 }
3082 }
3083
3084 static void i915_gem_free_request(struct drm_i915_gem_request *request)
3085 {
3086 list_del(&request->list);
3087 i915_gem_request_remove_from_client(request);
3088
3089 if (request->ctx)
3090 i915_gem_context_unreference(request->ctx);
3091
3092 kfree(request);
3093 }
3094
3095 struct drm_i915_gem_request *
3096 i915_gem_find_active_request(struct intel_ring_buffer *ring)
3097 {
3098 struct drm_i915_gem_request *request;
3099 u32 completed_seqno;
3100
3101 completed_seqno = ring->get_seqno(ring, false);
3102
3103 list_for_each_entry(request, &ring->request_list, list) {
3104 if (i915_seqno_passed(completed_seqno, request->seqno))
3105 continue;
3106
3107 return request;
3108 }
3109
3110 return NULL;
3111 }
3112
3113 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
3114 struct intel_ring_buffer *ring)
3115 {
3116 struct drm_i915_gem_request *request;
3117 bool ring_hung;
3118
3119 request = i915_gem_find_active_request(ring);
3120
3121 if (request == NULL)
3122 return;
3123
3124 ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
3125
3126 i915_set_reset_status(dev_priv, request->ctx, ring_hung);
3127
3128 list_for_each_entry_continue(request, &ring->request_list, list)
3129 i915_set_reset_status(dev_priv, request->ctx, false);
3130 }
3131
3132 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
3133 struct intel_ring_buffer *ring)
3134 {
3135 while (!list_empty(&ring->active_list)) {
3136 struct drm_i915_gem_object *obj;
3137
3138 obj = list_first_entry(&ring->active_list,
3139 struct drm_i915_gem_object,
3140 ring_list);
3141
3142 i915_gem_object_move_to_inactive(obj);
3143 }
3144
3145 /*
3146 * We must free the requests after all the corresponding objects have
3147 * been moved off the active lists, which is the same order the normal
3148 * retire_requests function uses. This is important if objects hold
3149 * implicit references on things like e.g. ppgtt address spaces through
3150 * the request.
3151 */
3152 while (!list_empty(&ring->request_list)) {
3153 struct drm_i915_gem_request *request;
3154
3155 request = list_first_entry(&ring->request_list,
3156 struct drm_i915_gem_request,
3157 list);
3158
3159 i915_gem_free_request(request);
3160 }
3161 }
3162
3163 void i915_gem_restore_fences(struct drm_device *dev)
3164 {
3165 struct drm_i915_private *dev_priv = dev->dev_private;
3166 int i;
3167
3168 for (i = 0; i < dev_priv->num_fence_regs; i++) {
3169 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
3170
3171 /*
3172 * Commit delayed tiling changes if we have an object still
3173 * attached to the fence, otherwise just clear the fence.
3174 */
3175 if (reg->obj) {
3176 i915_gem_object_update_fence(reg->obj, reg,
3177 reg->obj->tiling_mode);
3178 } else {
3179 i915_gem_write_fence(dev, i, NULL);
3180 }
3181 }
3182 }
3183
3184 void i915_gem_reset(struct drm_device *dev)
3185 {
3186 struct drm_i915_private *dev_priv = dev->dev_private;
3187 struct intel_ring_buffer *ring;
3188 int i;
3189
3190 /*
3191 * Before we free the objects from the requests, we need to inspect
3192 * them for finding the guilty party. As the requests only borrow
3193 * their reference to the objects, the inspection must be done first.
3194 */
3195 for_each_ring(ring, dev_priv, i)
3196 i915_gem_reset_ring_status(dev_priv, ring);
3197
3198 for_each_ring(ring, dev_priv, i)
3199 i915_gem_reset_ring_cleanup(dev_priv, ring);
3200
3201 i915_gem_cleanup_ringbuffer(dev);
3202
3203 i915_gem_context_reset(dev);
3204
3205 i915_gem_restore_fences(dev);
3206 }
3207
3208 /**
3209 * This function clears the request list as sequence numbers are passed.
3210 */
3211 static void
3212 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
3213 {
3214 uint32_t seqno;
3215
3216 if (list_empty(&ring->request_list))
3217 return;
3218
3219 WARN_ON(i915_verify_lists(ring->dev));
3220
3221 seqno = ring->get_seqno(ring, true);
3222
3223 /* Move any buffers on the active list that are no longer referenced
3224 * by the ringbuffer to the flushing/inactive lists as appropriate,
3225 * before we free the context associated with the requests.
3226 */
3227 while (!list_empty(&ring->active_list)) {
3228 struct drm_i915_gem_object *obj;
3229
3230 obj = list_first_entry(&ring->active_list,
3231 struct drm_i915_gem_object,
3232 ring_list);
3233
3234 if (!i915_seqno_passed(seqno, obj->last_read_seqno))
3235 break;
3236
3237 i915_gem_object_move_to_inactive(obj);
3238 }
3239
3241 while (!list_empty(&ring->request_list)) {
3242 struct drm_i915_gem_request *request;
3243
3244 request = list_first_entry(&ring->request_list,
3245 struct drm_i915_gem_request,
3246 list);
3247
3248 if (!i915_seqno_passed(seqno, request->seqno))
3249 break;
3250
3251 trace_i915_gem_request_retire(ring, request->seqno);
3252 /* We know the GPU must have read the request to have
3253 * sent us the seqno + interrupt, so use the position
3254 * of the tail of the request to update the last known position
3255 * of the GPU head.
3256 */
3257 ring->last_retired_head = request->tail;
3258
3259 i915_gem_free_request(request);
3260 }
3261
3262 if (unlikely(ring->trace_irq_seqno &&
3263 i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
3264 ring->irq_put(ring);
3265 ring->trace_irq_seqno = 0;
3266 }
3267
3268 WARN_ON(i915_verify_lists(ring->dev));
3269 }
3270
3271 bool
3272 i915_gem_retire_requests(struct drm_device *dev)
3273 {
3274 struct drm_i915_private *dev_priv = dev->dev_private;
3275 struct intel_ring_buffer *ring;
3276 bool idle = true;
3277 int i;
3278
3279 for_each_ring(ring, dev_priv, i) {
3280 i915_gem_retire_requests_ring(ring);
3281 idle &= list_empty(&ring->request_list);
3282 }
3283
3284 if (idle)
3285 mod_delayed_work(dev_priv->wq,
3286 &dev_priv->mm.idle_work,
3287 msecs_to_jiffies(100));
3288
3289 return idle;
3290 }
3291
3292 static void
3293 i915_gem_retire_work_handler(struct work_struct *work)
3294 {
3295 struct drm_i915_private *dev_priv =
3296 container_of(work, typeof(*dev_priv), mm.retire_work.work);
3297 struct drm_device *dev = dev_priv->dev;
3298 bool idle;
3299
3300 /* Come back later if the device is busy... */
3301 idle = false;
3302 if (mutex_trylock(&dev->struct_mutex)) {
3303 idle = i915_gem_retire_requests(dev);
3304 mutex_unlock(&dev->struct_mutex);
3305 }
3306 if (!idle)
3307 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
3308 round_jiffies_up_relative(HZ));
3309 }
3310
3311 static void
3312 i915_gem_idle_work_handler(struct work_struct *work)
3313 {
3314 struct drm_i915_private *dev_priv =
3315 container_of(work, typeof(*dev_priv), mm.idle_work.work);
3316
3317 intel_mark_idle(dev_priv->dev);
3318 }
3319
3320 /**
3321 * Ensures that an object will eventually get non-busy by flushing any required
3322 * write domains, emitting any outstanding lazy request and retiring any
3323 * completed requests.
3324 */
3325 static int
3326 i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
3327 {
3328 int ret;
3329
3330 if (obj->active) {
3331 ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
3332 if (ret)
3333 return ret;
3334
3335 i915_gem_retire_requests_ring(obj->ring);
3336 }
3337
3338 return 0;
3339 }
3340
3341 /**
3342 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
3343 * @DRM_IOCTL_ARGS: standard ioctl arguments
3344 *
3345 * Returns 0 if successful, else an error is returned with the remaining time in
3346 * the timeout parameter.
3347 * -ETIME: object is still busy after timeout
3348 * -ERESTARTSYS: signal interrupted the wait
3349 * -ENOENT: object doesn't exist
3350 * Also possible, but rare:
3351 * -EAGAIN: GPU wedged
3352 * -ENOMEM: damn
3353 * -ENODEV: Internal IRQ fail
3354 * -E?: The add request failed
3355 *
3356 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
3357 * non-zero timeout parameter the wait ioctl will wait for the given number of
3358 * nanoseconds on an object becoming unbusy. Since the wait itself does so
3359 * without holding struct_mutex the object may become re-busied before this
3360 * function completes. A similar but shorter race condition exists in the busy
3361 * ioctl.
3362 */
3363 int
3364 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3365 {
3366 struct drm_i915_private *dev_priv = dev->dev_private;
3367 struct drm_i915_gem_wait *args = data;
3368 struct drm_i915_gem_object *obj;
3369 struct intel_ring_buffer *ring = NULL;
3370 struct timespec timeout_stack, *timeout = NULL;
3371 unsigned reset_counter;
3372 u32 seqno = 0;
3373 int ret = 0;
3374
3375 if (args->timeout_ns >= 0) {
3376 timeout_stack = ns_to_timespec(args->timeout_ns);
3377 timeout = &timeout_stack;
3378 }
3379
3380 ret = i915_mutex_lock_interruptible(dev);
3381 if (ret)
3382 return ret;
3383
3384 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
3385 if (&obj->base == NULL) {
3386 mutex_unlock(&dev->struct_mutex);
3387 return -ENOENT;
3388 }
3389
3390 /* Need to make sure the object gets inactive eventually. */
3391 ret = i915_gem_object_flush_active(obj);
3392 if (ret)
3393 goto out;
3394
3395 if (obj->active) {
3396 seqno = obj->last_read_seqno;
3397 ring = obj->ring;
3398 }
3399
3400 if (seqno == 0)
3401 goto out;
3402
3403 /* Do this after OLR check to make sure we make forward progress polling
3404 * on this IOCTL with a 0 timeout (like busy ioctl)
3405 */
3406 if (!args->timeout_ns) {
3407 ret = -ETIME;
3408 goto out;
3409 }
3410
3411 drm_gem_object_unreference(&obj->base);
3412 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
3413 mutex_unlock(&dev->struct_mutex);
3414
3415 ret = __wait_seqno(ring, seqno, reset_counter, true, timeout, file->driver_priv);
3416 if (timeout)
3417 args->timeout_ns = timespec_to_ns(timeout);
3418 return ret;
3419
3420 out:
3421 drm_gem_object_unreference(&obj->base);
3422 mutex_unlock(&dev->struct_mutex);
3423 return ret;
3424 }
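
/*
 * Userspace usage sketch (not part of this file; "fd" and "handle" are
 * assumed, error handling omitted): wait up to one second for a buffer to
 * go idle and read back how much of the budget was left.
 *
 *	struct drm_i915_gem_wait wait = {
 *		.bo_handle = handle,
 *		.timeout_ns = 1000000000ll,
 *	};
 *
 *	if (ioctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait) == 0)
 *		... idle; wait.timeout_ns now holds the remaining time ...
 *
 * As noted above, a timeout_ns of 0 turns this into a non-blocking busy
 * query.
 */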
3425
3426 /**
3427 * i915_gem_object_sync - sync an object to a ring.
3428 *
3429 * @obj: object which may be in use on another ring.
3430 * @to: ring we wish to use the object on. May be NULL.
3431 *
3432 * This code is meant to abstract object synchronization with the GPU.
3433 * Calling with NULL implies synchronizing the object with the CPU
3434 * rather than a particular GPU ring.
3435 *
3436 * Returns 0 if successful, else propagates up the lower layer error.
3437 */
3438 int
3439 i915_gem_object_sync(struct drm_i915_gem_object *obj,
3440 struct intel_ring_buffer *to)
3441 {
3442 struct intel_ring_buffer *from = obj->ring;
3443 u32 seqno;
3444 int ret, idx;
3445
3446 if (from == NULL || to == from)
3447 return 0;
3448
3449 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
3450 return i915_gem_object_wait_rendering(obj, false);
3451
3452 idx = intel_ring_sync_index(from, to);
3453
3454 seqno = obj->last_read_seqno;
3455 if (seqno <= from->sync_seqno[idx])
3456 return 0;
3457
3458 ret = i915_gem_check_olr(obj->ring, seqno);
3459 if (ret)
3460 return ret;
3461
3462 trace_i915_gem_ring_sync_to(from, to, seqno);
3463 ret = to->sync_to(to, from, seqno);
3464 if (!ret)
3465 /* We use last_read_seqno because sync_to()
3466 * might have just caused seqno wrap under
3467 * the radar.
3468 */
3469 from->sync_seqno[idx] = obj->last_read_seqno;
3470
3471 return ret;
3472 }
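
/*
 * Usage note (informal): execbuffer calls this for each object before
 * submitting a batch to ring "to", so a cross-ring dependency becomes a
 * GPU semaphore wait when semaphores are enabled; a NULL "to" (CPU access
 * paths) falls back to i915_gem_object_wait_rendering() and stalls on the
 * CPU instead.
 */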
3473
3474 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
3475 {
3476 u32 old_write_domain, old_read_domains;
3477
3478 /* Force a pagefault for domain tracking on next user access */
3479 i915_gem_release_mmap(obj);
3480
3481 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3482 return;
3483
3484 /* Wait for any direct GTT access to complete */
3485 mb();
3486
3487 old_read_domains = obj->base.read_domains;
3488 old_write_domain = obj->base.write_domain;
3489
3490 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
3491 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
3492
3493 trace_i915_gem_object_change_domain(obj,
3494 old_read_domains,
3495 old_write_domain);
3496 }
3497
3498 int i915_vma_unbind(struct i915_vma *vma)
3499 {
3500 struct drm_i915_gem_object *obj = vma->obj;
3501 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3502 int ret;
3503
3504 if (list_empty(&vma->vma_link))
3505 return 0;
3506
3507 if (!drm_mm_node_allocated(&vma->node)) {
3508 i915_gem_vma_destroy(vma);
3509 return 0;
3510 }
3511
3512 if (vma->pin_count)
3513 return -EBUSY;
3514
3515 BUG_ON(obj->pages == NULL);
3516
3517 ret = i915_gem_object_finish_gpu(obj);
3518 if (ret)
3519 return ret;
3520 /* Continue on if we fail due to EIO, the GPU is hung so we
3521 * should be safe and we need to clean up or else we might
3522 * cause memory corruption through use-after-free.
3523 */
3524
3525 i915_gem_object_finish_gtt(obj);
3526
3527 /* release the fence reg _after_ flushing */
3528 ret = i915_gem_object_put_fence(obj);
3529 if (ret)
3530 return ret;
3531
3532 trace_i915_vma_unbind(vma);
3533
3534 vma->unbind_vma(vma);
3535
3536 i915_gem_gtt_finish_object(obj);
3537
3538 list_del_init(&vma->mm_list);
3539 /* Avoid an unnecessary call to unbind on rebind. */
3540 if (i915_is_ggtt(vma->vm))
3541 obj->map_and_fenceable = true;
3542
3543 drm_mm_remove_node(&vma->node);
3544 i915_gem_vma_destroy(vma);
3545
3546 /* Since the unbound list is global, only move to that list if
3547 * no more VMAs exist. */
3548 if (list_empty(&obj->vma_list))
3549 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
3550
3551 /* And finally, now that the object is completely decoupled from this vma,
3552 * we can drop its hold on the backing storage and allow it to be
3553 * reaped by the shrinker.
3554 */
3555 i915_gem_object_unpin_pages(obj);
3556
3557 return 0;
3558 }
3559
3560 int i915_gpu_idle(struct drm_device *dev)
3561 {
3562 struct drm_i915_private *dev_priv = dev->dev_private;
3563 struct intel_ring_buffer *ring;
3564 int ret, i;
3565
3566 /* Flush everything onto the inactive list. */
3567 for_each_ring(ring, dev_priv, i) {
3568 ret = i915_switch_context(ring, ring->default_context);
3569 if (ret)
3570 return ret;
3571
3572 ret = intel_ring_idle(ring);
3573 if (ret)
3574 return ret;
3575 }
3576
3577 return 0;
3578 }
3579
3580 static void i965_write_fence_reg(struct drm_device *dev, int reg,
3581 struct drm_i915_gem_object *obj)
3582 {
3583 struct drm_i915_private *dev_priv = dev->dev_private;
3584 int fence_reg;
3585 int fence_pitch_shift;
3586
3587 if (INTEL_INFO(dev)->gen >= 6) {
3588 fence_reg = FENCE_REG_SANDYBRIDGE_0;
3589 fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT;
3590 } else {
3591 fence_reg = FENCE_REG_965_0;
3592 fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
3593 }
3594
3595 fence_reg += reg * 8;
3596
3597 /* To w/a incoherency with non-atomic 64-bit register updates,
3598 * we split the 64-bit update into two 32-bit writes. In order
3599 * for a partial fence not to be evaluated between writes, we
3600 * precede the update with a write to turn off the fence register,
3601 * and only enable the fence as the last step.
3602 *
3603 * For extra levels of paranoia, we make sure each step lands
3604 * before applying the next step.
3605 */
3606 I915_WRITE(fence_reg, 0);
3607 POSTING_READ(fence_reg);
3608
3609 if (obj) {
3610 u32 size = i915_gem_obj_ggtt_size(obj);
3611 uint64_t val;
3612
3613 val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
3614 0xfffff000) << 32;
3615 val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
3616 val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
3617 if (obj->tiling_mode == I915_TILING_Y)
3618 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
3619 val |= I965_FENCE_REG_VALID;
3620
3621 I915_WRITE(fence_reg + 4, val >> 32);
3622 POSTING_READ(fence_reg + 4);
3623
3624 I915_WRITE(fence_reg + 0, val);
3625 POSTING_READ(fence_reg);
3626 } else {
3627 I915_WRITE(fence_reg + 4, 0);
3628 POSTING_READ(fence_reg + 4);
3629 }
3630 }
3631
3632 static void i915_write_fence_reg(struct drm_device *dev, int reg,
3633 struct drm_i915_gem_object *obj)
3634 {
3635 struct drm_i915_private *dev_priv = dev->dev_private;
3636 u32 val;
3637
3638 if (obj) {
3639 u32 size = i915_gem_obj_ggtt_size(obj);
3640 int pitch_val;
3641 int tile_width;
3642
3643 WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) ||
3644 (size & -size) != size ||
3645 (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
3646 "object 0x%08lx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
3647 i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);
3648
3649 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
3650 tile_width = 128;
3651 else
3652 tile_width = 512;
3653
3654 /* Note: pitch better be a power of two tile widths */
3655 pitch_val = obj->stride / tile_width;
3656 pitch_val = ffs(pitch_val) - 1;
3657
3658 val = i915_gem_obj_ggtt_offset(obj);
3659 if (obj->tiling_mode == I915_TILING_Y)
3660 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
3661 val |= I915_FENCE_SIZE_BITS(size);
3662 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
3663 val |= I830_FENCE_REG_VALID;
3664 } else
3665 val = 0;
3666
3667 if (reg < 8)
3668 reg = FENCE_REG_830_0 + reg * 4;
3669 else
3670 reg = FENCE_REG_945_8 + (reg - 8) * 4;
3671
3672 I915_WRITE(reg, val);
3673 POSTING_READ(reg);
3674 }
3675
3676 static void i830_write_fence_reg(struct drm_device *dev, int reg,
3677 struct drm_i915_gem_object *obj)
3678 {
3679 struct drm_i915_private *dev_priv = dev->dev_private;
3680 uint32_t val;
3681
3682 if (obj) {
3683 u32 size = i915_gem_obj_ggtt_size(obj);
3684 uint32_t pitch_val;
3685
3686 WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) ||
3687 (size & -size) != size ||
3688 (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
3689 "object 0x%08lx not 512K or pot-size 0x%08x aligned\n",
3690 i915_gem_obj_ggtt_offset(obj), size);
3691
3692 pitch_val = obj->stride / 128;
3693 pitch_val = ffs(pitch_val) - 1;
3694
3695 val = i915_gem_obj_ggtt_offset(obj);
3696 if (obj->tiling_mode == I915_TILING_Y)
3697 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
3698 val |= I830_FENCE_SIZE_BITS(size);
3699 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
3700 val |= I830_FENCE_REG_VALID;
3701 } else
3702 val = 0;
3703
3704 I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
3705 POSTING_READ(FENCE_REG_830_0 + reg * 4);
3706 }
3707
3708 inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
3709 {
3710 return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
3711 }
3712
3713 static void i915_gem_write_fence(struct drm_device *dev, int reg,
3714 struct drm_i915_gem_object *obj)
3715 {
3716 struct drm_i915_private *dev_priv = dev->dev_private;
3717
3718 /* Ensure that all CPU reads are completed before installing a fence
3719 * and all writes before removing the fence.
3720 */
3721 if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
3722 mb();
3723
3724 WARN(obj && (!obj->stride || !obj->tiling_mode),
3725 "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
3726 obj->stride, obj->tiling_mode);
3727
3728 switch (INTEL_INFO(dev)->gen) {
3729 case 8:
3730 case 7:
3731 case 6:
3732 case 5:
3733 case 4: i965_write_fence_reg(dev, reg, obj); break;
3734 case 3: i915_write_fence_reg(dev, reg, obj); break;
3735 case 2: i830_write_fence_reg(dev, reg, obj); break;
3736 default: BUG();
3737 }
3738
3739 /* And similarly be paranoid that no direct access to this region
3740 * is reordered to before the fence is installed.
3741 */
3742 if (i915_gem_object_needs_mb(obj))
3743 mb();
3744 }
3745
3746 static inline int fence_number(struct drm_i915_private *dev_priv,
3747 struct drm_i915_fence_reg *fence)
3748 {
3749 return fence - dev_priv->fence_regs;
3750 }
3751
3752 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
3753 struct drm_i915_fence_reg *fence,
3754 bool enable)
3755 {
3756 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3757 int reg = fence_number(dev_priv, fence);
3758
3759 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
3760
3761 if (enable) {
3762 obj->fence_reg = reg;
3763 fence->obj = obj;
3764 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
3765 } else {
3766 obj->fence_reg = I915_FENCE_REG_NONE;
3767 fence->obj = NULL;
3768 list_del_init(&fence->lru_list);
3769 }
3770 obj->fence_dirty = false;
3771 }
3772
3773 static int
3774 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
3775 {
3776 if (obj->last_fenced_seqno) {
3777 int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
3778 if (ret)
3779 return ret;
3780
3781 obj->last_fenced_seqno = 0;
3782 }
3783
3784 obj->fenced_gpu_access = false;
3785 return 0;
3786 }
3787
3788 int
3789 i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
3790 {
3791 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3792 struct drm_i915_fence_reg *fence;
3793 int ret;
3794
3795 ret = i915_gem_object_wait_fence(obj);
3796 if (ret)
3797 return ret;
3798
3799 if (obj->fence_reg == I915_FENCE_REG_NONE)
3800 return 0;
3801
3802 fence = &dev_priv->fence_regs[obj->fence_reg];
3803
3804 i915_gem_object_fence_lost(obj);
3805 i915_gem_object_update_fence(obj, fence, false);
3806
3807 return 0;
3808 }
3809
3810 static struct drm_i915_fence_reg *
3811 i915_find_fence_reg(struct drm_device *dev)
3812 {
3813 struct drm_i915_private *dev_priv = dev->dev_private;
3814 struct drm_i915_fence_reg *reg, *avail;
3815 int i;
3816
3817 /* First try to find a free reg */
3818 avail = NULL;
3819 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
3820 reg = &dev_priv->fence_regs[i];
3821 if (!reg->obj)
3822 return reg;
3823
3824 if (!reg->pin_count)
3825 avail = reg;
3826 }
3827
3828 if (avail == NULL)
3829 goto deadlock;
3830
3831 /* None available, try to steal one or wait for a user to finish */
3832 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
3833 if (reg->pin_count)
3834 continue;
3835
3836 return reg;
3837 }
3838
3839 deadlock:
3840 /* Wait for completion of pending flips which consume fences */
3841 if (intel_has_pending_fb_unpin(dev))
3842 return ERR_PTR(-EAGAIN);
3843
3844 return ERR_PTR(-EDEADLK);
3845 }
3846
3847 /**
3848 * i915_gem_object_get_fence - set up fencing for an object
3849 * @obj: object to map through a fence reg
3850 *
3851 * When mapping objects through the GTT, userspace wants to be able to write
3852 * to them without having to worry about swizzling if the object is tiled.
3853 * This function walks the fence regs looking for a free one for @obj,
3854 * stealing one if it can't find any.
3855 *
3856 * It then sets up the reg based on the object's properties: address, pitch
3857 * and tiling format.
3858 *
3859 * For an untiled surface, this removes any existing fence.
3860 */
3861 int
3862 i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
3863 {
3864 struct drm_device *dev = obj->base.dev;
3865 struct drm_i915_private *dev_priv = dev->dev_private;
3866 bool enable = obj->tiling_mode != I915_TILING_NONE;
3867 struct drm_i915_fence_reg *reg;
3868 int ret;
3869
3870 /* Have we updated the tiling parameters upon the object and so
3871 * will need to serialise the write to the associated fence register?
3872 */
3873 if (obj->fence_dirty) {
3874 ret = i915_gem_object_wait_fence(obj);
3875 if (ret)
3876 return ret;
3877 }
3878
3879 /* Just update our place in the LRU if our fence is getting reused. */
3880 if (obj->fence_reg != I915_FENCE_REG_NONE) {
3881 reg = &dev_priv->fence_regs[obj->fence_reg];
3882 if (!obj->fence_dirty) {
3883 			list_move_tail(&reg->lru_list,
3884 &dev_priv->mm.fence_list);
3885 return 0;
3886 }
3887 } else if (enable) {
3888 reg = i915_find_fence_reg(dev);
3889 if (IS_ERR(reg))
3890 return PTR_ERR(reg);
3891
3892 if (reg->obj) {
3893 struct drm_i915_gem_object *old = reg->obj;
3894
3895 ret = i915_gem_object_wait_fence(old);
3896 if (ret)
3897 return ret;
3898
3899 i915_gem_object_fence_lost(old);
3900 }
3901 } else
3902 return 0;
3903
3904 i915_gem_object_update_fence(obj, reg, enable);
3905
3906 return 0;
3907 }
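/*
 * Illustrative caller sketch (added, not from the original sources; it
 * only uses helpers visible in this file): a typical user pins the
 * object into the mappable GTT and then requests a fence, e.g.
 *
 *	ret = i915_gem_obj_ggtt_pin(obj, alignment, PIN_MAPPABLE);
 *	if (ret)
 *		return ret;
 *	ret = i915_gem_object_get_fence(obj);
 *	if (ret) {
 *		i915_gem_object_ggtt_unpin(obj);
 *		return ret;
 *	}
 *
 * For an object with tiling_mode == I915_TILING_NONE this only drops a
 * stale fence (if the tiling was just changed) and otherwise returns 0,
 * so it is safe to call unconditionally.
 */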
3908
3909 static bool i915_gem_valid_gtt_space(struct drm_device *dev,
3910 struct drm_mm_node *gtt_space,
3911 unsigned long cache_level)
3912 {
3913 struct drm_mm_node *other;
3914
3915 /* On non-LLC machines we have to be careful when putting differing
3916 * types of snoopable memory together to avoid the prefetcher
3917 * crossing memory domains and dying.
3918 */
3919 if (HAS_LLC(dev))
3920 return true;
3921
3922 if (!drm_mm_node_allocated(gtt_space))
3923 return true;
3924
3925 	if (list_empty(&gtt_space->node_list))
3926 return true;
3927
3928 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
3929 if (other->allocated && !other->hole_follows && other->color != cache_level)
3930 return false;
3931
3932 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
3933 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
3934 return false;
3935
3936 return true;
3937 }
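/*
 * Added commentary: a GTT node's "color" is the owning object's cache
 * level (i915_gem_object_set_cache_level() keeps vma->node.color in
 * sync).  On machines without an LLC the checks above therefore only
 * admit a placement when each allocated neighbour either shares our
 * cache level or is separated from us by a hole, so the prefetcher can
 * never run straight from one snoop type into another.
 */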
3938
3939 static void i915_gem_verify_gtt(struct drm_device *dev)
3940 {
3941 #if WATCH_GTT
3942 struct drm_i915_private *dev_priv = dev->dev_private;
3943 struct drm_i915_gem_object *obj;
3944 int err = 0;
3945
3946 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
3947 if (obj->gtt_space == NULL) {
3948 printk(KERN_ERR "object found on GTT list with no space reserved\n");
3949 err++;
3950 continue;
3951 }
3952
3953 if (obj->cache_level != obj->gtt_space->color) {
3954 printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n",
3955 i915_gem_obj_ggtt_offset(obj),
3956 i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj),
3957 obj->cache_level,
3958 obj->gtt_space->color);
3959 err++;
3960 continue;
3961 }
3962
3963 if (!i915_gem_valid_gtt_space(dev,
3964 obj->gtt_space,
3965 obj->cache_level)) {
3966 printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n",
3967 i915_gem_obj_ggtt_offset(obj),
3968 i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj),
3969 obj->cache_level);
3970 err++;
3971 continue;
3972 }
3973 }
3974
3975 WARN_ON(err);
3976 #endif
3977 }
3978
3979 /**
3980 * Finds free space in the GTT aperture and binds the object there.
3981 */
3982 static struct i915_vma *
3983 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
3984 struct i915_address_space *vm,
3985 unsigned alignment,
3986 uint64_t flags)
3987 {
3988 struct drm_device *dev = obj->base.dev;
3989 struct drm_i915_private *dev_priv = dev->dev_private;
3990 u32 size, fence_size, fence_alignment, unfenced_alignment;
3991 unsigned long start =
3992 flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
3993 unsigned long end =
3994 flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total;
3995 struct i915_vma *vma;
3996 int ret;
3997
3998 fence_size = i915_gem_get_gtt_size(dev,
3999 obj->base.size,
4000 obj->tiling_mode);
4001 fence_alignment = i915_gem_get_gtt_alignment(dev,
4002 obj->base.size,
4003 obj->tiling_mode, true);
4004 unfenced_alignment =
4005 i915_gem_get_gtt_alignment(dev,
4006 obj->base.size,
4007 obj->tiling_mode, false);
4008
4009 if (alignment == 0)
4010 alignment = flags & PIN_MAPPABLE ? fence_alignment :
4011 unfenced_alignment;
4012 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
4013 DRM_DEBUG("Invalid object alignment requested %u\n", alignment);
4014 return ERR_PTR(-EINVAL);
4015 }
4016
4017 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
4018
4019 /* If the object is bigger than the entire aperture, reject it early
4020 * before evicting everything in a vain attempt to find space.
4021 */
4022 if (obj->base.size > end) {
4023 DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%lu\n",
4024 obj->base.size,
4025 flags & PIN_MAPPABLE ? "mappable" : "total",
4026 end);
4027 return ERR_PTR(-E2BIG);
4028 }
4029
4030 ret = i915_gem_object_get_pages(obj);
4031 if (ret)
4032 return ERR_PTR(ret);
4033
4034 i915_gem_object_pin_pages(obj);
4035
4036 vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
4037 if (IS_ERR(vma))
4038 goto err_unpin;
4039
4040 search_free:
4041 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
4042 size, alignment,
4043 obj->cache_level,
4044 start, end,
4045 DRM_MM_SEARCH_DEFAULT,
4046 DRM_MM_CREATE_DEFAULT);
4047 if (ret) {
4048 ret = i915_gem_evict_something(dev, vm, size, alignment,
4049 obj->cache_level,
4050 start, end,
4051 flags);
4052 if (ret == 0)
4053 goto search_free;
4054
4055 goto err_free_vma;
4056 }
4057 if (WARN_ON(!i915_gem_valid_gtt_space(dev, &vma->node,
4058 obj->cache_level))) {
4059 ret = -EINVAL;
4060 goto err_remove_node;
4061 }
4062
4063 ret = i915_gem_gtt_prepare_object(obj);
4064 if (ret)
4065 goto err_remove_node;
4066
4067 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
4068 list_add_tail(&vma->mm_list, &vm->inactive_list);
4069
4070 if (i915_is_ggtt(vm)) {
4071 bool mappable, fenceable;
4072
4073 fenceable = (vma->node.size == fence_size &&
4074 (vma->node.start & (fence_alignment - 1)) == 0);
4075
4076 mappable = (vma->node.start + obj->base.size <=
4077 dev_priv->gtt.mappable_end);
4078
4079 obj->map_and_fenceable = mappable && fenceable;
4080 }
4081
4082 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
4083
4084 trace_i915_vma_bind(vma, flags);
4085 vma->bind_vma(vma, obj->cache_level,
4086 flags & (PIN_MAPPABLE | PIN_GLOBAL) ? GLOBAL_BIND : 0);
4087
4088 i915_gem_verify_gtt(dev);
4089 return vma;
4090
4091 err_remove_node:
4092 drm_mm_remove_node(&vma->node);
4093 err_free_vma:
4094 i915_gem_vma_destroy(vma);
4095 vma = ERR_PTR(ret);
4096 err_unpin:
4097 i915_gem_object_unpin_pages(obj);
4098 return vma;
4099 }
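/*
 * Added commentary: the binding sequence above is, in order: pin the
 * backing pages, look up or create the VMA for this address space,
 * carve a node out of the drm_mm range manager (evicting and retrying
 * on failure), re-check the placement colour, set up the GTT mapping,
 * move the object and VMA onto the bound/inactive lists, and finally
 * call vma->bind_vma().  The error labels unwind in the reverse order.
 */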
4100
4101 bool
4102 i915_gem_clflush_object(struct drm_i915_gem_object *obj,
4103 bool force)
4104 {
4105 /* If we don't have a page list set up, then we're not pinned
4106 * to GPU, and we can ignore the cache flush because it'll happen
4107 * again at bind time.
4108 */
4109 if (obj->pages == NULL)
4110 return false;
4111
4112 /*
4113 * Stolen memory is always coherent with the GPU as it is explicitly
4114 * marked as wc by the system, or the system is cache-coherent.
4115 */
4116 if (obj->stolen)
4117 return false;
4118
4119 /* If the GPU is snooping the contents of the CPU cache,
4120 * we do not need to manually clear the CPU cache lines. However,
4121 * the caches are only snooped when the render cache is
4122 * flushed/invalidated. As we always have to emit invalidations
4123 * and flushes when moving into and out of the RENDER domain, correct
4124 * snooping behaviour occurs naturally as the result of our domain
4125 * tracking.
4126 */
4127 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
4128 return false;
4129
4130 trace_i915_gem_object_clflush(obj);
4131 #ifdef __NetBSD__
4132 drm_clflush_pglist(&obj->igo_pageq);
4133 #else
4134 drm_clflush_sg(obj->pages);
4135 #endif
4136
4137 return true;
4138 }
4139
4140 /** Flushes the GTT write domain for the object if it's dirty. */
4141 static void
4142 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
4143 {
4144 uint32_t old_write_domain;
4145
4146 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
4147 return;
4148
4149 /* No actual flushing is required for the GTT write domain. Writes
4150 * to it immediately go to main memory as far as we know, so there's
4151 * no chipset flush. It also doesn't land in render cache.
4152 *
4153 * However, we do have to enforce the order so that all writes through
4154 * the GTT land before any writes to the device, such as updates to
4155 * the GATT itself.
4156 */
4157 wmb();
4158
4159 old_write_domain = obj->base.write_domain;
4160 obj->base.write_domain = 0;
4161
4162 trace_i915_gem_object_change_domain(obj,
4163 obj->base.read_domains,
4164 old_write_domain);
4165 }
4166
4167 /** Flushes the CPU write domain for the object if it's dirty. */
4168 static void
4169 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
4170 bool force)
4171 {
4172 uint32_t old_write_domain;
4173
4174 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
4175 return;
4176
4177 if (i915_gem_clflush_object(obj, force))
4178 i915_gem_chipset_flush(obj->base.dev);
4179
4180 old_write_domain = obj->base.write_domain;
4181 obj->base.write_domain = 0;
4182
4183 trace_i915_gem_object_change_domain(obj,
4184 obj->base.read_domains,
4185 old_write_domain);
4186 }
4187
4188 /**
4189 * Moves a single object to the GTT read, and possibly write domain.
4190 *
4191 * This function returns when the move is complete, including waiting on
4192 * flushes to occur.
4193 */
4194 int
4195 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
4196 {
4197 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
4198 uint32_t old_write_domain, old_read_domains;
4199 int ret;
4200
4201 /* Not valid to be called on unbound objects. */
4202 if (!i915_gem_obj_bound_any(obj))
4203 return -EINVAL;
4204
4205 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
4206 return 0;
4207
4208 ret = i915_gem_object_wait_rendering(obj, !write);
4209 if (ret)
4210 return ret;
4211
4212 i915_gem_object_flush_cpu_write_domain(obj, false);
4213
4214 /* Serialise direct access to this object with the barriers for
4215 * coherent writes from the GPU, by effectively invalidating the
4216 * GTT domain upon first access.
4217 */
4218 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
4219 mb();
4220
4221 old_write_domain = obj->base.write_domain;
4222 old_read_domains = obj->base.read_domains;
4223
4224 /* It should now be out of any other write domains, and we can update
4225 * the domain values for our changes.
4226 */
4227 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
4228 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
4229 if (write) {
4230 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
4231 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
4232 obj->dirty = 1;
4233 }
4234
4235 trace_i915_gem_object_change_domain(obj,
4236 old_read_domains,
4237 old_write_domain);
4238
4239 /* And bump the LRU for this access */
4240 if (i915_gem_object_is_inactive(obj)) {
4241 struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
4242 if (vma)
4243 list_move_tail(&vma->mm_list,
4244 &dev_priv->gtt.base.inactive_list);
4245
4246 }
4247
4248 return 0;
4249 }
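/*
 * Illustrative caller sketch (added, not from the original sources): a
 * caller intending to write through a GTT mapping typically does
 *
 *	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
 *	if (ret == 0)
 *		ret = i915_gem_object_set_to_gtt_domain(obj, true);
 *
 * before touching the aperture; with write == true the GTT becomes the
 * sole read and write domain and the object is marked dirty.
 */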
4250
4251 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
4252 enum i915_cache_level cache_level)
4253 {
4254 struct drm_device *dev = obj->base.dev;
4255 struct i915_vma *vma, *next;
4256 int ret;
4257
4258 if (obj->cache_level == cache_level)
4259 return 0;
4260
4261 if (i915_gem_obj_is_pinned(obj)) {
4262 DRM_DEBUG("can not change the cache level of pinned objects\n");
4263 return -EBUSY;
4264 }
4265
4266 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
4267 if (!i915_gem_valid_gtt_space(dev, &vma->node, cache_level)) {
4268 ret = i915_vma_unbind(vma);
4269 if (ret)
4270 return ret;
4271 }
4272 }
4273
4274 if (i915_gem_obj_bound_any(obj)) {
4275 ret = i915_gem_object_finish_gpu(obj);
4276 if (ret)
4277 return ret;
4278
4279 i915_gem_object_finish_gtt(obj);
4280
4281 /* Before SandyBridge, you could not use tiling or fence
4282 * registers with snooped memory, so relinquish any fences
4283 * currently pointing to our region in the aperture.
4284 */
4285 if (INTEL_INFO(dev)->gen < 6) {
4286 ret = i915_gem_object_put_fence(obj);
4287 if (ret)
4288 return ret;
4289 }
4290
4291 list_for_each_entry(vma, &obj->vma_list, vma_link)
4292 if (drm_mm_node_allocated(&vma->node))
4293 vma->bind_vma(vma, cache_level,
4294 obj->has_global_gtt_mapping ? GLOBAL_BIND : 0);
4295 }
4296
4297 list_for_each_entry(vma, &obj->vma_list, vma_link)
4298 vma->node.color = cache_level;
4299 obj->cache_level = cache_level;
4300
4301 if (cpu_write_needs_clflush(obj)) {
4302 u32 old_read_domains, old_write_domain;
4303
4304 /* If we're coming from LLC cached, then we haven't
4305 * actually been tracking whether the data is in the
4306 * CPU cache or not, since we only allow one bit set
4307 * in obj->write_domain and have been skipping the clflushes.
4308 * Just set it to the CPU cache for now.
4309 */
4310 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
4311
4312 old_read_domains = obj->base.read_domains;
4313 old_write_domain = obj->base.write_domain;
4314
4315 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4316 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4317
4318 trace_i915_gem_object_change_domain(obj,
4319 old_read_domains,
4320 old_write_domain);
4321 }
4322
4323 i915_gem_verify_gtt(dev);
4324 return 0;
4325 }
4326
4327 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
4328 struct drm_file *file)
4329 {
4330 struct drm_i915_gem_caching *args = data;
4331 struct drm_i915_gem_object *obj;
4332 int ret;
4333
4334 ret = i915_mutex_lock_interruptible(dev);
4335 if (ret)
4336 return ret;
4337
4338 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4339 if (&obj->base == NULL) {
4340 ret = -ENOENT;
4341 goto unlock;
4342 }
4343
4344 switch (obj->cache_level) {
4345 case I915_CACHE_LLC:
4346 case I915_CACHE_L3_LLC:
4347 args->caching = I915_CACHING_CACHED;
4348 break;
4349
4350 case I915_CACHE_WT:
4351 args->caching = I915_CACHING_DISPLAY;
4352 break;
4353
4354 default:
4355 args->caching = I915_CACHING_NONE;
4356 break;
4357 }
4358
4359 drm_gem_object_unreference(&obj->base);
4360 unlock:
4361 mutex_unlock(&dev->struct_mutex);
4362 return ret;
4363 }
4364
4365 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
4366 struct drm_file *file)
4367 {
4368 struct drm_i915_gem_caching *args = data;
4369 struct drm_i915_gem_object *obj;
4370 enum i915_cache_level level;
4371 int ret;
4372
4373 switch (args->caching) {
4374 case I915_CACHING_NONE:
4375 level = I915_CACHE_NONE;
4376 break;
4377 case I915_CACHING_CACHED:
4378 level = I915_CACHE_LLC;
4379 break;
4380 case I915_CACHING_DISPLAY:
4381 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
4382 break;
4383 default:
4384 return -EINVAL;
4385 }
4386
4387 ret = i915_mutex_lock_interruptible(dev);
4388 if (ret)
4389 return ret;
4390
4391 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4392 if (&obj->base == NULL) {
4393 ret = -ENOENT;
4394 goto unlock;
4395 }
4396
4397 ret = i915_gem_object_set_cache_level(obj, level);
4398
4399 drm_gem_object_unreference(&obj->base);
4400 unlock:
4401 mutex_unlock(&dev->struct_mutex);
4402 return ret;
4403 }
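/*
 * Illustrative userspace sketch (added; the ioctl number and structure
 * are the ones declared in i915_drm.h, not anything defined here):
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_CACHED,
 *	};
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) != 0)
 *		err(1, "set caching");
 *
 * Note that I915_CACHING_DISPLAY only yields write-through caching on
 * hardware with HAS_WT(); elsewhere it degrades to uncached.
 */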
4404
4405 static bool is_pin_display(struct drm_i915_gem_object *obj)
4406 {
4407 /* There are 3 sources that pin objects:
4408 * 1. The display engine (scanouts, sprites, cursors);
4409 * 2. Reservations for execbuffer;
4410 * 3. The user.
4411 *
4412 * We can ignore reservations as we hold the struct_mutex and
4413 * are only called outside of the reservation path. The user
4414 	 * can only increment pin_count once, so if any pin_count remains
4415 	 * after subtracting that potential user reference, it must be due
4416 	 * to another use by the display engine.
4417 */
4418 return i915_gem_obj_to_ggtt(obj)->pin_count - !!obj->user_pin_count;
4419 }
4420
4421 /*
4422 * Prepare buffer for display plane (scanout, cursors, etc).
4423 * Can be called from an uninterruptible phase (modesetting) and allows
4424 * any flushes to be pipelined (for pageflips).
4425 */
4426 int
4427 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
4428 u32 alignment,
4429 struct intel_ring_buffer *pipelined)
4430 {
4431 u32 old_read_domains, old_write_domain;
4432 int ret;
4433
4434 if (pipelined != obj->ring) {
4435 ret = i915_gem_object_sync(obj, pipelined);
4436 if (ret)
4437 return ret;
4438 }
4439
4440 /* Mark the pin_display early so that we account for the
4441 * display coherency whilst setting up the cache domains.
4442 */
4443 obj->pin_display = true;
4444
4445 /* The display engine is not coherent with the LLC cache on gen6. As
4446 * a result, we make sure that the pinning that is about to occur is
4447 	 * done with uncached PTEs. This is the lowest common denominator for all
4448 * chipsets.
4449 *
4450 * However for gen6+, we could do better by using the GFDT bit instead
4451 * of uncaching, which would allow us to flush all the LLC-cached data
4452 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
4453 */
4454 ret = i915_gem_object_set_cache_level(obj,
4455 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
4456 if (ret)
4457 goto err_unpin_display;
4458
4459 /* As the user may map the buffer once pinned in the display plane
4460 * (e.g. libkms for the bootup splash), we have to ensure that we
4461 * always use map_and_fenceable for all scanout buffers.
4462 */
4463 ret = i915_gem_obj_ggtt_pin(obj, alignment, PIN_MAPPABLE);
4464 if (ret)
4465 goto err_unpin_display;
4466
4467 i915_gem_object_flush_cpu_write_domain(obj, true);
4468
4469 old_write_domain = obj->base.write_domain;
4470 old_read_domains = obj->base.read_domains;
4471
4472 /* It should now be out of any other write domains, and we can update
4473 * the domain values for our changes.
4474 */
4475 obj->base.write_domain = 0;
4476 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
4477
4478 trace_i915_gem_object_change_domain(obj,
4479 old_read_domains,
4480 old_write_domain);
4481
4482 return 0;
4483
4484 err_unpin_display:
4485 obj->pin_display = is_pin_display(obj);
4486 return ret;
4487 }
4488
4489 void
4490 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj)
4491 {
4492 i915_gem_object_ggtt_unpin(obj);
4493 obj->pin_display = is_pin_display(obj);
4494 }
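/*
 * Added commentary: pin_to_display_plane() and
 * unpin_from_display_plane() are expected to be balanced; the unpin
 * recomputes obj->pin_display from the remaining GGTT pin count so the
 * display-coherency bookkeeping survives while another display user
 * still holds a pin.
 */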
4495
4496 int
4497 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
4498 {
4499 int ret;
4500
4501 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
4502 return 0;
4503
4504 ret = i915_gem_object_wait_rendering(obj, false);
4505 if (ret)
4506 return ret;
4507
4508 /* Ensure that we invalidate the GPU's caches and TLBs. */
4509 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
4510 return 0;
4511 }
4512
4513 /**
4514 * Moves a single object to the CPU read, and possibly write domain.
4515 *
4516 * This function returns when the move is complete, including waiting on
4517 * flushes to occur.
4518 */
4519 int
4520 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
4521 {
4522 uint32_t old_write_domain, old_read_domains;
4523 int ret;
4524
4525 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
4526 return 0;
4527
4528 ret = i915_gem_object_wait_rendering(obj, !write);
4529 if (ret)
4530 return ret;
4531
4532 i915_gem_object_flush_gtt_write_domain(obj);
4533
4534 old_write_domain = obj->base.write_domain;
4535 old_read_domains = obj->base.read_domains;
4536
4537 /* Flush the CPU cache if it's still invalid. */
4538 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
4539 i915_gem_clflush_object(obj, false);
4540
4541 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
4542 }
4543
4544 /* It should now be out of any other write domains, and we can update
4545 * the domain values for our changes.
4546 */
4547 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
4548
4549 /* If we're writing through the CPU, then the GPU read domains will
4550 * need to be invalidated at next use.
4551 */
4552 if (write) {
4553 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4554 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4555 }
4556
4557 trace_i915_gem_object_change_domain(obj,
4558 old_read_domains,
4559 old_write_domain);
4560
4561 return 0;
4562 }
4563
4564 /* Throttle our rendering by waiting until the ring has completed our requests
4565 * emitted over 20 msec ago.
4566 *
4567 * Note that if we were to use the current jiffies each time around the loop,
4568 * we wouldn't escape the function with any frames outstanding if the time to
4569 * render a frame was over 20ms.
4570 *
4571 * This should get us reasonable parallelism between CPU and GPU but also
4572 * relatively low latency when blocking on a particular request to finish.
4573 */
4574 static int
4575 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
4576 {
4577 struct drm_i915_private *dev_priv = dev->dev_private;
4578 struct drm_i915_file_private *file_priv = file->driver_priv;
4579 unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
4580 struct drm_i915_gem_request *request;
4581 struct intel_ring_buffer *ring = NULL;
4582 unsigned reset_counter;
4583 u32 seqno = 0;
4584 int ret;
4585
4586 ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
4587 if (ret)
4588 return ret;
4589
4590 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false);
4591 if (ret)
4592 return ret;
4593
4594 spin_lock(&file_priv->mm.lock);
4595 list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
4596 if (time_after_eq(request->emitted_jiffies, recent_enough))
4597 break;
4598
4599 ring = request->ring;
4600 seqno = request->seqno;
4601 }
4602 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
4603 spin_unlock(&file_priv->mm.lock);
4604
4605 if (seqno == 0)
4606 return 0;
4607
4608 ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, NULL);
4609 if (ret == 0)
4610 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
4611
4612 return ret;
4613 }
4614
4615 static bool
4616 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
4617 {
4618 struct drm_i915_gem_object *obj = vma->obj;
4619
4620 if (alignment &&
4621 vma->node.start & (alignment - 1))
4622 return true;
4623
4624 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
4625 return true;
4626
4627 if (flags & PIN_OFFSET_BIAS &&
4628 vma->node.start < (flags & PIN_OFFSET_MASK))
4629 return true;
4630
4631 return false;
4632 }
4633
4634 int
4635 i915_gem_object_pin(struct drm_i915_gem_object *obj,
4636 struct i915_address_space *vm,
4637 uint32_t alignment,
4638 uint64_t flags)
4639 {
4640 struct i915_vma *vma;
4641 int ret;
4642
4643 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
4644 return -EINVAL;
4645
4646 vma = i915_gem_obj_to_vma(obj, vm);
4647 if (vma) {
4648 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
4649 return -EBUSY;
4650
4651 if (i915_vma_misplaced(vma, alignment, flags)) {
4652 WARN(vma->pin_count,
4653 "bo is already pinned with incorrect alignment:"
4654 " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d,"
4655 " obj->map_and_fenceable=%d\n",
4656 i915_gem_obj_offset(obj, vm), alignment,
4657 !!(flags & PIN_MAPPABLE),
4658 obj->map_and_fenceable);
4659 ret = i915_vma_unbind(vma);
4660 if (ret)
4661 return ret;
4662
4663 vma = NULL;
4664 }
4665 }
4666
4667 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
4668 vma = i915_gem_object_bind_to_vm(obj, vm, alignment, flags);
4669 if (IS_ERR(vma))
4670 return PTR_ERR(vma);
4671 }
4672
4673 if (flags & PIN_GLOBAL && !obj->has_global_gtt_mapping)
4674 vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
4675
4676 vma->pin_count++;
4677 if (flags & PIN_MAPPABLE)
4678 obj->pin_mappable |= true;
4679
4680 return 0;
4681 }
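/*
 * Added commentary on the pin flags handled above: PIN_MAPPABLE
 * restricts placement to the CPU-mappable part of the global GTT and
 * implies a global binding; PIN_GLOBAL forces a global GTT binding;
 * PIN_OFFSET_BIAS ored with an offset requests a minimum start address,
 * which i915_vma_misplaced() re-checks on every pin.  PIN_GLOBAL and
 * PIN_MAPPABLE are rejected outright for non-GGTT address spaces by the
 * WARN_ON at the top of i915_gem_object_pin().
 */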
4682
4683 void
4684 i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj)
4685 {
4686 struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
4687
4688 BUG_ON(!vma);
4689 BUG_ON(vma->pin_count == 0);
4690 BUG_ON(!i915_gem_obj_ggtt_bound(obj));
4691
4692 if (--vma->pin_count == 0)
4693 obj->pin_mappable = false;
4694 }
4695
4696 int
4697 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
4698 struct drm_file *file)
4699 {
4700 struct drm_i915_gem_pin *args = data;
4701 struct drm_i915_gem_object *obj;
4702 int ret;
4703
4704 if (INTEL_INFO(dev)->gen >= 6)
4705 return -ENODEV;
4706
4707 ret = i915_mutex_lock_interruptible(dev);
4708 if (ret)
4709 return ret;
4710
4711 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4712 if (&obj->base == NULL) {
4713 ret = -ENOENT;
4714 goto unlock;
4715 }
4716
4717 if (obj->madv != I915_MADV_WILLNEED) {
4718 DRM_DEBUG("Attempting to pin a purgeable buffer\n");
4719 ret = -EFAULT;
4720 goto out;
4721 }
4722
4723 if (obj->pin_filp != NULL && obj->pin_filp != file) {
4724 DRM_DEBUG("Already pinned in i915_gem_pin_ioctl(): %d\n",
4725 args->handle);
4726 ret = -EINVAL;
4727 goto out;
4728 }
4729
4730 if (obj->user_pin_count == ULONG_MAX) {
4731 ret = -EBUSY;
4732 goto out;
4733 }
4734
4735 if (obj->user_pin_count == 0) {
4736 ret = i915_gem_obj_ggtt_pin(obj, args->alignment, PIN_MAPPABLE);
4737 if (ret)
4738 goto out;
4739 }
4740
4741 obj->user_pin_count++;
4742 obj->pin_filp = file;
4743
4744 args->offset = i915_gem_obj_ggtt_offset(obj);
4745 out:
4746 drm_gem_object_unreference(&obj->base);
4747 unlock:
4748 mutex_unlock(&dev->struct_mutex);
4749 return ret;
4750 }
4751
4752 int
4753 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
4754 struct drm_file *file)
4755 {
4756 struct drm_i915_gem_pin *args = data;
4757 struct drm_i915_gem_object *obj;
4758 int ret;
4759
4760 ret = i915_mutex_lock_interruptible(dev);
4761 if (ret)
4762 return ret;
4763
4764 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4765 if (&obj->base == NULL) {
4766 ret = -ENOENT;
4767 goto unlock;
4768 }
4769
4770 if (obj->pin_filp != file) {
4771 DRM_DEBUG("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
4772 args->handle);
4773 ret = -EINVAL;
4774 goto out;
4775 }
4776 obj->user_pin_count--;
4777 if (obj->user_pin_count == 0) {
4778 obj->pin_filp = NULL;
4779 i915_gem_object_ggtt_unpin(obj);
4780 }
4781
4782 out:
4783 drm_gem_object_unreference(&obj->base);
4784 unlock:
4785 mutex_unlock(&dev->struct_mutex);
4786 return ret;
4787 }
4788
4789 int
4790 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4791 struct drm_file *file)
4792 {
4793 struct drm_i915_gem_busy *args = data;
4794 struct drm_i915_gem_object *obj;
4795 int ret;
4796
4797 ret = i915_mutex_lock_interruptible(dev);
4798 if (ret)
4799 return ret;
4800
4801 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4802 if (&obj->base == NULL) {
4803 ret = -ENOENT;
4804 goto unlock;
4805 }
4806
4807 /* Count all active objects as busy, even if they are currently not used
4808 * by the gpu. Users of this interface expect objects to eventually
4809 * become non-busy without any further actions, therefore emit any
4810 * necessary flushes here.
4811 */
4812 ret = i915_gem_object_flush_active(obj);
4813
4814 args->busy = obj->active;
4815 if (obj->ring) {
4816 BUILD_BUG_ON(I915_NUM_RINGS > 16);
4817 args->busy |= intel_ring_flag(obj->ring) << 16;
4818 }
4819
4820 drm_gem_object_unreference(&obj->base);
4821 unlock:
4822 mutex_unlock(&dev->struct_mutex);
4823 return ret;
4824 }
4825
4826 int
4827 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4828 struct drm_file *file_priv)
4829 {
4830 return i915_gem_ring_throttle(dev, file_priv);
4831 }
4832
4833 int
4834 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4835 struct drm_file *file_priv)
4836 {
4837 struct drm_i915_gem_madvise *args = data;
4838 struct drm_i915_gem_object *obj;
4839 int ret;
4840
4841 switch (args->madv) {
4842 case I915_MADV_DONTNEED:
4843 case I915_MADV_WILLNEED:
4844 break;
4845 default:
4846 return -EINVAL;
4847 }
4848
4849 ret = i915_mutex_lock_interruptible(dev);
4850 if (ret)
4851 return ret;
4852
4853 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
4854 if (&obj->base == NULL) {
4855 ret = -ENOENT;
4856 goto unlock;
4857 }
4858
4859 if (i915_gem_obj_is_pinned(obj)) {
4860 ret = -EINVAL;
4861 goto out;
4862 }
4863
4864 if (obj->madv != __I915_MADV_PURGED)
4865 obj->madv = args->madv;
4866
4867 /* if the object is no longer attached, discard its backing storage */
4868 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL)
4869 i915_gem_object_truncate(obj);
4870
4871 args->retained = obj->madv != __I915_MADV_PURGED;
4872
4873 out:
4874 drm_gem_object_unreference(&obj->base);
4875 unlock:
4876 mutex_unlock(&dev->struct_mutex);
4877 return ret;
4878 }
4879
4880 void i915_gem_object_init(struct drm_i915_gem_object *obj,
4881 const struct drm_i915_gem_object_ops *ops)
4882 {
4883 INIT_LIST_HEAD(&obj->global_list);
4884 INIT_LIST_HEAD(&obj->ring_list);
4885 INIT_LIST_HEAD(&obj->obj_exec_link);
4886 INIT_LIST_HEAD(&obj->vma_list);
4887
4888 obj->ops = ops;
4889
4890 obj->fence_reg = I915_FENCE_REG_NONE;
4891 obj->madv = I915_MADV_WILLNEED;
4892 /* Avoid an unnecessary call to unbind on the first bind. */
4893 obj->map_and_fenceable = true;
4894
4895 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
4896 }
4897
4898 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
4899 .get_pages = i915_gem_object_get_pages_gtt,
4900 .put_pages = i915_gem_object_put_pages_gtt,
4901 };
4902
4903 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
4904 size_t size)
4905 {
4906 #ifdef __NetBSD__
4907 struct drm_i915_private *const dev_priv = dev->dev_private;
4908 #endif
4909 struct drm_i915_gem_object *obj;
4910 #ifndef __NetBSD__
4911 struct address_space *mapping;
4912 gfp_t mask;
4913 #endif
4914
4915 obj = i915_gem_object_alloc(dev);
4916 if (obj == NULL)
4917 return NULL;
4918
4919 if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4920 i915_gem_object_free(obj);
4921 return NULL;
4922 }
4923
4924 #ifdef __NetBSD__
4925 uao_set_pgfl(obj->base.gemo_shm_uao, dev_priv->gtt.pgfl);
4926 #else
4927 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
4928 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
4929 /* 965gm cannot relocate objects above 4GiB. */
4930 mask &= ~__GFP_HIGHMEM;
4931 mask |= __GFP_DMA32;
4932 }
4933
4934 mapping = file_inode(obj->base.filp)->i_mapping;
4935 mapping_set_gfp_mask(mapping, mask);
4936 #endif
4937
4938 i915_gem_object_init(obj, &i915_gem_object_ops);
4939
4940 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4941 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4942
4943 if (HAS_LLC(dev)) {
4944 /* On some devices, we can have the GPU use the LLC (the CPU
4945 * cache) for about a 10% performance improvement
4946 * compared to uncached. Graphics requests other than
4947 * display scanout are coherent with the CPU in
4948 * accessing this cache. This means in this mode we
4949 * don't need to clflush on the CPU side, and on the
4950 * GPU side we only need to flush internal caches to
4951 * get data visible to the CPU.
4952 *
4953 * However, we maintain the display planes as UC, and so
4954 * need to rebind when first used as such.
4955 */
4956 obj->cache_level = I915_CACHE_LLC;
4957 } else
4958 obj->cache_level = I915_CACHE_NONE;
4959
4960 trace_i915_gem_object_create(obj);
4961
4962 return obj;
4963 }
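/*
 * Added commentary: on NetBSD the backing store is a uvm anonymous
 * object whose page freelist is constrained to GTT-reachable memory via
 * uao_set_pgfl(); the Linux path instead tunes the shmem mapping's gfp
 * mask, dropping __GFP_HIGHMEM on 965G/GM-class hardware which cannot
 * relocate objects above 4GiB.  Either way the new object starts out
 * entirely in the CPU read and write domains.
 */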
4964
4965 void i915_gem_free_object(struct drm_gem_object *gem_obj)
4966 {
4967 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4968 struct drm_device *dev = obj->base.dev;
4969 struct drm_i915_private *dev_priv = dev->dev_private;
4970 struct i915_vma *vma, *next;
4971
4972 intel_runtime_pm_get(dev_priv);
4973
4974 trace_i915_gem_object_destroy(obj);
4975
4976 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
4977 int ret;
4978
4979 vma->pin_count = 0;
4980 ret = i915_vma_unbind(vma);
4981 if (WARN_ON(ret == -ERESTARTSYS)) {
4982 bool was_interruptible;
4983
4984 was_interruptible = dev_priv->mm.interruptible;
4985 dev_priv->mm.interruptible = false;
4986
4987 WARN_ON(i915_vma_unbind(vma));
4988
4989 dev_priv->mm.interruptible = was_interruptible;
4990 }
4991 }
4992
4993 i915_gem_object_detach_phys(obj);
4994
4995 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up
4996 * before progressing. */
4997 if (obj->stolen)
4998 i915_gem_object_unpin_pages(obj);
4999
5000 if (WARN_ON(obj->pages_pin_count))
5001 obj->pages_pin_count = 0;
5002 i915_gem_object_put_pages(obj);
5003 i915_gem_object_free_mmap_offset(obj);
5004 i915_gem_object_release_stolen(obj);
5005
5006 BUG_ON(obj->pages);
5007
5008 #ifndef __NetBSD__ /* XXX drm prime */
5009 if (obj->base.import_attach)
5010 drm_prime_gem_destroy(&obj->base, NULL);
5011 #endif
5012
5013 drm_gem_object_release(&obj->base);
5014 i915_gem_info_remove_obj(dev_priv, obj->base.size);
5015
5016 kfree(obj->bit_17);
5017 i915_gem_object_free(obj);
5018
5019 intel_runtime_pm_put(dev_priv);
5020 }
5021
5022 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
5023 struct i915_address_space *vm)
5024 {
5025 struct i915_vma *vma;
5026 list_for_each_entry(vma, &obj->vma_list, vma_link)
5027 if (vma->vm == vm)
5028 return vma;
5029
5030 return NULL;
5031 }
5032
5033 void i915_gem_vma_destroy(struct i915_vma *vma)
5034 {
5035 WARN_ON(vma->node.allocated);
5036
5037 /* Keep the vma as a placeholder in the execbuffer reservation lists */
5038 if (!list_empty(&vma->exec_list))
5039 return;
5040
5041 list_del(&vma->vma_link);
5042
5043 kfree(vma);
5044 }
5045
5046 int
5047 i915_gem_suspend(struct drm_device *dev)
5048 {
5049 struct drm_i915_private *dev_priv = dev->dev_private;
5050 int ret = 0;
5051
5052 mutex_lock(&dev->struct_mutex);
5053 if (dev_priv->ums.mm_suspended)
5054 goto err;
5055
5056 ret = i915_gpu_idle(dev);
5057 if (ret)
5058 goto err;
5059
5060 i915_gem_retire_requests(dev);
5061
5062 /* Under UMS, be paranoid and evict. */
5063 if (!drm_core_check_feature(dev, DRIVER_MODESET))
5064 i915_gem_evict_everything(dev);
5065
5066 i915_kernel_lost_context(dev);
5067 i915_gem_cleanup_ringbuffer(dev);
5068
5069 /* Hack! Don't let anybody do execbuf while we don't control the chip.
5070 * We need to replace this with a semaphore, or something.
5071 * And not confound ums.mm_suspended!
5072 */
5073 dev_priv->ums.mm_suspended = !drm_core_check_feature(dev,
5074 DRIVER_MODESET);
5075 mutex_unlock(&dev->struct_mutex);
5076
5077 del_timer_sync(&dev_priv->gpu_error.hangcheck_timer);
5078 cancel_delayed_work_sync(&dev_priv->mm.retire_work);
5079 cancel_delayed_work_sync(&dev_priv->mm.idle_work);
5080
5081 return 0;
5082
5083 err:
5084 mutex_unlock(&dev->struct_mutex);
5085 return ret;
5086 }
5087
5088 int i915_gem_l3_remap(struct intel_ring_buffer *ring, int slice)
5089 {
5090 struct drm_device *dev = ring->dev;
5091 struct drm_i915_private *dev_priv = dev->dev_private;
5092 u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
5093 u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
5094 int i, ret;
5095
5096 if (!HAS_L3_DPF(dev) || !remap_info)
5097 return 0;
5098
5099 ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3);
5100 if (ret)
5101 return ret;
5102
5103 /*
5104 * Note: We do not worry about the concurrent register cacheline hang
5105 * here because no other code should access these registers other than
5106 * at initialization time.
5107 */
5108 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
5109 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
5110 intel_ring_emit(ring, reg_base + i);
5111 intel_ring_emit(ring, remap_info[i/4]);
5112 }
5113
5114 intel_ring_advance(ring);
5115
5116 return ret;
5117 }
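/*
 * Added commentary: each remapped L3 register costs three dwords on the
 * ring (MI_LOAD_REGISTER_IMM(1), the register offset, the value), which
 * is why intel_ring_begin() reserves GEN7_L3LOG_SIZE / 4 * 3 dwords for
 * the GEN7_L3LOG_SIZE / 4 registers written by the loop.
 */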
5118
5119 void i915_gem_init_swizzling(struct drm_device *dev)
5120 {
5121 struct drm_i915_private *dev_priv = dev->dev_private;
5122
5123 if (INTEL_INFO(dev)->gen < 5 ||
5124 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
5125 return;
5126
5127 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
5128 DISP_TILE_SURFACE_SWIZZLING);
5129
5130 if (IS_GEN5(dev))
5131 return;
5132
5133 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
5134 if (IS_GEN6(dev))
5135 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
5136 else if (IS_GEN7(dev))
5137 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
5138 else if (IS_GEN8(dev))
5139 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
5140 else
5141 BUG();
5142 }
5143
5144 static bool
5145 intel_enable_blt(struct drm_device *dev)
5146 {
5147 if (!HAS_BLT(dev))
5148 return false;
5149
5150 /* The blitter was dysfunctional on early prototypes */
5151 if (IS_GEN6(dev) && dev->pdev->revision < 8) {
5152 DRM_INFO("BLT not supported on this pre-production hardware;"
5153 " graphics performance will be degraded.\n");
5154 return false;
5155 }
5156
5157 return true;
5158 }
5159
5160 static int i915_gem_init_rings(struct drm_device *dev)
5161 {
5162 struct drm_i915_private *dev_priv = dev->dev_private;
5163 int ret;
5164
5165 ret = intel_init_render_ring_buffer(dev);
5166 if (ret)
5167 return ret;
5168
5169 if (HAS_BSD(dev)) {
5170 ret = intel_init_bsd_ring_buffer(dev);
5171 if (ret)
5172 goto cleanup_render_ring;
5173 }
5174
5175 if (intel_enable_blt(dev)) {
5176 ret = intel_init_blt_ring_buffer(dev);
5177 if (ret)
5178 goto cleanup_bsd_ring;
5179 }
5180
5181 if (HAS_VEBOX(dev)) {
5182 ret = intel_init_vebox_ring_buffer(dev);
5183 if (ret)
5184 goto cleanup_blt_ring;
5185 }
5186
5187
5188 ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
5189 if (ret)
5190 goto cleanup_vebox_ring;
5191
5192 return 0;
5193
5194 cleanup_vebox_ring:
5195 intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
5196 cleanup_blt_ring:
5197 intel_cleanup_ring_buffer(&dev_priv->ring[BCS]);
5198 cleanup_bsd_ring:
5199 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
5200 cleanup_render_ring:
5201 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
5202
5203 return ret;
5204 }
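/*
 * Added commentary: the cleanup labels above deliberately fall through,
 * so a failure at any stage unwinds every ring initialised before it
 * (vebox -> blt -> bsd -> render) before the error is returned.
 */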
5205
5206 int
5207 i915_gem_init_hw(struct drm_device *dev)
5208 {
5209 struct drm_i915_private *dev_priv = dev->dev_private;
5210 int ret, i;
5211
5212 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
5213 return -EIO;
5214
5215 if (dev_priv->ellc_size)
5216 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
5217
5218 if (IS_HASWELL(dev))
5219 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
5220 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
5221
5222 if (HAS_PCH_NOP(dev)) {
5223 if (IS_IVYBRIDGE(dev)) {
5224 u32 temp = I915_READ(GEN7_MSG_CTL);
5225 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
5226 I915_WRITE(GEN7_MSG_CTL, temp);
5227 } else if (INTEL_INFO(dev)->gen >= 7) {
5228 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
5229 temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
5230 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
5231 }
5232 }
5233
5234 i915_gem_init_swizzling(dev);
5235
5236 ret = i915_gem_init_rings(dev);
5237 if (ret)
5238 return ret;
5239
5240 for (i = 0; i < NUM_L3_SLICES(dev); i++)
5241 i915_gem_l3_remap(&dev_priv->ring[RCS], i);
5242
5243 /*
5244 * XXX: Contexts should only be initialized once. Doing a switch to the
5245 * default context switch however is something we'd like to do after
5246 * reset or thaw (the latter may not actually be necessary for HW, but
5247 * goes with our code better). Context switching requires rings (for
5248 * the do_switch), but before enabling PPGTT. So don't move this.
5249 */
5250 ret = i915_gem_context_enable(dev_priv);
5251 if (ret) {
5252 DRM_ERROR("Context enable failed %d\n", ret);
5253 goto err_out;
5254 }
5255
5256 return 0;
5257
5258 err_out:
5259 i915_gem_cleanup_ringbuffer(dev);
5260 return ret;
5261 }
5262
5263 int i915_gem_init(struct drm_device *dev)
5264 {
5265 struct drm_i915_private *dev_priv = dev->dev_private;
5266 int ret;
5267
5268 mutex_lock(&dev->struct_mutex);
5269
5270 if (IS_VALLEYVIEW(dev)) {
5271 /* VLVA0 (potential hack), BIOS isn't actually waking us */
5272 I915_WRITE(VLV_GTLC_WAKE_CTRL, 1);
5273 if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 1) == 1, 10))
5274 DRM_DEBUG_DRIVER("allow wake ack timed out\n");
5275 }
5276 i915_gem_init_global_gtt(dev);
5277
5278 ret = i915_gem_context_init(dev);
5279 if (ret) {
5280 mutex_unlock(&dev->struct_mutex);
5281 return ret;
5282 }
5283
5284 ret = i915_gem_init_hw(dev);
5285 mutex_unlock(&dev->struct_mutex);
5286 if (ret) {
5287 WARN_ON(dev_priv->mm.aliasing_ppgtt);
5288 i915_gem_context_fini(dev);
5289 drm_mm_takedown(&dev_priv->gtt.base.mm);
5290 return ret;
5291 }
5292
5293 /* Allow hardware batchbuffers unless told otherwise, but not for KMS. */
5294 if (!drm_core_check_feature(dev, DRIVER_MODESET))
5295 dev_priv->dri1.allow_batchbuffer = 1;
5296 return 0;
5297 }
5298
5299 void
5300 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
5301 {
5302 struct drm_i915_private *dev_priv = dev->dev_private;
5303 struct intel_ring_buffer *ring;
5304 int i;
5305
5306 for_each_ring(ring, dev_priv, i)
5307 intel_cleanup_ring_buffer(ring);
5308 }
5309
5310 int
5311 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
5312 struct drm_file *file_priv)
5313 {
5314 struct drm_i915_private *dev_priv = dev->dev_private;
5315 int ret;
5316
5317 if (drm_core_check_feature(dev, DRIVER_MODESET))
5318 return 0;
5319
5320 if (i915_reset_in_progress(&dev_priv->gpu_error)) {
5321 DRM_ERROR("Reenabling wedged hardware, good luck\n");
5322 atomic_set(&dev_priv->gpu_error.reset_counter, 0);
5323 }
5324
5325 mutex_lock(&dev->struct_mutex);
5326 dev_priv->ums.mm_suspended = 0;
5327
5328 ret = i915_gem_init_hw(dev);
5329 if (ret != 0) {
5330 mutex_unlock(&dev->struct_mutex);
5331 return ret;
5332 }
5333
5334 BUG_ON(!list_empty(&dev_priv->gtt.base.active_list));
5335 mutex_unlock(&dev->struct_mutex);
5336
5337 ret = drm_irq_install(dev);
5338 if (ret)
5339 goto cleanup_ringbuffer;
5340
5341 return 0;
5342
5343 cleanup_ringbuffer:
5344 mutex_lock(&dev->struct_mutex);
5345 i915_gem_cleanup_ringbuffer(dev);
5346 dev_priv->ums.mm_suspended = 1;
5347 mutex_unlock(&dev->struct_mutex);
5348
5349 return ret;
5350 }
5351
5352 int
5353 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
5354 struct drm_file *file_priv)
5355 {
5356 if (drm_core_check_feature(dev, DRIVER_MODESET))
5357 return 0;
5358
5359 drm_irq_uninstall(dev);
5360
5361 return i915_gem_suspend(dev);
5362 }
5363
5364 void
5365 i915_gem_lastclose(struct drm_device *dev)
5366 {
5367 int ret;
5368
5369 if (drm_core_check_feature(dev, DRIVER_MODESET))
5370 return;
5371
5372 ret = i915_gem_suspend(dev);
5373 if (ret)
5374 DRM_ERROR("failed to idle hardware: %d\n", ret);
5375 }
5376
5377 static void
5378 init_ring_lists(struct intel_ring_buffer *ring)
5379 {
5380 INIT_LIST_HEAD(&ring->active_list);
5381 INIT_LIST_HEAD(&ring->request_list);
5382 }
5383
5384 void i915_init_vm(struct drm_i915_private *dev_priv,
5385 struct i915_address_space *vm)
5386 {
5387 if (!i915_is_ggtt(vm))
5388 drm_mm_init(&vm->mm, vm->start, vm->total);
5389 vm->dev = dev_priv->dev;
5390 INIT_LIST_HEAD(&vm->active_list);
5391 INIT_LIST_HEAD(&vm->inactive_list);
5392 INIT_LIST_HEAD(&vm->global_link);
5393 list_add_tail(&vm->global_link, &dev_priv->vm_list);
5394 }
5395
5396 void
5397 i915_gem_load(struct drm_device *dev)
5398 {
5399 struct drm_i915_private *dev_priv = dev->dev_private;
5400 int i;
5401
5402 dev_priv->slab =
5403 kmem_cache_create("i915_gem_object",
5404 sizeof(struct drm_i915_gem_object), 0,
5405 SLAB_HWCACHE_ALIGN,
5406 NULL);
5407
5408 INIT_LIST_HEAD(&dev_priv->vm_list);
5409 i915_init_vm(dev_priv, &dev_priv->gtt.base);
5410
5411 INIT_LIST_HEAD(&dev_priv->context_list);
5412 INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
5413 INIT_LIST_HEAD(&dev_priv->mm.bound_list);
5414 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
5415 for (i = 0; i < I915_NUM_RINGS; i++)
5416 init_ring_lists(&dev_priv->ring[i]);
5417 for (i = 0; i < I915_MAX_NUM_FENCES; i++)
5418 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
5419 INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
5420 i915_gem_retire_work_handler);
5421 INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
5422 i915_gem_idle_work_handler);
5423 #ifdef __NetBSD__
5424 spin_lock_init(&dev_priv->gpu_error.reset_lock);
5425 DRM_INIT_WAITQUEUE(&dev_priv->gpu_error.reset_queue, "i915errst");
5426 #else
5427 init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
5428 #endif
5429
5430 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
5431 if (IS_GEN3(dev)) {
5432 I915_WRITE(MI_ARB_STATE,
5433 _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
5434 }
5435
5436 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
5437
5438 /* Old X drivers will take 0-2 for front, back, depth buffers */
5439 if (!drm_core_check_feature(dev, DRIVER_MODESET))
5440 dev_priv->fence_reg_start = 3;
5441
5442 if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev))
5443 dev_priv->num_fence_regs = 32;
5444 else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
5445 dev_priv->num_fence_regs = 16;
5446 else
5447 dev_priv->num_fence_regs = 8;
5448
5449 /* Initialize fence registers to zero */
5450 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
5451 i915_gem_restore_fences(dev);
5452
5453 i915_gem_detect_bit_6_swizzle(dev);
5454 #ifdef __NetBSD__
5455 DRM_INIT_WAITQUEUE(&dev_priv->pending_flip_queue, "i915flip");
5456 spin_lock_init(&dev_priv->pending_flip_lock);
5457 #else
5458 init_waitqueue_head(&dev_priv->pending_flip_queue);
5459 #endif
5460
5461 dev_priv->mm.interruptible = true;
5462
5463 dev_priv->mm.inactive_shrinker.scan_objects = i915_gem_inactive_scan;
5464 dev_priv->mm.inactive_shrinker.count_objects = i915_gem_inactive_count;
5465 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
5466 register_shrinker(&dev_priv->mm.inactive_shrinker);
5467 }
5468
5469 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
5470 {
5471 struct drm_i915_file_private *file_priv = file->driver_priv;
5472
5473 cancel_delayed_work_sync(&file_priv->mm.idle_work);
5474
5475 /* Clean up our request list when the client is going away, so that
5476 * later retire_requests won't dereference our soon-to-be-gone
5477 * file_priv.
5478 */
5479 spin_lock(&file_priv->mm.lock);
5480 while (!list_empty(&file_priv->mm.request_list)) {
5481 struct drm_i915_gem_request *request;
5482
5483 request = list_first_entry(&file_priv->mm.request_list,
5484 struct drm_i915_gem_request,
5485 client_list);
5486 list_del(&request->client_list);
5487 request->file_priv = NULL;
5488 }
5489 spin_unlock(&file_priv->mm.lock);
5490 }
5491
5492 static void
5493 i915_gem_file_idle_work_handler(struct work_struct *work)
5494 {
5495 struct drm_i915_file_private *file_priv =
5496 container_of(work, typeof(*file_priv), mm.idle_work.work);
5497
5498 atomic_set(&file_priv->rps_wait_boost, false);
5499 }
5500
5501 int i915_gem_open(struct drm_device *dev, struct drm_file *file)
5502 {
5503 struct drm_i915_file_private *file_priv;
5504 int ret;
5505
5506 DRM_DEBUG_DRIVER("\n");
5507
5508 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
5509 if (!file_priv)
5510 return -ENOMEM;
5511
5512 file->driver_priv = file_priv;
5513 file_priv->dev_priv = dev->dev_private;
5514 file_priv->file = file;
5515
5516 spin_lock_init(&file_priv->mm.lock);
5517 INIT_LIST_HEAD(&file_priv->mm.request_list);
5518 INIT_DELAYED_WORK(&file_priv->mm.idle_work,
5519 i915_gem_file_idle_work_handler);
5520
5521 ret = i915_gem_context_open(dev, file);
5522 if (ret)
5523 kfree(file_priv);
5524
5525 return ret;
5526 }
5527
5528 #ifndef __NetBSD__
5529 static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
5530 {
5531 if (!mutex_is_locked(mutex))
5532 return false;
5533
5534 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)
5535 return mutex->owner == task;
5536 #else
5537 /* Since UP may be pre-empted, we cannot assume that we own the lock */
5538 return false;
5539 #endif
5540 }
5541 #endif
5542
5543 static unsigned long
5544 i915_gem_inactive_count(struct shrinker *shrinker, struct shrink_control *sc)
5545 {
5546 #ifdef __NetBSD__ /* XXX shrinkers */
5547 return 0;
5548 #else
5549 struct drm_i915_private *dev_priv =
5550 container_of(shrinker,
5551 struct drm_i915_private,
5552 mm.inactive_shrinker);
5553 struct drm_device *dev = dev_priv->dev;
5554 struct drm_i915_gem_object *obj;
5555 bool unlock = true;
5556 unsigned long count;
5557
5558 if (!mutex_trylock(&dev->struct_mutex)) {
5559 if (!mutex_is_locked_by(&dev->struct_mutex, current))
5560 return 0;
5561
5562 if (dev_priv->mm.shrinker_no_lock_stealing)
5563 return 0;
5564
5565 unlock = false;
5566 }
5567
5568 count = 0;
5569 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list)
5570 if (obj->pages_pin_count == 0)
5571 count += obj->base.size >> PAGE_SHIFT;
5572
5573 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
5574 if (obj->active)
5575 continue;
5576
5577 if (!i915_gem_obj_is_pinned(obj) && obj->pages_pin_count == 0)
5578 count += obj->base.size >> PAGE_SHIFT;
5579 }
5580
5581 if (unlock)
5582 mutex_unlock(&dev->struct_mutex);
5583
5584 return count;
5585 #endif
5586 }
5587
5588 /* All the new VM stuff */
5589 unsigned long i915_gem_obj_offset(struct drm_i915_gem_object *o,
5590 struct i915_address_space *vm)
5591 {
5592 struct drm_i915_private *dev_priv = o->base.dev->dev_private;
5593 struct i915_vma *vma;
5594
5595 if (!dev_priv->mm.aliasing_ppgtt ||
5596 vm == &dev_priv->mm.aliasing_ppgtt->base)
5597 vm = &dev_priv->gtt.base;
5598
5599 BUG_ON(list_empty(&o->vma_list));
5600 list_for_each_entry(vma, &o->vma_list, vma_link) {
5601 if (vma->vm == vm)
5602 return vma->node.start;
5603
5604 }
5605 return -1;
5606 }
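/*
 * Added commentary: the return type is unsigned long, so the "-1" miss
 * value above is really ULONG_MAX; check i915_gem_obj_bound() (below)
 * first, or treat that value as "not bound in this address space".
 */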
5607
5608 bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
5609 struct i915_address_space *vm)
5610 {
5611 struct i915_vma *vma;
5612
5613 list_for_each_entry(vma, &o->vma_list, vma_link)
5614 if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
5615 return true;
5616
5617 return false;
5618 }
5619
5620 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
5621 {
5622 struct i915_vma *vma;
5623
5624 list_for_each_entry(vma, &o->vma_list, vma_link)
5625 if (drm_mm_node_allocated(&vma->node))
5626 return true;
5627
5628 return false;
5629 }
5630
5631 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
5632 struct i915_address_space *vm)
5633 {
5634 struct drm_i915_private *dev_priv = o->base.dev->dev_private;
5635 struct i915_vma *vma;
5636
5637 if (!dev_priv->mm.aliasing_ppgtt ||
5638 vm == &dev_priv->mm.aliasing_ppgtt->base)
5639 vm = &dev_priv->gtt.base;
5640
5641 BUG_ON(list_empty(&o->vma_list));
5642
5643 list_for_each_entry(vma, &o->vma_list, vma_link)
5644 if (vma->vm == vm)
5645 return vma->node.size;
5646
5647 return 0;
5648 }
5649
5650 static unsigned long
5651 i915_gem_inactive_scan(struct shrinker *shrinker, struct shrink_control *sc)
5652 {
5653 #ifdef __NetBSD__ /* XXX shrinkers */
5654 return 0;
5655 #else
5656 struct drm_i915_private *dev_priv =
5657 container_of(shrinker,
5658 struct drm_i915_private,
5659 mm.inactive_shrinker);
5660 struct drm_device *dev = dev_priv->dev;
5661 unsigned long freed;
5662 bool unlock = true;
5663
5664 if (!mutex_trylock(&dev->struct_mutex)) {
5665 if (!mutex_is_locked_by(&dev->struct_mutex, current))
5666 return SHRINK_STOP;
5667
5668 if (dev_priv->mm.shrinker_no_lock_stealing)
5669 return SHRINK_STOP;
5670
5671 unlock = false;
5672 }
5673
5674 freed = i915_gem_purge(dev_priv, sc->nr_to_scan);
5675 if (freed < sc->nr_to_scan)
5676 freed += __i915_gem_shrink(dev_priv,
5677 sc->nr_to_scan - freed,
5678 false);
5679 if (freed < sc->nr_to_scan)
5680 freed += i915_gem_shrink_all(dev_priv);
5681
5682 if (unlock)
5683 mutex_unlock(&dev->struct_mutex);
5684
5685 return freed;
5686 #endif
5687 }
5688
5689 struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
5690 {
5691 struct i915_vma *vma;
5692
5693 if (WARN_ON(list_empty(&obj->vma_list)))
5694 return NULL;
5695
5696 vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link);
5697 if (vma->vm != obj_to_ggtt(obj))
5698 return NULL;
5699
5700 return vma;
5701 }
5702