i915_gem.c revision 1.18

1 /*
2 * Copyright 2008 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric (at) anholt.net>
25 *
26 */
27
28 #ifdef __NetBSD__
29 #if 0 /* XXX uvmhist option? */
30 #include "opt_uvmhist.h"
31 #endif
32
33 #include <sys/types.h>
34 #include <sys/param.h>
35
36 #include <uvm/uvm.h>
37 #include <uvm/uvm_extern.h>
38 #include <uvm/uvm_fault.h>
39 #include <uvm/uvm_page.h>
40 #include <uvm/uvm_pmap.h>
41 #include <uvm/uvm_prot.h>
42
43 #include <drm/bus_dma_hacks.h>
44 #endif
45
46 #include <drm/drmP.h>
47 #include <drm/drm_vma_manager.h>
48 #include <drm/i915_drm.h>
49 #include "i915_drv.h"
50 #include "i915_trace.h"
51 #include "intel_drv.h"
52 #include <linux/shmem_fs.h>
53 #include <linux/slab.h>
54 #include <linux/swap.h>
55 #include <linux/pci.h>
56 #include <linux/dma-buf.h>
57 #include <linux/errno.h>
58 #include <linux/time.h>
59 #include <linux/err.h>
60 #include <linux/bitops.h>
61 #include <linux/printk.h>
62 #include <asm/param.h>
63 #include <asm/page.h>
64
65 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
66 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
67 bool force);
68 static __must_check int
69 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
70 bool readonly);
71
72 static void i915_gem_write_fence(struct drm_device *dev, int reg,
73 struct drm_i915_gem_object *obj);
74 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
75 struct drm_i915_fence_reg *fence,
76 bool enable);
77
78 static unsigned long i915_gem_inactive_count(struct shrinker *shrinker,
79 struct shrink_control *sc);
80 static unsigned long i915_gem_inactive_scan(struct shrinker *shrinker,
81 struct shrink_control *sc);
82 static unsigned long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
83 static unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv);
84 static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
85 static void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring);
86
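/*
 * The CPU cache is coherent with GPU access when the chip shares a
 * last-level cache with the GPU (HAS_LLC) or when the object uses a
 * snoopable cache level, i.e. anything other than I915_CACHE_NONE.
 */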
87 static bool cpu_cache_is_coherent(struct drm_device *dev,
88 enum i915_cache_level level)
89 {
90 return HAS_LLC(dev) || level != I915_CACHE_NONE;
91 }
92
93 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
94 {
95 if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
96 return true;
97
98 return obj->pin_display;
99 }
100
101 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
102 {
103 if (obj->tiling_mode)
104 i915_gem_release_mmap(obj);
105
106 /* As we do not have an associated fence register, we will force
107 * a tiling change if we ever need to acquire one.
108 */
109 obj->fence_dirty = false;
110 obj->fence_reg = I915_FENCE_REG_NONE;
111 }
112
113 /* some bookkeeping */
114 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
115 size_t size)
116 {
117 spin_lock(&dev_priv->mm.object_stat_lock);
118 dev_priv->mm.object_count++;
119 dev_priv->mm.object_memory += size;
120 spin_unlock(&dev_priv->mm.object_stat_lock);
121 }
122
123 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
124 size_t size)
125 {
126 spin_lock(&dev_priv->mm.object_stat_lock);
127 dev_priv->mm.object_count--;
128 dev_priv->mm.object_memory -= size;
129 spin_unlock(&dev_priv->mm.object_stat_lock);
130 }
131
132 static int
133 i915_gem_wait_for_error(struct i915_gpu_error *error)
134 {
135 int ret;
136
137 #define EXIT_COND (!i915_reset_in_progress(error) || \
138 i915_terminally_wedged(error))
139 if (EXIT_COND)
140 return 0;
141
142 /*
143 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
144 * userspace. If it takes that long something really bad is going on and
145 * we should simply try to bail out and fail as gracefully as possible.
146 */
147 #ifdef __NetBSD__
148 spin_lock(&error->reset_lock);
149 DRM_SPIN_TIMED_WAIT_UNTIL(ret, &error->reset_queue, &error->reset_lock,
150 10*HZ, EXIT_COND);
151 spin_unlock(&error->reset_lock);
152 #else
153 ret = wait_event_interruptible_timeout(error->reset_queue,
154 EXIT_COND,
155 10*HZ);
156 #endif
157 if (ret == 0) {
158 DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
159 return -EIO;
160 } else if (ret < 0) {
161 return ret;
162 }
163 #undef EXIT_COND
164
165 return 0;
166 }
167
168 int i915_mutex_lock_interruptible(struct drm_device *dev)
169 {
170 struct drm_i915_private *dev_priv = dev->dev_private;
171 int ret;
172
173 ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
174 if (ret)
175 return ret;
176
177 ret = mutex_lock_interruptible(&dev->struct_mutex);
178 if (ret)
179 return ret;
180
181 WARN_ON(i915_verify_lists(dev));
182 return 0;
183 }
184
185 static inline bool
186 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
187 {
188 return i915_gem_obj_bound_any(obj) && !obj->active;
189 }
190
191 int
192 i915_gem_init_ioctl(struct drm_device *dev, void *data,
193 struct drm_file *file)
194 {
195 struct drm_i915_private *dev_priv = dev->dev_private;
196 struct drm_i915_gem_init *args = data;
197
198 if (drm_core_check_feature(dev, DRIVER_MODESET))
199 return -ENODEV;
200
201 if (args->gtt_start >= args->gtt_end ||
202 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
203 return -EINVAL;
204
205 /* GEM with user mode setting was never supported on ilk and later. */
206 if (INTEL_INFO(dev)->gen >= 5)
207 return -ENODEV;
208
209 mutex_lock(&dev->struct_mutex);
210 i915_gem_setup_global_gtt(dev, args->gtt_start, args->gtt_end,
211 args->gtt_end);
212 dev_priv->gtt.mappable_end = args->gtt_end;
213 mutex_unlock(&dev->struct_mutex);
214
215 return 0;
216 }
217
218 int
219 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
220 struct drm_file *file)
221 {
222 struct drm_i915_private *dev_priv = dev->dev_private;
223 struct drm_i915_gem_get_aperture *args = data;
224 struct drm_i915_gem_object *obj;
225 size_t pinned;
226
227 pinned = 0;
228 mutex_lock(&dev->struct_mutex);
229 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
230 if (i915_gem_obj_is_pinned(obj))
231 pinned += i915_gem_obj_ggtt_size(obj);
232 mutex_unlock(&dev->struct_mutex);
233
234 args->aper_size = dev_priv->gtt.base.total;
235 args->aper_available_size = args->aper_size - pinned;
236
237 return 0;
238 }
239
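/*
 * Tear down a physically contiguous ("phys") backing store.  If the
 * object is still wanted (I915_MADV_WILLNEED), copy the contents of the
 * DMA buffer back into the object's backing pages and flush them before
 * freeing the buffer.
 */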
240 static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
241 {
242 drm_dma_handle_t *phys = obj->phys_handle;
243
244 if (!phys)
245 return;
246
247 if (obj->madv == I915_MADV_WILLNEED) {
248 #ifdef __NetBSD__
249 const char *vaddr = phys->vaddr;
250 unsigned i;
251
252 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
253 struct pglist pages;
254 int error;
255
256 TAILQ_INIT(&pages);
257 error = uvm_obj_wirepages(obj->base.gemo_shm_uao,
258 i*PAGE_SIZE, (i+1)*PAGE_SIZE, &pages);
259 if (error)
260 continue;
261
262 struct vm_page *const vm_page = TAILQ_FIRST(&pages);
263 struct page *const page = container_of(vm_page,
264 struct page, p_vmp);
265 char *const dst = kmap_atomic(page);
266 (void)memcpy(dst, vaddr + (i*PAGE_SIZE), PAGE_SIZE);
267 drm_clflush_virt_range(dst, PAGE_SIZE);
268 kunmap_atomic(dst);
269
270 vm_page->flags &= ~PG_CLEAN;
271 /* XXX mark page accessed */
272 uvm_obj_unwirepages(obj->base.gemo_shm_uao,
273 i*PAGE_SIZE, (i+1)*PAGE_SIZE);
274 }
275 #else
276 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
277 char *vaddr = phys->vaddr;
278 int i;
279
280 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
281 struct page *page = shmem_read_mapping_page(mapping, i);
282 if (!IS_ERR(page)) {
283 char *dst = kmap_atomic(page);
284 memcpy(dst, vaddr, PAGE_SIZE);
285 drm_clflush_virt_range(dst, PAGE_SIZE);
286 kunmap_atomic(dst);
287
288 set_page_dirty(page);
289 mark_page_accessed(page);
290 page_cache_release(page);
291 }
292 vaddr += PAGE_SIZE;
293 }
294 #endif
295 i915_gem_chipset_flush(obj->base.dev);
296 }
297
298 #ifndef __NetBSD__
299 #ifdef CONFIG_X86
300 set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
301 #endif
302 #endif
303 drm_pci_free(obj->base.dev, phys);
304 obj->phys_handle = NULL;
305 }
306
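/*
 * Give the object a physically contiguous ("phys") backing store:
 * allocate a DMA buffer covering the whole object and copy each
 * existing backing page into it.  Fails if an existing phys handle is
 * not suitably aligned or if the object is no longer marked WILLNEED.
 */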
307 int
308 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
309 int align)
310 {
311 drm_dma_handle_t *phys;
312 #ifndef __NetBSD__
313 struct address_space *mapping;
314 #endif
315 char *vaddr;
316 int i;
317
318 if (obj->phys_handle) {
319 	if ((unsigned long)obj->phys_handle->vaddr & (align - 1))
320 return -EBUSY;
321
322 return 0;
323 }
324
325 if (obj->madv != I915_MADV_WILLNEED)
326 return -EFAULT;
327
328 #ifdef __NetBSD__
329 if (obj->base.gemo_shm_uao == NULL)
330 return -EINVAL;
331 #else
332 if (obj->base.filp == NULL)
333 return -EINVAL;
334 #endif
335
336 /* create a new object */
337 phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
338 if (!phys)
339 return -ENOMEM;
340
341 vaddr = phys->vaddr;
342 #ifndef __NetBSD__
343 #ifdef CONFIG_X86
344 set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
345 #endif
346 mapping = file_inode(obj->base.filp)->i_mapping;
347 #endif
348 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
349 struct page *page;
350 char *src;
351
352 #ifdef __NetBSD__
353 struct pglist pages;
354 int ret;
355
356 TAILQ_INIT(&pages);
357
358 /* XXX errno NetBSD->Linux */
359 ret = -uvm_obj_wirepages(obj->base.gemo_shm_uao, i*PAGE_SIZE,
360 (i+1)*PAGE_SIZE, &pages);
361 if (ret) {
362 drm_pci_free(obj->base.dev, phys);
363 return ret;
364 }
365 KASSERT(!TAILQ_EMPTY(&pages));
366 page = container_of(TAILQ_FIRST(&pages), struct page, p_vmp);
367 #else
368 page = shmem_read_mapping_page(mapping, i);
369 if (IS_ERR(page)) {
370 #ifdef CONFIG_X86
371 set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
372 #endif
373 drm_pci_free(obj->base.dev, phys);
374 return PTR_ERR(page);
375 }
376 #endif /* defined(__NetBSD__) */
377
378 src = kmap_atomic(page);
379 memcpy(vaddr, src, PAGE_SIZE);
380 kunmap_atomic(src);
381
382 #ifndef __NetBSD__
383 mark_page_accessed(page);
384 page_cache_release(page);
385 #endif
386
387 vaddr += PAGE_SIZE;
388 }
389
390 obj->phys_handle = phys;
391 return 0;
392 }
393
394 static int
395 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
396 struct drm_i915_gem_pwrite *args,
397 struct drm_file *file_priv)
398 {
399 struct drm_device *dev = obj->base.dev;
400 void *vaddr = (char *)obj->phys_handle->vaddr + args->offset;
401 char __user *user_data = to_user_ptr(args->data_ptr);
402
403 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
404 unsigned long unwritten;
405
406 /* The physical object once assigned is fixed for the lifetime
407 * of the obj, so we can safely drop the lock and continue
408 * to access vaddr.
409 */
410 mutex_unlock(&dev->struct_mutex);
411 unwritten = copy_from_user(vaddr, user_data, args->size);
412 mutex_lock(&dev->struct_mutex);
413 if (unwritten)
414 return -EFAULT;
415 }
416
417 i915_gem_chipset_flush(dev);
418 return 0;
419 }
420
421 void *i915_gem_object_alloc(struct drm_device *dev)
422 {
423 struct drm_i915_private *dev_priv = dev->dev_private;
424 return kmem_cache_zalloc(dev_priv->slab, GFP_KERNEL);
425 }
426
427 void i915_gem_object_free(struct drm_i915_gem_object *obj)
428 {
429 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
430 kmem_cache_free(dev_priv->slab, obj);
431 }
432
433 static int
434 i915_gem_create(struct drm_file *file,
435 struct drm_device *dev,
436 uint64_t size,
437 uint32_t *handle_p)
438 {
439 struct drm_i915_gem_object *obj;
440 int ret;
441 u32 handle;
442
443 size = roundup(size, PAGE_SIZE);
444 if (size == 0)
445 return -EINVAL;
446
447 /* Allocate the new object */
448 obj = i915_gem_alloc_object(dev, size);
449 if (obj == NULL)
450 return -ENOMEM;
451
452 ret = drm_gem_handle_create(file, &obj->base, &handle);
453 /* drop reference from allocate - handle holds it now */
454 drm_gem_object_unreference_unlocked(&obj->base);
455 if (ret)
456 return ret;
457
458 *handle_p = handle;
459 return 0;
460 }
461
462 int
463 i915_gem_dumb_create(struct drm_file *file,
464 struct drm_device *dev,
465 struct drm_mode_create_dumb *args)
466 {
467 /* have to work out size/pitch and return them */
468 #ifdef __NetBSD__ /* ALIGN means something else. */
469 args->pitch = round_up(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
470 #else
471 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
472 #endif
473 args->size = args->pitch * args->height;
474 return i915_gem_create(file, dev,
475 args->size, &args->handle);
476 }
477
478 /**
479 * Creates a new mm object and returns a handle to it.
480 */
481 int
482 i915_gem_create_ioctl(struct drm_device *dev, void *data,
483 struct drm_file *file)
484 {
485 struct drm_i915_gem_create *args = data;
486
487 return i915_gem_create(file, dev,
488 args->size, &args->handle);
489 }
490
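/*
 * Swizzled copy helpers.  On parts that fold bit 17 of the physical
 * address into their swizzle pattern, the two 64-byte halves of each
 * 128-byte span are swapped, so these helpers walk the buffer in
 * cacheline-sized chunks and XOR the GPU offset with 64 to locate the
 * swizzled data.
 */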
491 static inline int
492 __copy_to_user_swizzled(char __user *cpu_vaddr,
493 const char *gpu_vaddr, int gpu_offset,
494 int length)
495 {
496 int ret, cpu_offset = 0;
497
498 while (length > 0) {
499 #ifdef __NetBSD__ /* XXX ALIGN means something else. */
500 int cacheline_end = round_up(gpu_offset + 1, 64);
501 #else
502 int cacheline_end = ALIGN(gpu_offset + 1, 64);
503 #endif
504 int this_length = min(cacheline_end - gpu_offset, length);
505 int swizzled_gpu_offset = gpu_offset ^ 64;
506
507 ret = __copy_to_user(cpu_vaddr + cpu_offset,
508 gpu_vaddr + swizzled_gpu_offset,
509 this_length);
510 if (ret)
511 return ret + length;
512
513 cpu_offset += this_length;
514 gpu_offset += this_length;
515 length -= this_length;
516 }
517
518 return 0;
519 }
520
521 static inline int
522 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
523 const char __user *cpu_vaddr,
524 int length)
525 {
526 int ret, cpu_offset = 0;
527
528 while (length > 0) {
529 #ifdef __NetBSD__ /* XXX ALIGN means something else. */
530 int cacheline_end = round_up(gpu_offset + 1, 64);
531 #else
532 int cacheline_end = ALIGN(gpu_offset + 1, 64);
533 #endif
534 int this_length = min(cacheline_end - gpu_offset, length);
535 int swizzled_gpu_offset = gpu_offset ^ 64;
536
537 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
538 cpu_vaddr + cpu_offset,
539 this_length);
540 if (ret)
541 return ret + length;
542
543 cpu_offset += this_length;
544 gpu_offset += this_length;
545 length -= this_length;
546 }
547
548 return 0;
549 }
550
551 /*
552 * Pins the specified object's pages and synchronizes the object with
553 * GPU accesses. Sets needs_clflush to non-zero if the caller should
554 * flush the object from the CPU cache.
555 */
556 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
557 int *needs_clflush)
558 {
559 int ret;
560
561 *needs_clflush = 0;
562
563 #ifdef __NetBSD__
564 if (obj->base.gemo_shm_uao == NULL)
565 return -EINVAL;
566 #else
567 if (!obj->base.filp)
568 return -EINVAL;
569 #endif
570
571 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
572 /* If we're not in the cpu read domain, set ourself into the gtt
573 * read domain and manually flush cachelines (if required). This
574 * optimizes for the case when the gpu will dirty the data
575 * anyway again before the next pread happens. */
576 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
577 obj->cache_level);
578 ret = i915_gem_object_wait_rendering(obj, true);
579 if (ret)
580 return ret;
581 }
582
583 ret = i915_gem_object_get_pages(obj);
584 if (ret)
585 return ret;
586
587 i915_gem_object_pin_pages(obj);
588
589 return ret;
590 }
591
592 /* Per-page copy function for the shmem pread fastpath.
593 * Flushes invalid cachelines before reading the target if
594 * needs_clflush is set. */
595 static int
596 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
597 char __user *user_data,
598 bool page_do_bit17_swizzling, bool needs_clflush)
599 {
600 #ifdef __NetBSD__ /* XXX atomic shmem fast path */
601 return -EFAULT;
602 #else
603 char *vaddr;
604 int ret;
605
606 if (unlikely(page_do_bit17_swizzling))
607 return -EINVAL;
608
609 vaddr = kmap_atomic(page);
610 if (needs_clflush)
611 drm_clflush_virt_range(vaddr + shmem_page_offset,
612 page_length);
613 ret = __copy_to_user_inatomic(user_data,
614 vaddr + shmem_page_offset,
615 page_length);
616 kunmap_atomic(vaddr);
617
618 return ret ? -EFAULT : 0;
619 #endif
620 }
621
622 static void
623 shmem_clflush_swizzled_range(char *addr, unsigned long length,
624 bool swizzled)
625 {
626 if (unlikely(swizzled)) {
627 unsigned long start = (unsigned long) addr;
628 unsigned long end = (unsigned long) addr + length;
629
630 /* For swizzling simply ensure that we always flush both
631 * channels. Lame, but simple and it works. Swizzled
632 * pwrite/pread is far from a hotpath - current userspace
633 * doesn't use it at all. */
634 start = round_down(start, 128);
635 end = round_up(end, 128);
636
637 drm_clflush_virt_range((void *)start, end - start);
638 } else {
639 drm_clflush_virt_range(addr, length);
640 }
641
642 }
643
644 /* Only difference to the fast-path function is that this can handle bit17
645 * and uses non-atomic copy and kmap functions. */
646 static int
647 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
648 char __user *user_data,
649 bool page_do_bit17_swizzling, bool needs_clflush)
650 {
651 char *vaddr;
652 int ret;
653
654 vaddr = kmap(page);
655 if (needs_clflush)
656 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
657 page_length,
658 page_do_bit17_swizzling);
659
660 if (page_do_bit17_swizzling)
661 ret = __copy_to_user_swizzled(user_data,
662 vaddr, shmem_page_offset,
663 page_length);
664 else
665 ret = __copy_to_user(user_data,
666 vaddr + shmem_page_offset,
667 page_length);
668 kunmap(page);
669
670 	return ret ? -EFAULT : 0;
671 }
672
673 static int
674 i915_gem_shmem_pread(struct drm_device *dev,
675 struct drm_i915_gem_object *obj,
676 struct drm_i915_gem_pread *args,
677 struct drm_file *file)
678 {
679 char __user *user_data;
680 ssize_t remain;
681 loff_t offset;
682 int shmem_page_offset, page_length, ret = 0;
683 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
684 #ifndef __NetBSD__ /* XXX */
685 int prefaulted = 0;
686 #endif
687 int needs_clflush = 0;
688 #ifndef __NetBSD__
689 struct sg_page_iter sg_iter;
690 #endif
691
692 user_data = to_user_ptr(args->data_ptr);
693 remain = args->size;
694
695 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
696
697 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
698 if (ret)
699 return ret;
700
701 offset = args->offset;
702
703 #ifdef __NetBSD__
704 /*
705 * XXX This is a big #ifdef with a lot of duplicated code, but
706 * factoring out the loop head -- which is all that
707 * substantially differs -- is probably more trouble than it's
708 * worth at the moment.
709 */
710 while (0 < remain) {
711 /* Get the next page. */
712 shmem_page_offset = offset_in_page(offset);
713 KASSERT(shmem_page_offset < PAGE_SIZE);
714 page_length = MIN(remain, (PAGE_SIZE - shmem_page_offset));
715 struct page *const page = i915_gem_object_get_page(obj,
716 atop(offset));
717
718 /* Decide whether to swizzle bit 17. */
719 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
720 (page_to_phys(page) & (1 << 17)) != 0;
721
722 /* Try the fast path. */
723 ret = shmem_pread_fast(page, shmem_page_offset, page_length,
724 user_data, page_do_bit17_swizzling, needs_clflush);
725 if (ret == 0)
726 goto next_page;
727
728 /* Fast path failed. Try the slow path. */
729 mutex_unlock(&dev->struct_mutex);
730 /* XXX prefault */
731 ret = shmem_pread_slow(page, shmem_page_offset, page_length,
732 user_data, page_do_bit17_swizzling, needs_clflush);
733 mutex_lock(&dev->struct_mutex);
734 if (ret)
735 goto out;
736
737 next_page: KASSERT(page_length <= remain);
738 remain -= page_length;
739 user_data += page_length;
740 offset += page_length;
741 }
742 #else
743 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
744 offset >> PAGE_SHIFT) {
745 struct page *page = sg_page_iter_page(&sg_iter);
746
747 if (remain <= 0)
748 break;
749
750 /* Operation in this page
751 *
752 * shmem_page_offset = offset within page in shmem file
753 * page_length = bytes to copy for this page
754 */
755 shmem_page_offset = offset_in_page(offset);
756 page_length = remain;
757 if ((shmem_page_offset + page_length) > PAGE_SIZE)
758 page_length = PAGE_SIZE - shmem_page_offset;
759
760 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
761 (page_to_phys(page) & (1 << 17)) != 0;
762
763 ret = shmem_pread_fast(page, shmem_page_offset, page_length,
764 user_data, page_do_bit17_swizzling,
765 needs_clflush);
766 if (ret == 0)
767 goto next_page;
768
769 mutex_unlock(&dev->struct_mutex);
770
771 if (likely(!i915.prefault_disable) && !prefaulted) {
772 ret = fault_in_multipages_writeable(user_data, remain);
773 /* Userspace is tricking us, but we've already clobbered
774 * its pages with the prefault and promised to write the
775 * data up to the first fault. Hence ignore any errors
776 * and just continue. */
777 (void)ret;
778 prefaulted = 1;
779 }
780
781 ret = shmem_pread_slow(page, shmem_page_offset, page_length,
782 user_data, page_do_bit17_swizzling,
783 needs_clflush);
784
785 mutex_lock(&dev->struct_mutex);
786
787 if (ret)
788 goto out;
789
790 next_page:
791 remain -= page_length;
792 user_data += page_length;
793 offset += page_length;
794 }
795 #endif
796
797 out:
798 i915_gem_object_unpin_pages(obj);
799
800 return ret;
801 }
802
803 /**
804 * Reads data from the object referenced by handle.
805 *
806 * On error, the contents of *data are undefined.
807 */
808 int
809 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
810 struct drm_file *file)
811 {
812 struct drm_i915_gem_pread *args = data;
813 struct drm_gem_object *gobj;
814 struct drm_i915_gem_object *obj;
815 int ret = 0;
816
817 if (args->size == 0)
818 return 0;
819
820 if (!access_ok(VERIFY_WRITE,
821 to_user_ptr(args->data_ptr),
822 args->size))
823 return -EFAULT;
824
825 ret = i915_mutex_lock_interruptible(dev);
826 if (ret)
827 return ret;
828
829 gobj = drm_gem_object_lookup(dev, file, args->handle);
830 if (gobj == NULL) {
831 ret = -ENOENT;
832 goto unlock;
833 }
834 obj = to_intel_bo(gobj);
835
836 /* Bounds check source. */
837 if (args->offset > obj->base.size ||
838 args->size > obj->base.size - args->offset) {
839 ret = -EINVAL;
840 goto out;
841 }
842
843 /* prime objects have no backing filp to GEM pread/pwrite
844 * pages from.
845 */
846 #ifdef __NetBSD__
847 /* Also stolen objects. */
848 if (obj->base.gemo_shm_uao == NULL) {
849 ret = -EINVAL;
850 goto out;
851 }
852 #else
853 if (!obj->base.filp) {
854 ret = -EINVAL;
855 goto out;
856 }
857 #endif
858
859 trace_i915_gem_object_pread(obj, args->offset, args->size);
860
861 ret = i915_gem_shmem_pread(dev, obj, args, file);
862
863 out:
864 drm_gem_object_unreference(&obj->base);
865 unlock:
866 mutex_unlock(&dev->struct_mutex);
867 return ret;
868 }
869
870 /* This is the fast write path which cannot handle
871 * page faults in the source data
872 */
873
874 static inline int
875 fast_user_write(struct io_mapping *mapping,
876 loff_t page_base, int page_offset,
877 char __user *user_data,
878 int length)
879 {
880 #ifdef __NetBSD__ /* XXX atomic shmem fast path */
881 return -EFAULT;
882 #else
883 void __iomem *vaddr_atomic;
884 void *vaddr;
885 unsigned long unwritten;
886
887 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
888 /* We can use the cpu mem copy function because this is X86. */
889 vaddr = (void __force*)vaddr_atomic + page_offset;
890 unwritten = __copy_from_user_inatomic_nocache(vaddr,
891 user_data, length);
892 io_mapping_unmap_atomic(vaddr_atomic);
893 return unwritten;
894 #endif
895 }
896
897 /**
898 * This is the fast pwrite path, where we copy the data directly from the
899 * user into the GTT, uncached.
900 */
901 static int
902 i915_gem_gtt_pwrite_fast(struct drm_device *dev,
903 struct drm_i915_gem_object *obj,
904 struct drm_i915_gem_pwrite *args,
905 struct drm_file *file)
906 {
907 struct drm_i915_private *dev_priv = dev->dev_private;
908 ssize_t remain;
909 loff_t offset, page_base;
910 char __user *user_data;
911 int page_offset, page_length, ret;
912
913 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
914 if (ret)
915 goto out;
916
917 ret = i915_gem_object_set_to_gtt_domain(obj, true);
918 if (ret)
919 goto out_unpin;
920
921 ret = i915_gem_object_put_fence(obj);
922 if (ret)
923 goto out_unpin;
924
925 user_data = to_user_ptr(args->data_ptr);
926 remain = args->size;
927
928 offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
929
930 while (remain > 0) {
931 /* Operation in this page
932 *
933 * page_base = page offset within aperture
934 * page_offset = offset within page
935 * page_length = bytes to copy for this page
936 */
937 page_base = offset & PAGE_MASK;
938 page_offset = offset_in_page(offset);
939 page_length = remain;
940 if ((page_offset + remain) > PAGE_SIZE)
941 page_length = PAGE_SIZE - page_offset;
942
943 /* If we get a fault while copying data, then (presumably) our
944 * source page isn't available. Return the error and we'll
945 * retry in the slow path.
946 */
947 if (fast_user_write(dev_priv->gtt.mappable, page_base,
948 page_offset, user_data, page_length)) {
949 ret = -EFAULT;
950 goto out_unpin;
951 }
952
953 remain -= page_length;
954 user_data += page_length;
955 offset += page_length;
956 }
957
958 out_unpin:
959 i915_gem_object_ggtt_unpin(obj);
960 out:
961 return ret;
962 }
963
964 /* Per-page copy function for the shmem pwrite fastpath.
965 * Flushes invalid cachelines before writing to the target if
966 * needs_clflush_before is set and flushes out any written cachelines after
967 * writing if needs_clflush is set. */
968 static int
969 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
970 char __user *user_data,
971 bool page_do_bit17_swizzling,
972 bool needs_clflush_before,
973 bool needs_clflush_after)
974 {
975 #ifdef __NetBSD__
976 return -EFAULT;
977 #else
978 char *vaddr;
979 int ret;
980
981 if (unlikely(page_do_bit17_swizzling))
982 return -EINVAL;
983
984 vaddr = kmap_atomic(page);
985 if (needs_clflush_before)
986 drm_clflush_virt_range(vaddr + shmem_page_offset,
987 page_length);
988 ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
989 user_data, page_length);
990 if (needs_clflush_after)
991 drm_clflush_virt_range(vaddr + shmem_page_offset,
992 page_length);
993 kunmap_atomic(vaddr);
994
995 return ret ? -EFAULT : 0;
996 #endif
997 }
998
999 /* Only difference to the fast-path function is that this can handle bit17
1000 * and uses non-atomic copy and kmap functions. */
1001 static int
1002 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
1003 char __user *user_data,
1004 bool page_do_bit17_swizzling,
1005 bool needs_clflush_before,
1006 bool needs_clflush_after)
1007 {
1008 char *vaddr;
1009 int ret;
1010
1011 vaddr = kmap(page);
1012 if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
1013 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
1014 page_length,
1015 page_do_bit17_swizzling);
1016 if (page_do_bit17_swizzling)
1017 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
1018 user_data,
1019 page_length);
1020 else
1021 ret = __copy_from_user(vaddr + shmem_page_offset,
1022 user_data,
1023 page_length);
1024 if (needs_clflush_after)
1025 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
1026 page_length,
1027 page_do_bit17_swizzling);
1028 kunmap(page);
1029
1030 return ret ? -EFAULT : 0;
1031 }
1032
1033 static int
1034 i915_gem_shmem_pwrite(struct drm_device *dev,
1035 struct drm_i915_gem_object *obj,
1036 struct drm_i915_gem_pwrite *args,
1037 struct drm_file *file)
1038 {
1039 ssize_t remain;
1040 loff_t offset;
1041 char __user *user_data;
1042 int shmem_page_offset, page_length, ret = 0;
1043 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
1044 int hit_slowpath = 0;
1045 int needs_clflush_after = 0;
1046 int needs_clflush_before = 0;
1047 #ifndef __NetBSD__
1048 struct sg_page_iter sg_iter;
1049 #endif
1050
1051 user_data = to_user_ptr(args->data_ptr);
1052 remain = args->size;
1053
1054 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
1055
1056 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1057 /* If we're not in the cpu write domain, set ourself into the gtt
1058 * write domain and manually flush cachelines (if required). This
1059 * optimizes for the case when the gpu will use the data
1060 * right away and we therefore have to clflush anyway. */
1061 needs_clflush_after = cpu_write_needs_clflush(obj);
1062 ret = i915_gem_object_wait_rendering(obj, false);
1063 if (ret)
1064 return ret;
1065 }
1066 /* Same trick applies to invalidate partially written cachelines read
1067 * before writing. */
1068 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
1069 needs_clflush_before =
1070 !cpu_cache_is_coherent(dev, obj->cache_level);
1071
1072 ret = i915_gem_object_get_pages(obj);
1073 if (ret)
1074 return ret;
1075
1076 i915_gem_object_pin_pages(obj);
1077
1078 offset = args->offset;
1079 obj->dirty = 1;
1080
1081 #ifdef __NetBSD__
1082 while (0 < remain) {
1083 /* Get the next page. */
1084 shmem_page_offset = offset_in_page(offset);
1085 KASSERT(shmem_page_offset < PAGE_SIZE);
1086 page_length = MIN(remain, (PAGE_SIZE - shmem_page_offset));
1087 struct page *const page = i915_gem_object_get_page(obj,
1088 atop(offset));
1089
1090 /* Decide whether to flush the cache or swizzle bit 17. */
1091 const bool partial_cacheline_write = needs_clflush_before &&
1092 ((shmem_page_offset | page_length)
1093 & (cpu_info_primary.ci_cflush_lsize - 1));
1094 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
1095 (page_to_phys(page) & (1 << 17)) != 0;
1096
1097 /* Try the fast path. */
1098 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
1099 user_data, page_do_bit17_swizzling,
1100 partial_cacheline_write, needs_clflush_after);
1101 if (ret == 0)
1102 goto next_page;
1103
1104 /* Fast path failed. Try the slow path. */
1105 hit_slowpath = 1;
1106 mutex_unlock(&dev->struct_mutex);
1107 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
1108 user_data, page_do_bit17_swizzling,
1109 partial_cacheline_write, needs_clflush_after);
1110 mutex_lock(&dev->struct_mutex);
1111 if (ret)
1112 goto out;
1113
1114 next_page: KASSERT(page_length <= remain);
1115 remain -= page_length;
1116 user_data += page_length;
1117 offset += page_length;
1118 }
1119 #else
1120 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
1121 offset >> PAGE_SHIFT) {
1122 struct page *page = sg_page_iter_page(&sg_iter);
1123 int partial_cacheline_write;
1124
1125 if (remain <= 0)
1126 break;
1127
1128 /* Operation in this page
1129 *
1130 * shmem_page_offset = offset within page in shmem file
1131 * page_length = bytes to copy for this page
1132 */
1133 shmem_page_offset = offset_in_page(offset);
1134
1135 page_length = remain;
1136 if ((shmem_page_offset + page_length) > PAGE_SIZE)
1137 page_length = PAGE_SIZE - shmem_page_offset;
1138
1139 /* If we don't overwrite a cacheline completely we need to be
1140 * careful to have up-to-date data by first clflushing. Don't
1141 	 * overcomplicate things and flush the entire page. */
1142 partial_cacheline_write = needs_clflush_before &&
1143 ((shmem_page_offset | page_length)
1144 & (boot_cpu_data.x86_clflush_size - 1));
1145
1146 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
1147 (page_to_phys(page) & (1 << 17)) != 0;
1148
1149 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
1150 user_data, page_do_bit17_swizzling,
1151 partial_cacheline_write,
1152 needs_clflush_after);
1153 if (ret == 0)
1154 goto next_page;
1155
1156 hit_slowpath = 1;
1157 mutex_unlock(&dev->struct_mutex);
1158 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
1159 user_data, page_do_bit17_swizzling,
1160 partial_cacheline_write,
1161 needs_clflush_after);
1162
1163 mutex_lock(&dev->struct_mutex);
1164
1165 if (ret)
1166 goto out;
1167
1168 next_page:
1169 remain -= page_length;
1170 user_data += page_length;
1171 offset += page_length;
1172 }
1173 #endif
1174
1175 out:
1176 i915_gem_object_unpin_pages(obj);
1177
1178 if (hit_slowpath) {
1179 /*
1180 * Fixup: Flush cpu caches in case we didn't flush the dirty
1181 * cachelines in-line while writing and the object moved
1182 * out of the cpu write domain while we've dropped the lock.
1183 */
1184 if (!needs_clflush_after &&
1185 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1186 if (i915_gem_clflush_object(obj, obj->pin_display))
1187 i915_gem_chipset_flush(dev);
1188 }
1189 }
1190
1191 if (needs_clflush_after)
1192 i915_gem_chipset_flush(dev);
1193
1194 return ret;
1195 }
1196
1197 /**
1198 * Writes data to the object referenced by handle.
1199 *
1200 * On error, the contents of the buffer that were to be modified are undefined.
1201 */
1202 int
1203 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1204 struct drm_file *file)
1205 {
1206 struct drm_i915_gem_pwrite *args = data;
1207 struct drm_gem_object *gobj;
1208 struct drm_i915_gem_object *obj;
1209 int ret;
1210
1211 if (args->size == 0)
1212 return 0;
1213
1214 if (!access_ok(VERIFY_READ,
1215 to_user_ptr(args->data_ptr),
1216 args->size))
1217 return -EFAULT;
1218
1219 #ifndef __NetBSD__ /* XXX prefault */
1220 if (likely(!i915.prefault_disable)) {
1221 ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
1222 args->size);
1223 if (ret)
1224 return -EFAULT;
1225 }
1226 #endif
1227
1228 ret = i915_mutex_lock_interruptible(dev);
1229 if (ret)
1230 return ret;
1231
1232 gobj = drm_gem_object_lookup(dev, file, args->handle);
1233 if (gobj == NULL) {
1234 ret = -ENOENT;
1235 goto unlock;
1236 }
1237 obj = to_intel_bo(gobj);
1238
1239 /* Bounds check destination. */
1240 if (args->offset > obj->base.size ||
1241 args->size > obj->base.size - args->offset) {
1242 ret = -EINVAL;
1243 goto out;
1244 }
1245
1246 /* prime objects have no backing filp to GEM pread/pwrite
1247 * pages from.
1248 */
1249 #ifdef __NetBSD__
1250 /* Also stolen objects. */
1251 if (obj->base.gemo_shm_uao == NULL) {
1252 ret = -EINVAL;
1253 goto out;
1254 }
1255 #else
1256 if (!obj->base.filp) {
1257 ret = -EINVAL;
1258 goto out;
1259 }
1260 #endif
1261
1262 trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1263
1264 ret = -EFAULT;
1265 /* We can only do the GTT pwrite on untiled buffers, as otherwise
1266 * it would end up going through the fenced access, and we'll get
1267 * different detiling behavior between reading and writing.
1268 * pread/pwrite currently are reading and writing from the CPU
1269 * perspective, requiring manual detiling by the client.
1270 */
1271 if (obj->phys_handle) {
1272 ret = i915_gem_phys_pwrite(obj, args, file);
1273 goto out;
1274 }
1275
1276 if (obj->tiling_mode == I915_TILING_NONE &&
1277 obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
1278 cpu_write_needs_clflush(obj)) {
1279 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
1280 /* Note that the gtt paths might fail with non-page-backed user
1281 * pointers (e.g. gtt mappings when moving data between
1282 * textures). Fallback to the shmem path in that case. */
1283 }
1284
1285 if (ret == -EFAULT || ret == -ENOSPC)
1286 ret = i915_gem_shmem_pwrite(dev, obj, args, file);
1287
1288 out:
1289 drm_gem_object_unreference(&obj->base);
1290 unlock:
1291 mutex_unlock(&dev->struct_mutex);
1292 return ret;
1293 }
1294
1295 int
1296 i915_gem_check_wedge(struct i915_gpu_error *error,
1297 bool interruptible)
1298 {
1299 if (i915_reset_in_progress(error)) {
1300 /* Non-interruptible callers can't handle -EAGAIN, hence return
1301 * -EIO unconditionally for these. */
1302 if (!interruptible)
1303 return -EIO;
1304
1305 /* Recovery complete, but the reset failed ... */
1306 if (i915_terminally_wedged(error))
1307 return -EIO;
1308
1309 return -EAGAIN;
1310 }
1311
1312 return 0;
1313 }
1314
1315 /*
1316 * Compare seqno against outstanding lazy request. Emit a request if they are
1317 * equal.
1318 */
1319 static int
1320 i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
1321 {
1322 int ret;
1323
1324 BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
1325
1326 ret = 0;
1327 if (seqno == ring->outstanding_lazy_seqno)
1328 ret = i915_add_request(ring, NULL);
1329
1330 return ret;
1331 }
1332
1333 #ifndef __NetBSD__
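/*
 * Timer callback used by the wait loop below to wake the waiting task
 * when an interrupt is suspected to have been missed.
 */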
1334 static void fake_irq(unsigned long data)
1335 {
1336 wake_up_process((struct task_struct *)data);
1337 }
1338 #endif
1339
1340 static bool missed_irq(struct drm_i915_private *dev_priv,
1341 struct intel_ring_buffer *ring)
1342 {
1343 return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
1344 }
1345
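/*
 * Allow an RPS boost only for the first outstanding wait on a file:
 * atomic_xchg returns the previous value, so this is true only when
 * rps_wait_boost was not already set.
 */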
1346 static bool can_wait_boost(struct drm_i915_file_private *file_priv)
1347 {
1348 if (file_priv == NULL)
1349 return true;
1350
1351 return !atomic_xchg(&file_priv->rps_wait_boost, true);
1352 }
1353
1354 /**
1355 * __wait_seqno - wait until execution of seqno has finished
1356 * @ring: the ring expected to report seqno
1357 * @seqno: duh!
1358 * @reset_counter: reset sequence associated with the given seqno
1359 * @interruptible: do an interruptible wait (normally yes)
1360 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
1361 *
1362 * Note: It is of utmost importance that the passed in seqno and reset_counter
1363 * values have been read by the caller in an smp safe manner. Where read-side
1364 * locks are involved, it is sufficient to read the reset_counter before
1365 * unlocking the lock that protects the seqno. For lockless tricks, the
1366 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
1367 * inserted.
1368 *
1369 	 * Returns 0 if the seqno was found within the allotted time. Else returns the
1370 * errno with remaining time filled in timeout argument.
1371 */
1372 #ifdef __NetBSD__
1373 static int
1374 __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, unsigned reset_counter,
1375 bool interruptible, struct timespec *timeout,
1376 struct drm_i915_file_private *file_priv)
1377 {
1378 struct drm_device *dev = ring->dev;
1379 struct drm_i915_private *dev_priv = dev->dev_private;
1380 bool irq_test_in_progress;
1381 struct timespec before, after;
1382 int ticks;
1383 bool wedged;
1384 int ret;
1385
1386 irq_test_in_progress = (dev_priv->gpu_error.test_irq_rings &
1387 intel_ring_flag(ring));
1388 __insn_barrier();
1389
1390 if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
1391 return 0;
1392
1393 if (timeout)
1394 ticks = mstohz(timespec_to_ns(timeout) / 1000000);
1395 else
1396 ticks = 1;
1397
1398 if (INTEL_INFO(dev)->gen >= 6 && can_wait_boost(file_priv)) {
1399 gen6_rps_boost(dev_priv);
1400 if (file_priv)
1401 mod_delayed_work(dev_priv->wq,
1402 &file_priv->mm.idle_work,
1403 msecs_to_jiffies(100));
1404 }
1405
1406 if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring)))
1407 return -ENODEV;
1408
1409 nanotime(&before);
1410 spin_lock(&dev_priv->irq_lock);
1411 #define EXIT_COND \
1412 (((reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) \
1413 ? wedged = true : false) || \
1414 i915_seqno_passed(ring->get_seqno(ring, false), \
1415 seqno))
1416
1417 if (timeout) {
1418 /*
1419 * XXX This missed_irq business smells like unlocked
1420 * Linux waitqueue nonsense.
1421 */
1422 if (missed_irq(dev_priv, ring))
1423 ticks = 1;
1424 if (interruptible)
1425 DRM_SPIN_TIMED_WAIT_UNTIL(ret, &ring->irq_queue,
1426 &dev_priv->irq_lock, ticks, EXIT_COND);
1427 else
1428 DRM_SPIN_TIMED_WAIT_NOINTR_UNTIL(ret, &ring->irq_queue,
1429 &dev_priv->irq_lock, ticks, EXIT_COND);
1430 } else {
1431 if (interruptible)
1432 DRM_SPIN_WAIT_UNTIL(ret, &ring->irq_queue,
1433 &dev_priv->irq_lock, EXIT_COND);
1434 else
1435 DRM_SPIN_WAIT_NOINTR_UNTIL(ret, &ring->irq_queue,
1436 &dev_priv->irq_lock, EXIT_COND);
1437 }
1438 #undef EXIT_COND
1439 spin_unlock(&dev_priv->irq_lock);
1440 nanotime(&after);
1441
1442 if (!irq_test_in_progress)
1443 ring->irq_put(ring);
1444 if (timeout)
1445 timespecsub(&after, &before, timeout);
1446 return MAX(ret, 0); /* ignore remaining ticks */
1447 }
1448 #else
1449 static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
1450 unsigned reset_counter,
1451 bool interruptible,
1452 struct timespec *timeout,
1453 struct drm_i915_file_private *file_priv)
1454 {
1455 struct drm_device *dev = ring->dev;
1456 struct drm_i915_private *dev_priv = dev->dev_private;
1457 const bool irq_test_in_progress =
1458 ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
1459 struct timespec before, now;
1460 DEFINE_WAIT(wait);
1461 unsigned long timeout_expire;
1462 int ret;
1463
1464 WARN(dev_priv->pm.irqs_disabled, "IRQs disabled\n");
1465
1466 if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
1467 return 0;
1468
1469 timeout_expire = timeout ? jiffies + timespec_to_jiffies_timeout(timeout) : 0;
1470
1471 if (INTEL_INFO(dev)->gen >= 6 && can_wait_boost(file_priv)) {
1472 gen6_rps_boost(dev_priv);
1473 if (file_priv)
1474 mod_delayed_work(dev_priv->wq,
1475 &file_priv->mm.idle_work,
1476 msecs_to_jiffies(100));
1477 }
1478
1479 if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring)))
1480 return -ENODEV;
1481
1482 /* Record current time in case interrupted by signal, or wedged */
1483 trace_i915_gem_request_wait_begin(ring, seqno);
1484 getrawmonotonic(&before);
1485 for (;;) {
1486 struct timer_list timer;
1487
1488 prepare_to_wait(&ring->irq_queue, &wait,
1489 interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
1490
1491 /* We need to check whether any gpu reset happened in between
1492 * the caller grabbing the seqno and now ... */
1493 if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
1494 /* ... but upgrade the -EAGAIN to an -EIO if the gpu
1495 			 * is truly gone. */
1496 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1497 if (ret == 0)
1498 ret = -EAGAIN;
1499 break;
1500 }
1501
1502 if (i915_seqno_passed(ring->get_seqno(ring, false), seqno)) {
1503 ret = 0;
1504 break;
1505 }
1506
1507 if (interruptible && signal_pending(current)) {
1508 ret = -ERESTARTSYS;
1509 break;
1510 }
1511
1512 if (timeout && time_after_eq(jiffies, timeout_expire)) {
1513 ret = -ETIME;
1514 break;
1515 }
1516
1517 timer.function = NULL;
1518 if (timeout || missed_irq(dev_priv, ring)) {
1519 unsigned long expire;
1520
1521 setup_timer_on_stack(&timer, fake_irq, (unsigned long)current);
1522 expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire;
1523 mod_timer(&timer, expire);
1524 }
1525
1526 io_schedule();
1527
1528 if (timer.function) {
1529 del_singleshot_timer_sync(&timer);
1530 destroy_timer_on_stack(&timer);
1531 }
1532 }
1533 getrawmonotonic(&now);
1534 trace_i915_gem_request_wait_end(ring, seqno);
1535
1536 if (!irq_test_in_progress)
1537 ring->irq_put(ring);
1538
1539 finish_wait(&ring->irq_queue, &wait);
1540
1541 if (timeout) {
1542 struct timespec sleep_time = timespec_sub(now, before);
1543 *timeout = timespec_sub(*timeout, sleep_time);
1544 if (!timespec_valid(timeout)) /* i.e. negative time remains */
1545 set_normalized_timespec(timeout, 0, 0);
1546 }
1547
1548 return ret;
1549 }
1550 #endif
1551
1552 /**
1553 * Waits for a sequence number to be signaled, and cleans up the
1554 * request and object lists appropriately for that event.
1555 */
1556 int
1557 i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
1558 {
1559 struct drm_device *dev = ring->dev;
1560 struct drm_i915_private *dev_priv = dev->dev_private;
1561 bool interruptible = dev_priv->mm.interruptible;
1562 int ret;
1563
1564 BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1565 BUG_ON(seqno == 0);
1566
1567 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1568 if (ret)
1569 return ret;
1570
1571 ret = i915_gem_check_olr(ring, seqno);
1572 if (ret)
1573 return ret;
1574
1575 return __wait_seqno(ring, seqno,
1576 atomic_read(&dev_priv->gpu_error.reset_counter),
1577 interruptible, NULL, NULL);
1578 }
1579
1580 static int
1581 i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
1582 struct intel_ring_buffer *ring)
1583 {
1584 i915_gem_retire_requests_ring(ring);
1585
1586 /* Manually manage the write flush as we may have not yet
1587 * retired the buffer.
1588 *
1589 * Note that the last_write_seqno is always the earlier of
1590 	 * the two (read/write) seqno, so if we have successfully waited,
1591 * we know we have passed the last write.
1592 */
1593 obj->last_write_seqno = 0;
1594 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
1595
1596 return 0;
1597 }
1598
1599 /**
1600 * Ensures that all rendering to the object has completed and the object is
1601 * safe to unbind from the GTT or access from the CPU.
1602 */
1603 static __must_check int
1604 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
1605 bool readonly)
1606 {
1607 struct intel_ring_buffer *ring = obj->ring;
1608 u32 seqno;
1609 int ret;
1610
1611 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
1612 if (seqno == 0)
1613 return 0;
1614
1615 ret = i915_wait_seqno(ring, seqno);
1616 if (ret)
1617 return ret;
1618
1619 return i915_gem_object_wait_rendering__tail(obj, ring);
1620 }
1621
1622 /* A nonblocking variant of the above wait. This is a highly dangerous routine
1623 * as the object state may change during this call.
1624 */
1625 static __must_check int
1626 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
1627 struct drm_i915_file_private *file_priv,
1628 bool readonly)
1629 {
1630 struct drm_device *dev = obj->base.dev;
1631 struct drm_i915_private *dev_priv = dev->dev_private;
1632 struct intel_ring_buffer *ring = obj->ring;
1633 unsigned reset_counter;
1634 u32 seqno;
1635 int ret;
1636
1637 BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1638 BUG_ON(!dev_priv->mm.interruptible);
1639
1640 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
1641 if (seqno == 0)
1642 return 0;
1643
1644 ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
1645 if (ret)
1646 return ret;
1647
1648 ret = i915_gem_check_olr(ring, seqno);
1649 if (ret)
1650 return ret;
1651
1652 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
1653 mutex_unlock(&dev->struct_mutex);
1654 ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file_priv);
1655 mutex_lock(&dev->struct_mutex);
1656 if (ret)
1657 return ret;
1658
1659 return i915_gem_object_wait_rendering__tail(obj, ring);
1660 }
1661
1662 /**
1663 * Called when user space prepares to use an object with the CPU, either
1664 * through the mmap ioctl's mapping or a GTT mapping.
1665 */
1666 int
1667 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1668 struct drm_file *file)
1669 {
1670 struct drm_i915_gem_set_domain *args = data;
1671 struct drm_gem_object *gobj;
1672 struct drm_i915_gem_object *obj;
1673 uint32_t read_domains = args->read_domains;
1674 uint32_t write_domain = args->write_domain;
1675 int ret;
1676
1677 /* Only handle setting domains to types used by the CPU. */
1678 if (write_domain & I915_GEM_GPU_DOMAINS)
1679 return -EINVAL;
1680
1681 if (read_domains & I915_GEM_GPU_DOMAINS)
1682 return -EINVAL;
1683
1684 /* Having something in the write domain implies it's in the read
1685 * domain, and only that read domain. Enforce that in the request.
1686 */
1687 if (write_domain != 0 && read_domains != write_domain)
1688 return -EINVAL;
1689
1690 ret = i915_mutex_lock_interruptible(dev);
1691 if (ret)
1692 return ret;
1693
1694 gobj = drm_gem_object_lookup(dev, file, args->handle);
1695 if (gobj == NULL) {
1696 ret = -ENOENT;
1697 goto unlock;
1698 }
1699 obj = to_intel_bo(gobj);
1700
1701 /* Try to flush the object off the GPU without holding the lock.
1702 * We will repeat the flush holding the lock in the normal manner
1703 * to catch cases where we are gazumped.
1704 */
1705 ret = i915_gem_object_wait_rendering__nonblocking(obj,
1706 file->driver_priv,
1707 !write_domain);
1708 if (ret)
1709 goto unref;
1710
1711 if (read_domains & I915_GEM_DOMAIN_GTT) {
1712 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1713
1714 /* Silently promote "you're not bound, there was nothing to do"
1715 * to success, since the client was just asking us to
1716 * make sure everything was done.
1717 */
1718 if (ret == -EINVAL)
1719 ret = 0;
1720 } else {
1721 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1722 }
1723
1724 unref:
1725 drm_gem_object_unreference(&obj->base);
1726 unlock:
1727 mutex_unlock(&dev->struct_mutex);
1728 return ret;
1729 }
1730
1731 /**
1732 * Called when user space has done writes to this buffer
1733 */
1734 int
1735 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1736 struct drm_file *file)
1737 {
1738 struct drm_i915_gem_sw_finish *args = data;
1739 struct drm_gem_object *gobj;
1740 struct drm_i915_gem_object *obj;
1741 int ret = 0;
1742
1743 ret = i915_mutex_lock_interruptible(dev);
1744 if (ret)
1745 return ret;
1746
1747 gobj = drm_gem_object_lookup(dev, file, args->handle);
1748 if (gobj == NULL) {
1749 ret = -ENOENT;
1750 goto unlock;
1751 }
1752 obj = to_intel_bo(gobj);
1753
1754 /* Pinned buffers may be scanout, so flush the cache */
1755 if (obj->pin_display)
1756 i915_gem_object_flush_cpu_write_domain(obj, true);
1757
1758 drm_gem_object_unreference(&obj->base);
1759 unlock:
1760 mutex_unlock(&dev->struct_mutex);
1761 return ret;
1762 }
1763
1764 /**
1765 * Maps the contents of an object, returning the address it is mapped
1766 * into.
1767 *
1768 * While the mapping holds a reference on the contents of the object, it doesn't
1769 * imply a ref on the object itself.
1770 */
1771 int
1772 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1773 struct drm_file *file)
1774 {
1775 struct drm_i915_gem_mmap *args = data;
1776 struct drm_gem_object *obj;
1777 unsigned long addr;
1778 #ifdef __NetBSD__
1779 int ret;
1780 #endif
1781
1782 obj = drm_gem_object_lookup(dev, file, args->handle);
1783 if (obj == NULL)
1784 return -ENOENT;
1785
1786 /* prime objects have no backing filp to GEM mmap
1787 * pages from.
1788 */
1789 #ifdef __NetBSD__
1790 /* Also stolen objects (XXX can we get them here?) */
1791 if (obj->gemo_shm_uao == NULL) {
1792 drm_gem_object_unreference_unlocked(obj);
1793 return -EINVAL;
1794 }
1795 #else
1796 if (!obj->filp) {
1797 drm_gem_object_unreference_unlocked(obj);
1798 return -EINVAL;
1799 }
1800 #endif
1801
1802 #ifdef __NetBSD__
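	/*
	 * Map the object's shmem backing store (the uvm aobj), not the
	 * GTT aperture, into the calling process; this is the NetBSD
	 * analogue of the vm_mmap() on obj->filp below.
	 */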
1803 addr = (*curproc->p_emul->e_vm_default_addr)(curproc,
1804 (vaddr_t)curproc->p_vmspace->vm_daddr, args->size);
1805 /* XXX errno NetBSD->Linux */
1806 ret = -uvm_map(&curproc->p_vmspace->vm_map, &addr, args->size,
1807 obj->gemo_shm_uao, args->offset, 0,
1808 UVM_MAPFLAG((VM_PROT_READ | VM_PROT_WRITE),
1809 (VM_PROT_READ | VM_PROT_WRITE), UVM_INH_COPY, UVM_ADV_NORMAL,
1810 0));
1811 if (ret) {
1812 drm_gem_object_unreference_unlocked(obj);
1813 return ret;
1814 }
1815 uao_reference(obj->gemo_shm_uao);
1816 drm_gem_object_unreference_unlocked(obj);
1817 #else
1818 addr = vm_mmap(obj->filp, 0, args->size,
1819 PROT_READ | PROT_WRITE, MAP_SHARED,
1820 args->offset);
1821 drm_gem_object_unreference_unlocked(obj);
1822 if (IS_ERR((void *)addr))
1823 return addr;
1824 #endif
1825
1826 args->addr_ptr = (uint64_t) addr;
1827
1828 return 0;
1829 }
1830
1831 #ifdef __NetBSD__ /* XXX gem gtt fault */
1832 static int i915_udv_fault(struct uvm_faultinfo *, vaddr_t,
1833 struct vm_page **, int, int, vm_prot_t, int, paddr_t);
1834
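/*
 * UVM fault handler for GTT mappings: pin the object into the mappable
 * aperture, move it to the GTT domain, grab a fence register if needed,
 * and then enter the aperture pages into the faulting pmap via
 * i915_udv_fault() below.
 */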
1835 int
1836 i915_gem_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps,
1837 int npages, int centeridx, vm_prot_t access_type, int flags)
1838 {
1839 struct uvm_object *uobj = ufi->entry->object.uvm_obj;
1840 struct drm_gem_object *gem_obj =
1841 container_of(uobj, struct drm_gem_object, gemo_uvmobj);
1842 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
1843 struct drm_device *dev = obj->base.dev;
1844 struct drm_i915_private *dev_priv = dev->dev_private;
1845 voff_t byte_offset;
1846 pgoff_t page_offset;
1847 int ret = 0;
1848 	bool write = ISSET(access_type, VM_PROT_WRITE) ? 1 : 0;
1849
1850 byte_offset = (ufi->entry->offset + (vaddr - ufi->entry->start));
1851 KASSERT(byte_offset <= obj->base.size);
1852 page_offset = (byte_offset >> PAGE_SHIFT);
1853
1854 intel_runtime_pm_get(dev_priv);
1855
1856 /* Thanks, uvm, but we don't need this lock. */
1857 mutex_exit(uobj->vmobjlock);
1858
1859 ret = i915_mutex_lock_interruptible(dev);
1860 if (ret)
1861 goto out;
1862
1863 trace_i915_gem_object_fault(obj, page_offset, true, write);
1864
1865 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
1866 if (ret)
1867 goto unlock;
1868
1869 if ((obj->cache_level != I915_CACHE_NONE) && !HAS_LLC(dev)) {
1870 ret = -EINVAL;
1871 goto unlock;
1872 }
1873
1874 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
1875 if (ret)
1876 goto unlock;
1877
1878 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1879 if (ret)
1880 goto unpin;
1881
1882 ret = i915_gem_object_get_fence(obj);
1883 if (ret)
1884 goto unpin;
1885
1886 obj->fault_mappable = true;
1887
1888 /* XXX errno NetBSD->Linux */
1889 ret = -i915_udv_fault(ufi, vaddr, pps, npages, centeridx, access_type,
1890 flags,
1891 (dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj)));
1892 unpin:
1893 i915_gem_object_ggtt_unpin(obj);
1894 unlock:
1895 mutex_unlock(&dev->struct_mutex);
1896 out:
1897 mutex_enter(uobj->vmobjlock);
1898 uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj);
1899 if (ret == -ERESTART)
1900 uvm_wait("i915flt");
1901 /* XXX Deal with GPU hangs here... */
1902 intel_runtime_pm_put(dev_priv);
1903 /* XXX errno Linux->NetBSD */
1904 return -ret;
1905 }
1906
1907 /*
1908 * XXX i915_udv_fault is copypasta of udv_fault from uvm_device.c.
1909 *
1910 * XXX pmap_enter_default instead of pmap_enter because of a problem
1911 * with using weak aliases in kernel modules or something.
1912 */
1913 int pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, unsigned);
1914
1915 static int
1916 i915_udv_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps,
1917 int npages, int centeridx, vm_prot_t access_type, int flags,
1918 paddr_t gtt_paddr)
1919 {
1920 struct vm_map_entry *entry = ufi->entry;
1921 vaddr_t curr_va;
1922 off_t curr_offset;
1923 paddr_t paddr;
1924 u_int mmapflags;
1925 int lcv, retval;
1926 vm_prot_t mapprot;
1927 UVMHIST_FUNC("i915_udv_fault"); UVMHIST_CALLED(maphist);
1928 UVMHIST_LOG(maphist," flags=%d", flags,0,0,0);
1929
1930 /*
1931 * we do not allow device mappings to be mapped copy-on-write
1932 * so we kill any attempt to do so here.
1933 */
1934
1935 if (UVM_ET_ISCOPYONWRITE(entry)) {
1936 UVMHIST_LOG(maphist, "<- failed -- COW entry (etype=0x%x)",
1937 entry->etype, 0,0,0);
1938 return(EIO);
1939 }
1940
1941 /*
1942 * now we must determine the offset in udv to use and the VA to
1943 * use for pmap_enter. note that we always use orig_map's pmap
1944 * for pmap_enter (even if we have a submap). since virtual
1945 * addresses in a submap must match the main map, this is ok.
1946 */
1947
1948 /* udv offset = (offset from start of entry) + entry's offset */
1949 curr_offset = entry->offset + (vaddr - entry->start);
1950 /* pmap va = vaddr (virtual address of pps[0]) */
1951 curr_va = vaddr;
1952
1953 /*
1954 * loop over the page range entering in as needed
1955 */
1956
1957 retval = 0;
1958 for (lcv = 0 ; lcv < npages ; lcv++, curr_offset += PAGE_SIZE,
1959 curr_va += PAGE_SIZE) {
1960 if ((flags & PGO_ALLPAGES) == 0 && lcv != centeridx)
1961 continue;
1962
1963 if (pps[lcv] == PGO_DONTCARE)
1964 continue;
1965
1966 paddr = (gtt_paddr + curr_offset);
1967 mmapflags = 0;
1968 mapprot = ufi->entry->protection;
1969 UVMHIST_LOG(maphist,
1970 " MAPPING: device: pm=0x%x, va=0x%x, pa=0x%lx, at=%d",
1971 ufi->orig_map->pmap, curr_va, paddr, mapprot);
1972 if (pmap_enter_default(ufi->orig_map->pmap, curr_va, paddr, mapprot,
1973 PMAP_CANFAIL | mapprot | mmapflags) != 0) {
1974 /*
1975 * pmap_enter() didn't have the resource to
1976 * enter this mapping. Unlock everything,
1977 * wait for the pagedaemon to free up some
1978 * pages, and then tell uvm_fault() to start
1979 * the fault again.
1980 *
1981 * XXX Needs some rethinking for the PGO_ALLPAGES
1982 * XXX case.
1983 */
1984 pmap_update(ufi->orig_map->pmap); /* sync what we have so far */
1985 return (ERESTART);
1986 }
1987 }
1988
1989 pmap_update(ufi->orig_map->pmap);
1990 return (retval);
1991 }
1992 #else
1993 /**
1994 * i915_gem_fault - fault a page into the GTT
1995 * @vma: VMA in question
1996 * @vmf: fault info
1997 *
1998 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1999 * from userspace. The fault handler takes care of binding the object to
2000 * the GTT (if needed), allocating and programming a fence register (again,
2001 * only if needed based on whether the old reg is still valid or the object
2002 * is tiled) and inserting a new PTE into the faulting process.
2003 *
2004 * Note that the faulting process may involve evicting existing objects
2005 * from the GTT and/or fence registers to make room. So performance may
2006 * suffer if the GTT working set is large or there are few fence registers
2007 * left.
2008 */
2009 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
2010 {
2011 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
2012 struct drm_device *dev = obj->base.dev;
2013 struct drm_i915_private *dev_priv = dev->dev_private;
2014 pgoff_t page_offset;
2015 unsigned long pfn;
2016 int ret = 0;
2017 bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
2018
2019 intel_runtime_pm_get(dev_priv);
2020
2021 /* We don't use vmf->pgoff since that has the fake offset */
2022 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
2023 PAGE_SHIFT;
2024
2025 ret = i915_mutex_lock_interruptible(dev);
2026 if (ret)
2027 goto out;
2028
2029 trace_i915_gem_object_fault(obj, page_offset, true, write);
2030
2031 /* Try to flush the object off the GPU first without holding the lock.
2032 * Upon reacquiring the lock, we will perform our sanity checks and then
2033 * repeat the flush holding the lock in the normal manner to catch cases
2034 * where we are gazumped.
2035 */
2036 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
2037 if (ret)
2038 goto unlock;
2039
2040 /* Access to snoopable pages through the GTT is incoherent. */
2041 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
2042 ret = -EINVAL;
2043 goto unlock;
2044 }
2045
2046 /* Now bind it into the GTT if needed */
2047 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
2048 if (ret)
2049 goto unlock;
2050
2051 ret = i915_gem_object_set_to_gtt_domain(obj, write);
2052 if (ret)
2053 goto unpin;
2054
2055 ret = i915_gem_object_get_fence(obj);
2056 if (ret)
2057 goto unpin;
2058
2059 obj->fault_mappable = true;
2060
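	/*
	 * The CPU-visible address of the faulting page is the object's
	 * GGTT offset within the mappable aperture plus the page's
	 * offset within the object; convert it to a page frame number
	 * for vm_insert_pfn() below.
	 */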
2061 pfn = dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj);
2062 pfn >>= PAGE_SHIFT;
2063 pfn += page_offset;
2064
2065 /* Finally, remap it using the new GTT offset */
2066 ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
2067 unpin:
2068 i915_gem_object_ggtt_unpin(obj);
2069 unlock:
2070 mutex_unlock(&dev->struct_mutex);
2071 out:
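	/* Translate the Linux errno into a fault status for the MM core. */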
2072 switch (ret) {
2073 case -EIO:
2074 /* If this -EIO is due to a gpu hang, give the reset code a
2075 * chance to clean up the mess. Otherwise return the proper
2076 * SIGBUS. */
2077 if (i915_terminally_wedged(&dev_priv->gpu_error)) {
2078 ret = VM_FAULT_SIGBUS;
2079 break;
2080 }
2081 case -EAGAIN:
2082 /*
2083 * EAGAIN means the gpu is hung and we'll wait for the error
2084 * handler to reset everything when re-faulting in
2085 * i915_mutex_lock_interruptible.
2086 */
2087 case 0:
2088 case -ERESTARTSYS:
2089 case -EINTR:
2090 case -EBUSY:
2091 /*
2092 * EBUSY is ok: this just means that another thread
2093 * already did the job.
2094 */
2095 ret = VM_FAULT_NOPAGE;
2096 break;
2097 case -ENOMEM:
2098 ret = VM_FAULT_OOM;
2099 break;
2100 case -ENOSPC:
2101 case -EFAULT:
2102 ret = VM_FAULT_SIGBUS;
2103 break;
2104 default:
2105 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
2106 ret = VM_FAULT_SIGBUS;
2107 break;
2108 }
2109
2110 intel_runtime_pm_put(dev_priv);
2111 return ret;
2112 }
2113
2114 void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
2115 {
2116 struct i915_vma *vma;
2117
2118 /*
2119 * Only the global gtt is relevant for gtt memory mappings, so restrict
2120 * list traversal to objects bound into the global address space. Note
2121 * that the active list should be empty, but better safe than sorry.
2122 */
2123 WARN_ON(!list_empty(&dev_priv->gtt.base.active_list));
2124 list_for_each_entry(vma, &dev_priv->gtt.base.active_list, mm_list)
2125 i915_gem_release_mmap(vma->obj);
2126 list_for_each_entry(vma, &dev_priv->gtt.base.inactive_list, mm_list)
2127 i915_gem_release_mmap(vma->obj);
2128 }
2129 #endif
2130
2131 /**
2132 * i915_gem_release_mmap - remove physical page mappings
2133 * @obj: obj in question
2134 *
2135 * Preserve the reservation of the mmapping with the DRM core code, but
2136 * relinquish ownership of the pages back to the system.
2137 *
2138 * It is vital that we remove the page mapping if we have mapped a tiled
2139 * object through the GTT and then lose the fence register due to
2140 * resource pressure. Similarly if the object has been moved out of the
2141 * aperture, then pages mapped into userspace must be revoked. Removing the
2142 * mapping will then trigger a page fault on the next user access, allowing
2143 * fixup by i915_gem_fault().
2144 */
2145 void
2146 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
2147 {
2148 if (!obj->fault_mappable)
2149 return;
2150
2151 #ifdef __NetBSD__ /* XXX gem gtt fault */
2152 {
2153 struct vm_page *page;
2154
2155 mutex_enter(obj->base.gemo_shm_uao->vmobjlock);
2156 KASSERT(obj->pages != NULL);
2157 /* Force a fresh fault for each page. */
2158 /*
2159 * XXX OOPS! This doesn't actually do what we want.
2160 * This causes a fresh fault for access to the backing
2161 * pages -- but nothing accesses the backing pages
2162 * directly! What is actually entered into CPU page
2163 * table entries is aperture addresses which have been
2164 * programmed by the GTT to refer to those backing
2165 * pages.
2166 *
2167 * We need to clear those page table entries, but
2168 * there's no good way to do that at the moment: nobody
2169 * records for us a map from either uvm objects or
2170 * physical device addresses to a list of all virtual
2171 * pages where they have been mapped. pmap(9) records
2172 * a map only from physical RAM addresses to virtual
2173 * pages; it does nothing for physical device
2174 * addresses.
2175 */
2176 TAILQ_FOREACH(page, &obj->igo_pageq, pageq.queue)
2177 pmap_page_protect(page, VM_PROT_NONE);
2178 mutex_exit(obj->base.gemo_shm_uao->vmobjlock);
2179 }
2180 #else
2181 drm_vma_node_unmap(&obj->base.vma_node,
2182 obj->base.dev->anon_inode->i_mapping);
2183 #endif
2184 obj->fault_mappable = false;
2185 }
2186
2187 uint32_t
2188 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
2189 {
2190 uint32_t gtt_size;
2191
2192 if (INTEL_INFO(dev)->gen >= 4 ||
2193 tiling_mode == I915_TILING_NONE)
2194 return size;
2195
2196 /* Previous chips need a power-of-two fence region when tiling */
2197 if (INTEL_INFO(dev)->gen == 3)
2198 gtt_size = 1024*1024;
2199 else
2200 gtt_size = 512*1024;
2201
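	/* Round the fence region up to the next power of two that covers the object. */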
2202 while (gtt_size < size)
2203 gtt_size <<= 1;
2204
2205 return gtt_size;
2206 }
2207
2208 /**
2209 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
2210 * @obj: object to check
2211 *
2212 * Return the required GTT alignment for an object, taking into account
2213 * potential fence register mapping.
2214 */
2215 uint32_t
2216 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
2217 int tiling_mode, bool fenced)
2218 {
2219 /*
2220 * Minimum alignment is 4k (GTT page size), but might be greater
2221 * if a fence register is needed for the object.
2222 */
2223 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
2224 tiling_mode == I915_TILING_NONE)
2225 return 4096;
2226
2227 /*
2228 * Previous chips need to be aligned to the size of the smallest
2229 * fence register that can contain the object.
2230 */
2231 return i915_gem_get_gtt_size(dev, size, tiling_mode);
2232 }
2233
2234 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2235 {
2236 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2237 int ret;
2238
2239 if (drm_vma_node_has_offset(&obj->base.vma_node))
2240 return 0;
2241
2242 dev_priv->mm.shrinker_no_lock_stealing = true;
2243
2244 ret = drm_gem_create_mmap_offset(&obj->base);
2245 if (ret != -ENOSPC)
2246 goto out;
2247
2248 /* Badly fragmented mmap space? The only way we can recover
2249 * space is by destroying unwanted objects. We can't randomly release
2250 * mmap_offsets as userspace expects them to be persistent for the
2251 * lifetime of the objects. The closest we can do is to release the
2252 * offsets on purgeable objects by truncating them and marking them purged,
2253 * which prevents userspace from ever using that object again.
2254 */
2255 i915_gem_purge(dev_priv, obj->base.size >> PAGE_SHIFT);
2256 ret = drm_gem_create_mmap_offset(&obj->base);
2257 if (ret != -ENOSPC)
2258 goto out;
2259
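	/* Last resort: evict and release everything we can, then retry once more. */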
2260 i915_gem_shrink_all(dev_priv);
2261 ret = drm_gem_create_mmap_offset(&obj->base);
2262 out:
2263 dev_priv->mm.shrinker_no_lock_stealing = false;
2264
2265 return ret;
2266 }
2267
2268 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2269 {
2270 drm_gem_free_mmap_offset(&obj->base);
2271 }
2272
2273 int
2274 i915_gem_mmap_gtt(struct drm_file *file,
2275 struct drm_device *dev,
2276 uint32_t handle,
2277 uint64_t *offset)
2278 {
2279 struct drm_i915_private *dev_priv = dev->dev_private;
2280 struct drm_gem_object *gobj;
2281 struct drm_i915_gem_object *obj;
2282 int ret;
2283
2284 ret = i915_mutex_lock_interruptible(dev);
2285 if (ret)
2286 return ret;
2287
2288 gobj = drm_gem_object_lookup(dev, file, handle);
2289 if (gobj == NULL) {
2290 ret = -ENOENT;
2291 goto unlock;
2292 }
2293 obj = to_intel_bo(gobj);
2294
2295 if (obj->base.size > dev_priv->gtt.mappable_end) {
2296 ret = -E2BIG;
2297 goto out;
2298 }
2299
2300 if (obj->madv != I915_MADV_WILLNEED) {
2301 DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
2302 ret = -EFAULT;
2303 goto out;
2304 }
2305
2306 ret = i915_gem_object_create_mmap_offset(obj);
2307 if (ret)
2308 goto out;
2309
2310 *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
2311
2312 out:
2313 drm_gem_object_unreference(&obj->base);
2314 unlock:
2315 mutex_unlock(&dev->struct_mutex);
2316 return ret;
2317 }
2318
2319 /**
2320 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2321 * @dev: DRM device
2322 * @data: GTT mapping ioctl data
2323 * @file: GEM object info
2324 *
2325 * Simply returns the fake offset to userspace so it can mmap it.
2326 * The mmap call will end up in drm_gem_mmap(), which will set things
2327 * up so we can get faults in the handler above.
2328 *
2329 * The fault handler will take care of binding the object into the GTT
2330 * (since it may have been evicted to make room for something), allocating
2331 * a fence register, and mapping the appropriate aperture address into
2332 * userspace.
2333 */
2334 int
2335 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2336 struct drm_file *file)
2337 {
2338 struct drm_i915_gem_mmap_gtt *args = data;
2339
2340 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
2341 }
2342
2343 /* Immediately discard the backing storage */
2344 static void
2345 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2346 {
2347 #ifndef __NetBSD__
2348 struct inode *inode;
2349 #endif
2350
2351 i915_gem_object_free_mmap_offset(obj);
2352
2353 #ifdef __NetBSD__
2354 if (obj->base.gemo_shm_uao == NULL)
2355 return;
2356
2357 {
2358 struct uvm_object *const uobj = obj->base.gemo_shm_uao;
2359
2360 if (uobj != NULL) {
2361 /* XXX Calling pgo_put like this is bogus. */
2362 mutex_enter(uobj->vmobjlock);
2363 (*uobj->pgops->pgo_put)(uobj, 0, obj->base.size,
2364 (PGO_ALLPAGES | PGO_FREE));
2365 }
2366 }
2367 #else
2368 if (obj->base.filp == NULL)
2369 return;
2370
2371 /* Our goal here is to return as much of the memory as
2372 * is possible back to the system as we are called from OOM.
2373 * To do this we must instruct the shmfs to drop all of its
2374 * backing pages, *now*.
2375 */
2376 inode = file_inode(obj->base.filp);
2377 shmem_truncate_range(inode, 0, (loff_t)-1);
2378 #endif
2379
2380 obj->madv = __I915_MADV_PURGED;
2381 }
2382
2383 static inline int
2384 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
2385 {
2386 return obj->madv == I915_MADV_DONTNEED;
2387 }
2388
2389 #ifdef __NetBSD__
2390 static void
2391 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
2392 {
2393 struct drm_device *const dev = obj->base.dev;
2394 int ret;
2395
2396 /* XXX Cargo-culted from the Linux code. */
2397 BUG_ON(obj->madv == __I915_MADV_PURGED);
2398
2399 ret = i915_gem_object_set_to_cpu_domain(obj, true);
2400 if (ret) {
2401 WARN_ON(ret != -EIO);
2402 i915_gem_clflush_object(obj, true);
2403 obj->base.read_domains = obj->base.write_domain =
2404 I915_GEM_DOMAIN_CPU;
2405 }
2406
2407 if (i915_gem_object_needs_bit17_swizzle(obj))
2408 i915_gem_object_save_bit_17_swizzle(obj);
2409
2410 /* XXX Maintain dirty flag? */
2411
2412 bus_dmamap_destroy(dev->dmat, obj->igo_dmamap);
2413 bus_dmamem_unwire_uvm_object(dev->dmat, obj->base.gemo_shm_uao, 0,
2414 obj->base.size, obj->pages, obj->igo_nsegs);
2415
2416 kfree(obj->pages);
2417 }
2418 #else
2419 static void
2420 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
2421 {
2422 struct sg_page_iter sg_iter;
2423 int ret;
2424
2425 BUG_ON(obj->madv == __I915_MADV_PURGED);
2426
2427 ret = i915_gem_object_set_to_cpu_domain(obj, true);
2428 if (ret) {
2429 /* In the event of a disaster, abandon all caches and
2430 * hope for the best.
2431 */
2432 WARN_ON(ret != -EIO);
2433 i915_gem_clflush_object(obj, true);
2434 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2435 }
2436
2437 if (i915_gem_object_needs_bit17_swizzle(obj))
2438 i915_gem_object_save_bit_17_swizzle(obj);
2439
2440 if (obj->madv == I915_MADV_DONTNEED)
2441 obj->dirty = 0;
2442
2443 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
2444 struct page *page = sg_page_iter_page(&sg_iter);
2445
2446 if (obj->dirty)
2447 set_page_dirty(page);
2448
2449 if (obj->madv == I915_MADV_WILLNEED)
2450 mark_page_accessed(page);
2451
2452 page_cache_release(page);
2453 }
2454 obj->dirty = 0;
2455
2456 sg_free_table(obj->pages);
2457 kfree(obj->pages);
2458 }
2459 #endif
2460
2461 int
2462 i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
2463 {
2464 const struct drm_i915_gem_object_ops *ops = obj->ops;
2465
2466 if (obj->pages == NULL)
2467 return 0;
2468
2469 if (obj->pages_pin_count)
2470 return -EBUSY;
2471
2472 BUG_ON(i915_gem_obj_bound_any(obj));
2473
2474 /* ->put_pages might need to allocate memory for the bit17 swizzle
2475 * array, hence protect them from being reaped by removing them from gtt
2476 * lists early. */
2477 list_del(&obj->global_list);
2478
2479 ops->put_pages(obj);
2480 obj->pages = NULL;
2481
2482 if (i915_gem_object_is_purgeable(obj))
2483 i915_gem_object_truncate(obj);
2484
2485 return 0;
2486 }
2487
2488 static unsigned long
2489 __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
2490 bool purgeable_only)
2491 {
2492 struct list_head still_bound_list;
2493 struct drm_i915_gem_object *obj, *next;
2494 unsigned long count = 0;
2495
2496 list_for_each_entry_safe(obj, next,
2497 &dev_priv->mm.unbound_list,
2498 global_list) {
2499 if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
2500 i915_gem_object_put_pages(obj) == 0) {
2501 count += obj->base.size >> PAGE_SHIFT;
2502 if (count >= target)
2503 return count;
2504 }
2505 }
2506
2507 /*
2508 * As we may completely rewrite the bound list whilst unbinding
2509 * (due to retiring requests) we have to strictly process only
2510 * one element of the list at a time, and recheck the list
2511 * on every iteration.
2512 */
2513 INIT_LIST_HEAD(&still_bound_list);
2514 while (count < target && !list_empty(&dev_priv->mm.bound_list)) {
2515 struct i915_vma *vma, *v;
2516
2517 obj = list_first_entry(&dev_priv->mm.bound_list,
2518 typeof(*obj), global_list);
2519 list_move_tail(&obj->global_list, &still_bound_list);
2520
2521 if (!i915_gem_object_is_purgeable(obj) && purgeable_only)
2522 continue;
2523
2524 /*
2525 * Hold a reference whilst we unbind this object, as we may
2526 * end up waiting for and retiring requests. This might
2527 * release the final reference (held by the active list)
2528 * and result in the object being freed from under us.
2530 *
2531 * Note 1: Shrinking the bound list is special since only active
2532 * (and hence bound objects) can contain such limbo objects, so
2533 * we don't need special tricks for shrinking the unbound list.
2534 * The only other place where we have to be careful with active
2535 * objects suddenly disappearing due to retiring requests is the
2536 * eviction code.
2537 *
2538 * Note 2: Even though the bound list doesn't hold a reference
2539 * to the object we can safely grab one here: The final object
2540 * unreferencing and the bound_list are both protected by the
2541 * dev->struct_mutex and so we won't ever be able to observe an
2542 * object on the bound_list with a reference count of 0.
2543 */
2544 drm_gem_object_reference(&obj->base);
2545
2546 list_for_each_entry_safe(vma, v, &obj->vma_list, vma_link)
2547 if (i915_vma_unbind(vma))
2548 break;
2549
2550 if (i915_gem_object_put_pages(obj) == 0)
2551 count += obj->base.size >> PAGE_SHIFT;
2552
2553 drm_gem_object_unreference(&obj->base);
2554 }
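	/* Return the objects we could not (or chose not to) release to the bound list. */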
2555 list_splice(&still_bound_list, &dev_priv->mm.bound_list);
2556
2557 return count;
2558 }
2559
2560 static unsigned long
2561 i915_gem_purge(struct drm_i915_private *dev_priv, long target)
2562 {
2563 return __i915_gem_shrink(dev_priv, target, true);
2564 }
2565
2566 static unsigned long
2567 i915_gem_shrink_all(struct drm_i915_private *dev_priv)
2568 {
2569 struct drm_i915_gem_object *obj, *next;
2570 long freed = 0;
2571
2572 i915_gem_evict_everything(dev_priv->dev);
2573
2574 list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list,
2575 global_list) {
2576 if (i915_gem_object_put_pages(obj) == 0)
2577 freed += obj->base.size >> PAGE_SHIFT;
2578 }
2579 return freed;
2580 }
2581
2582 #ifdef __NetBSD__
2583 static int
2584 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2585 {
2586 struct drm_device *const dev = obj->base.dev;
2587 struct vm_page *page;
2588 int error;
2589
2590 /* XXX Cargo-culted from the Linux code. */
2591 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2592 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2593
2594 KASSERT(obj->pages == NULL);
2595 TAILQ_INIT(&obj->igo_pageq);
2596 obj->pages = kcalloc((obj->base.size / PAGE_SIZE),
2597 sizeof(obj->pages[0]), GFP_KERNEL);
2598 if (obj->pages == NULL) {
2599 error = -ENOMEM;
2600 goto fail0;
2601 }
2602
2603 /* XXX errno NetBSD->Linux */
2604 error = -bus_dmamem_wire_uvm_object(dev->dmat, obj->base.gemo_shm_uao,
2605 0, obj->base.size, &obj->igo_pageq, PAGE_SIZE, 0, obj->pages,
2606 (obj->base.size / PAGE_SIZE), &obj->igo_nsegs, BUS_DMA_NOWAIT);
2607 if (error)
2608 /* XXX Try i915_gem_purge, i915_gem_shrink_all. */
2609 goto fail1;
2610 KASSERT(0 < obj->igo_nsegs);
2611 KASSERT(obj->igo_nsegs <= (obj->base.size / PAGE_SIZE));
2612
2613 /*
2614 * Check that the paddrs will fit in 40 bits, or 32 bits on i965.
2615 *
2616 * XXX This is wrong; we ought to pass this constraint to
2617 * bus_dmamem_wire_uvm_object instead.
2618 */
2619 TAILQ_FOREACH(page, &obj->igo_pageq, pageq.queue) {
2620 const uint64_t mask =
2621 (IS_BROADWATER(dev) || IS_CRESTLINE(dev)?
2622 0xffffffffULL : 0xffffffffffULL);
2623 if (VM_PAGE_TO_PHYS(page) & ~mask) {
2624 DRM_ERROR("GEM physical address exceeds %u bits"
2625 ": %"PRIxMAX"\n",
2626 popcount64(mask),
2627 (uintmax_t)VM_PAGE_TO_PHYS(page));
2628 error = -EIO;
2629 goto fail2;
2630 }
2631 }
2632
2633 /* XXX Should create the DMA map when creating the object. */
2634
2635 /* XXX errno NetBSD->Linux */
2636 error = -bus_dmamap_create(dev->dmat, obj->base.size, obj->igo_nsegs,
2637 PAGE_SIZE, 0, BUS_DMA_NOWAIT, &obj->igo_dmamap);
2638 if (error)
2639 goto fail2;
2640
2641 /* XXX Cargo-culted from the Linux code. */
2642 if (i915_gem_object_needs_bit17_swizzle(obj))
2643 i915_gem_object_do_bit_17_swizzle(obj);
2644
2645 /* Success! */
2646 return 0;
2647
2648 fail2: bus_dmamem_unwire_uvm_object(dev->dmat, obj->base.gemo_shm_uao, 0,
2649 obj->base.size, obj->pages, (obj->base.size / PAGE_SIZE));
2650 fail1: kfree(obj->pages);
2651 obj->pages = NULL;
2652 fail0: KASSERT(error);
2653 return error;
2654 }
2655 #else
2656 static int
2657 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2658 {
2659 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2660 int page_count, i;
2661 struct address_space *mapping;
2662 struct sg_table *st;
2663 struct scatterlist *sg;
2664 struct sg_page_iter sg_iter;
2665 struct page *page;
2666 unsigned long last_pfn = 0; /* suppress gcc warning */
2667 gfp_t gfp;
2668
2669 /* Assert that the object is not currently in any GPU domain. As it
2670 * wasn't in the GTT, there shouldn't be any way it could have been in
2671 * a GPU cache
2672 */
2673 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2674 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2675
2676 st = kmalloc(sizeof(*st), GFP_KERNEL);
2677 if (st == NULL)
2678 return -ENOMEM;
2679
2680 page_count = obj->base.size / PAGE_SIZE;
2681 if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2682 kfree(st);
2683 return -ENOMEM;
2684 }
2685
2686 /* Get the list of pages out of our struct file. They'll be pinned
2687 * at this point until we release them.
2688 *
2689 * Fail silently without starting the shrinker
2690 */
2691 mapping = file_inode(obj->base.filp)->i_mapping;
2692 gfp = mapping_gfp_mask(mapping);
2693 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
2694 gfp &= ~(__GFP_IO | __GFP_WAIT);
2695 sg = st->sgl;
2696 st->nents = 0;
2697 for (i = 0; i < page_count; i++) {
2698 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2699 if (IS_ERR(page)) {
2700 i915_gem_purge(dev_priv, page_count);
2701 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2702 }
2703 if (IS_ERR(page)) {
2704 /* We've tried hard to allocate the memory by reaping
2705 * our own buffer, now let the real VM do its job and
2706 * go down in flames if truly OOM.
2707 */
2708 gfp &= ~(__GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD);
2709 gfp |= __GFP_IO | __GFP_WAIT;
2710
2711 i915_gem_shrink_all(dev_priv);
2712 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2713 if (IS_ERR(page))
2714 goto err_pages;
2715
2716 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
2717 gfp &= ~(__GFP_IO | __GFP_WAIT);
2718 }
2719 #ifdef CONFIG_SWIOTLB
2720 if (swiotlb_nr_tbl()) {
2721 st->nents++;
2722 sg_set_page(sg, page, PAGE_SIZE, 0);
2723 sg = sg_next(sg);
2724 continue;
2725 }
2726 #endif
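		/*
		 * Coalesce physically contiguous pages into a single
		 * scatterlist entry; start a new entry whenever the
		 * pfn is not contiguous with the previous page.
		 */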
2727 if (!i || page_to_pfn(page) != last_pfn + 1) {
2728 if (i)
2729 sg = sg_next(sg);
2730 st->nents++;
2731 sg_set_page(sg, page, PAGE_SIZE, 0);
2732 } else {
2733 sg->length += PAGE_SIZE;
2734 }
2735 last_pfn = page_to_pfn(page);
2736
2737 /* Check that the i965g/gm workaround works. */
2738 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
2739 }
2740 #ifdef CONFIG_SWIOTLB
2741 if (!swiotlb_nr_tbl())
2742 #endif
2743 sg_mark_end(sg);
2744 obj->pages = st;
2745
2746 if (i915_gem_object_needs_bit17_swizzle(obj))
2747 i915_gem_object_do_bit_17_swizzle(obj);
2748
2749 return 0;
2750
2751 err_pages:
2752 sg_mark_end(sg);
2753 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
2754 page_cache_release(sg_page_iter_page(&sg_iter));
2755 sg_free_table(st);
2756 kfree(st);
2757 return PTR_ERR(page);
2758 }
2759 #endif
2760
2761 /* Ensure that the associated pages are gathered from the backing storage
2762 * and pinned into our object. i915_gem_object_get_pages() may be called
2763 * multiple times before they are released by a single call to
2764 * i915_gem_object_put_pages() - once the pages are no longer referenced
2765 * either as a result of memory pressure (reaping pages under the shrinker)
2766 * or as the object is itself released.
2767 */
2768 int
2769 i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2770 {
2771 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2772 const struct drm_i915_gem_object_ops *ops = obj->ops;
2773 int ret;
2774
2775 if (obj->pages)
2776 return 0;
2777
2778 if (obj->madv != I915_MADV_WILLNEED) {
2779 DRM_DEBUG("Attempting to obtain a purgeable object\n");
2780 return -EFAULT;
2781 }
2782
2783 BUG_ON(obj->pages_pin_count);
2784
2785 ret = ops->get_pages(obj);
2786 if (ret)
2787 return ret;
2788
2789 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
2790 return 0;
2791 }
2792
2793 static void
2794 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
2795 struct intel_ring_buffer *ring)
2796 {
2797 struct drm_device *dev = obj->base.dev;
2798 struct drm_i915_private *dev_priv = dev->dev_private;
2799 u32 seqno = intel_ring_get_seqno(ring);
2800
2801 BUG_ON(ring == NULL);
2802 if (obj->ring != ring && obj->last_write_seqno) {
2803 /* Keep the seqno relative to the current ring */
2804 obj->last_write_seqno = seqno;
2805 }
2806 obj->ring = ring;
2807
2808 /* Add a reference if we're newly entering the active list. */
2809 if (!obj->active) {
2810 drm_gem_object_reference(&obj->base);
2811 obj->active = 1;
2812 }
2813
2814 list_move_tail(&obj->ring_list, &ring->active_list);
2815
2816 obj->last_read_seqno = seqno;
2817
2818 if (obj->fenced_gpu_access) {
2819 obj->last_fenced_seqno = seqno;
2820
2821 /* Bump MRU to take account of the delayed flush */
2822 if (obj->fence_reg != I915_FENCE_REG_NONE) {
2823 struct drm_i915_fence_reg *reg;
2824
2825 reg = &dev_priv->fence_regs[obj->fence_reg];
2826 list_move_tail(&reg->lru_list,
2827 &dev_priv->mm.fence_list);
2828 }
2829 }
2830 }
2831
2832 void i915_vma_move_to_active(struct i915_vma *vma,
2833 struct intel_ring_buffer *ring)
2834 {
2835 list_move_tail(&vma->mm_list, &vma->vm->active_list);
2836 return i915_gem_object_move_to_active(vma->obj, ring);
2837 }
2838
2839 static void
2840 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
2841 {
2842 struct drm_device *dev = obj->base.dev;
2843 struct drm_i915_private *dev_priv = dev->dev_private;
2844 struct i915_address_space *vm;
2845 struct i915_vma *vma;
2846
2847 if ((obj->base.write_domain & I915_GEM_DOMAIN_GTT) != 0) {
2848 #if 0
2849 printk(KERN_ERR "%s: %p 0x%x flushing gtt\n", __func__, obj,
2850 obj->base.write_domain);
2851 #endif
2852 i915_gem_object_flush_gtt_write_domain(obj);
2853 }
2854 BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
2855 BUG_ON(!obj->active);
2856
2857 list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
2858 vma = i915_gem_obj_to_vma(obj, vm);
2859 if (vma && !list_empty(&vma->mm_list))
2860 list_move_tail(&vma->mm_list, &vm->inactive_list);
2861 }
2862
2863 list_del_init(&obj->ring_list);
2864 obj->ring = NULL;
2865
2866 obj->last_read_seqno = 0;
2867 obj->last_write_seqno = 0;
2868 obj->base.write_domain = 0;
2869
2870 obj->last_fenced_seqno = 0;
2871 obj->fenced_gpu_access = false;
2872
2873 obj->active = 0;
2874 drm_gem_object_unreference(&obj->base);
2875
2876 WARN_ON(i915_verify_lists(dev));
2877 }
2878
2879 static int
2880 i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
2881 {
2882 struct drm_i915_private *dev_priv = dev->dev_private;
2883 struct intel_ring_buffer *ring;
2884 int ret, i, j;
2885
2886 /* Carefully retire all requests without writing to the rings */
2887 for_each_ring(ring, dev_priv, i) {
2888 ret = intel_ring_idle(ring);
2889 if (ret)
2890 return ret;
2891 }
2892 i915_gem_retire_requests(dev);
2893
2894 /* Finally reset hw state */
2895 for_each_ring(ring, dev_priv, i) {
2896 intel_ring_init_seqno(ring, seqno);
2897
2898 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
2899 ring->sync_seqno[j] = 0;
2900 }
2901
2902 return 0;
2903 }
2904
2905 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
2906 {
2907 struct drm_i915_private *dev_priv = dev->dev_private;
2908 int ret;
2909
2910 if (seqno == 0)
2911 return -EINVAL;
2912
2913 /* HWS page needs to be set less than what we
2914 * will inject to ring
2915 */
2916 ret = i915_gem_init_seqno(dev, seqno - 1);
2917 if (ret)
2918 return ret;
2919
2920 /* Carefully set the last_seqno value so that wrap
2921 * detection still works
2922 */
2923 dev_priv->next_seqno = seqno;
2924 dev_priv->last_seqno = seqno - 1;
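	/*
	 * A last_seqno of zero is reserved for "no seqno"; step back
	 * once more so it wraps to the maximum value instead.
	 */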
2925 if (dev_priv->last_seqno == 0)
2926 dev_priv->last_seqno--;
2927
2928 return 0;
2929 }
2930
2931 int
2932 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
2933 {
2934 struct drm_i915_private *dev_priv = dev->dev_private;
2935
2936 /* reserve 0 for non-seqno */
2937 if (dev_priv->next_seqno == 0) {
2938 int ret = i915_gem_init_seqno(dev, 0);
2939 if (ret)
2940 return ret;
2941
2942 dev_priv->next_seqno = 1;
2943 }
2944
2945 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
2946 return 0;
2947 }
2948
2949 int __i915_add_request(struct intel_ring_buffer *ring,
2950 struct drm_file *file,
2951 struct drm_i915_gem_object *obj,
2952 u32 *out_seqno)
2953 {
2954 struct drm_i915_private *dev_priv = ring->dev->dev_private;
2955 struct drm_i915_gem_request *request;
2956 u32 request_ring_position, request_start;
2957 int ret;
2958
2959 request_start = intel_ring_get_tail(ring);
2960 /*
2961 * Emit any outstanding flushes - execbuf can fail to emit the flush
2962 * after having emitted the batchbuffer command. Hence we need to fix
2963 * things up similar to emitting the lazy request. The difference here
2964 * is that the flush _must_ happen before the next request, no matter
2965 * what.
2966 */
2967 ret = intel_ring_flush_all_caches(ring);
2968 if (ret)
2969 return ret;
2970
2971 request = ring->preallocated_lazy_request;
2972 if (WARN_ON(request == NULL))
2973 return -ENOMEM;
2974
2975 /* Record the position of the start of the request so that
2976 * should we detect the updated seqno part-way through the
2977 * GPU processing the request, we never over-estimate the
2978 * position of the head.
2979 */
2980 request_ring_position = intel_ring_get_tail(ring);
2981
2982 ret = ring->add_request(ring);
2983 if (ret)
2984 return ret;
2985
2986 request->seqno = intel_ring_get_seqno(ring);
2987 request->ring = ring;
2988 request->head = request_start;
2989 request->tail = request_ring_position;
2990
2991 /* Whilst this request exists, batch_obj will be on the
2992 * active_list, and so will hold the active reference. Only when this
2993 * request is retired will the batch_obj be moved onto the
2994 * inactive_list and lose its active reference. Hence we do not need
2995 * to explicitly hold another reference here.
2996 */
2997 request->batch_obj = obj;
2998
2999 /* Hold a reference to the current context so that we can inspect
3000 * it later in case a hangcheck error event fires.
3001 */
3002 request->ctx = ring->last_context;
3003 if (request->ctx)
3004 i915_gem_context_reference(request->ctx);
3005
3006 request->emitted_jiffies = jiffies;
3007 list_add_tail(&request->list, &ring->request_list);
3008 request->file_priv = NULL;
3009
3010 if (file) {
3011 struct drm_i915_file_private *file_priv = file->driver_priv;
3012
3013 spin_lock(&file_priv->mm.lock);
3014 request->file_priv = file_priv;
3015 list_add_tail(&request->client_list,
3016 &file_priv->mm.request_list);
3017 spin_unlock(&file_priv->mm.lock);
3018 }
3019
3020 trace_i915_gem_request_add(ring, request->seqno);
3021 ring->outstanding_lazy_seqno = 0;
3022 ring->preallocated_lazy_request = NULL;
3023
3024 if (!dev_priv->ums.mm_suspended) {
3025 i915_queue_hangcheck(ring->dev);
3026
3027 cancel_delayed_work_sync(&dev_priv->mm.idle_work);
3028 queue_delayed_work(dev_priv->wq,
3029 &dev_priv->mm.retire_work,
3030 round_jiffies_up_relative(HZ));
3031 intel_mark_busy(dev_priv->dev);
3032 }
3033
3034 if (out_seqno)
3035 *out_seqno = request->seqno;
3036 return 0;
3037 }
3038
3039 static inline void
3040 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
3041 {
3042 struct drm_i915_file_private *file_priv = request->file_priv;
3043
3044 if (!file_priv)
3045 return;
3046
3047 spin_lock(&file_priv->mm.lock);
3048 list_del(&request->client_list);
3049 request->file_priv = NULL;
3050 spin_unlock(&file_priv->mm.lock);
3051 }
3052
3053 static bool i915_context_is_banned(struct drm_i915_private *dev_priv,
3054 const struct i915_hw_context *ctx)
3055 {
3056 unsigned long elapsed;
3057
3058 elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
3059
3060 if (ctx->hang_stats.banned)
3061 return true;
3062
3063 if (elapsed <= DRM_I915_CTX_BAN_PERIOD) {
3064 if (!i915_gem_context_is_default(ctx)) {
3065 DRM_DEBUG("context hanging too fast, banning!\n");
3066 return true;
3067 } else if (dev_priv->gpu_error.stop_rings == 0) {
3068 DRM_ERROR("gpu hanging too fast, banning!\n");
3069 return true;
3070 }
3071 }
3072
3073 return false;
3074 }
3075
3076 static void i915_set_reset_status(struct drm_i915_private *dev_priv,
3077 struct i915_hw_context *ctx,
3078 const bool guilty)
3079 {
3080 struct i915_ctx_hang_stats *hs;
3081
3082 if (WARN_ON(!ctx))
3083 return;
3084
3085 hs = &ctx->hang_stats;
3086
3087 if (guilty) {
3088 hs->banned = i915_context_is_banned(dev_priv, ctx);
3089 hs->batch_active++;
3090 hs->guilty_ts = get_seconds();
3091 } else {
3092 hs->batch_pending++;
3093 }
3094 }
3095
3096 static void i915_gem_free_request(struct drm_i915_gem_request *request)
3097 {
3098 list_del(&request->list);
3099 i915_gem_request_remove_from_client(request);
3100
3101 if (request->ctx)
3102 i915_gem_context_unreference(request->ctx);
3103
3104 kfree(request);
3105 }
3106
3107 struct drm_i915_gem_request *
3108 i915_gem_find_active_request(struct intel_ring_buffer *ring)
3109 {
3110 struct drm_i915_gem_request *request;
3111 u32 completed_seqno;
3112
3113 completed_seqno = ring->get_seqno(ring, false);
3114
3115 list_for_each_entry(request, &ring->request_list, list) {
3116 if (i915_seqno_passed(completed_seqno, request->seqno))
3117 continue;
3118
3119 return request;
3120 }
3121
3122 return NULL;
3123 }
3124
3125 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
3126 struct intel_ring_buffer *ring)
3127 {
3128 struct drm_i915_gem_request *request;
3129 bool ring_hung;
3130
3131 request = i915_gem_find_active_request(ring);
3132
3133 if (request == NULL)
3134 return;
3135
3136 ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
3137
3138 i915_set_reset_status(dev_priv, request->ctx, ring_hung);
3139
3140 list_for_each_entry_continue(request, &ring->request_list, list)
3141 i915_set_reset_status(dev_priv, request->ctx, false);
3142 }
3143
3144 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
3145 struct intel_ring_buffer *ring)
3146 {
3147 while (!list_empty(&ring->active_list)) {
3148 struct drm_i915_gem_object *obj;
3149
3150 obj = list_first_entry(&ring->active_list,
3151 struct drm_i915_gem_object,
3152 ring_list);
3153
3154 i915_gem_object_move_to_inactive(obj);
3155 }
3156
3157 /*
3158 * We must free the requests after all the corresponding objects have
3159 * been moved off active lists. Which is the same order as the normal
3160 * retire_requests function does. This is important if objects hold
3161 * implicit references on things like e.g. ppgtt address spaces through
3162 * the request.
3163 */
3164 while (!list_empty(&ring->request_list)) {
3165 struct drm_i915_gem_request *request;
3166
3167 request = list_first_entry(&ring->request_list,
3168 struct drm_i915_gem_request,
3169 list);
3170
3171 i915_gem_free_request(request);
3172 }
3173 }
3174
3175 void i915_gem_restore_fences(struct drm_device *dev)
3176 {
3177 struct drm_i915_private *dev_priv = dev->dev_private;
3178 int i;
3179
3180 for (i = 0; i < dev_priv->num_fence_regs; i++) {
3181 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
3182
3183 /*
3184 * Commit delayed tiling changes if we have an object still
3185 * attached to the fence, otherwise just clear the fence.
3186 */
3187 if (reg->obj) {
3188 i915_gem_object_update_fence(reg->obj, reg,
3189 reg->obj->tiling_mode);
3190 } else {
3191 i915_gem_write_fence(dev, i, NULL);
3192 }
3193 }
3194 }
3195
3196 void i915_gem_reset(struct drm_device *dev)
3197 {
3198 struct drm_i915_private *dev_priv = dev->dev_private;
3199 struct intel_ring_buffer *ring;
3200 int i;
3201
3202 /*
3203 * Before we free the objects from the requests, we need to inspect
3204 * them for finding the guilty party. As the requests only borrow
3205 * their reference to the objects, the inspection must be done first.
3206 */
3207 for_each_ring(ring, dev_priv, i)
3208 i915_gem_reset_ring_status(dev_priv, ring);
3209
3210 for_each_ring(ring, dev_priv, i)
3211 i915_gem_reset_ring_cleanup(dev_priv, ring);
3212
3213 i915_gem_cleanup_ringbuffer(dev);
3214
3215 i915_gem_context_reset(dev);
3216
3217 i915_gem_restore_fences(dev);
3218 }
3219
3220 /**
3221 * This function clears the request list as sequence numbers are passed.
3222 */
3223 static void
3224 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
3225 {
3226 uint32_t seqno;
3227
3228 if (list_empty(&ring->request_list))
3229 return;
3230
3231 WARN_ON(i915_verify_lists(ring->dev));
3232
3233 seqno = ring->get_seqno(ring, true);
3234
3235 /* Move any buffers on the active list that are no longer referenced
3236 * by the ringbuffer to the flushing/inactive lists as appropriate,
3237 * before we free the context associated with the requests.
3238 */
3239 while (!list_empty(&ring->active_list)) {
3240 struct drm_i915_gem_object *obj;
3241
3242 obj = list_first_entry(&ring->active_list,
3243 struct drm_i915_gem_object,
3244 ring_list);
3245
3246 if (!i915_seqno_passed(seqno, obj->last_read_seqno))
3247 break;
3248
3249 i915_gem_object_move_to_inactive(obj);
3250 }
3251
3252
3253 while (!list_empty(&ring->request_list)) {
3254 struct drm_i915_gem_request *request;
3255
3256 request = list_first_entry(&ring->request_list,
3257 struct drm_i915_gem_request,
3258 list);
3259
3260 if (!i915_seqno_passed(seqno, request->seqno))
3261 break;
3262
3263 trace_i915_gem_request_retire(ring, request->seqno);
3264 /* We know the GPU must have read the request to have
3265 * sent us the seqno + interrupt, so use the position
3266 * of tail of the request to update the last known position
3267 * of the GPU head.
3268 */
3269 ring->last_retired_head = request->tail;
3270
3271 i915_gem_free_request(request);
3272 }
3273
3274 if (unlikely(ring->trace_irq_seqno &&
3275 i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
3276 ring->irq_put(ring);
3277 ring->trace_irq_seqno = 0;
3278 }
3279
3280 WARN_ON(i915_verify_lists(ring->dev));
3281 }
3282
3283 bool
3284 i915_gem_retire_requests(struct drm_device *dev)
3285 {
3286 struct drm_i915_private *dev_priv = dev->dev_private;
3287 struct intel_ring_buffer *ring;
3288 bool idle = true;
3289 int i;
3290
3291 for_each_ring(ring, dev_priv, i) {
3292 i915_gem_retire_requests_ring(ring);
3293 idle &= list_empty(&ring->request_list);
3294 }
3295
3296 if (idle)
3297 mod_delayed_work(dev_priv->wq,
3298 &dev_priv->mm.idle_work,
3299 msecs_to_jiffies(100));
3300
3301 return idle;
3302 }
3303
3304 static void
3305 i915_gem_retire_work_handler(struct work_struct *work)
3306 {
3307 struct drm_i915_private *dev_priv =
3308 container_of(work, typeof(*dev_priv), mm.retire_work.work);
3309 struct drm_device *dev = dev_priv->dev;
3310 bool idle;
3311
3312 /* Come back later if the device is busy... */
3313 idle = false;
3314 if (mutex_trylock(&dev->struct_mutex)) {
3315 idle = i915_gem_retire_requests(dev);
3316 mutex_unlock(&dev->struct_mutex);
3317 }
3318 if (!idle)
3319 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
3320 round_jiffies_up_relative(HZ));
3321 }
3322
3323 static void
3324 i915_gem_idle_work_handler(struct work_struct *work)
3325 {
3326 struct drm_i915_private *dev_priv =
3327 container_of(work, typeof(*dev_priv), mm.idle_work.work);
3328
3329 intel_mark_idle(dev_priv->dev);
3330 }
3331
3332 /**
3333 * Ensures that an object will eventually get non-busy by flushing any required
3334 * write domains, emitting any outstanding lazy request and retiring any
3335 * completed requests.
3336 */
3337 static int
3338 i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
3339 {
3340 int ret;
3341
3342 if (obj->active) {
3343 ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
3344 if (ret)
3345 return ret;
3346
3347 i915_gem_retire_requests_ring(obj->ring);
3348 }
3349
3350 return 0;
3351 }
3352
3353 /**
3354 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
3355 * @DRM_IOCTL_ARGS: standard ioctl arguments
3356 *
3357 * Returns 0 if successful, else an error is returned with the remaining time in
3358 * the timeout parameter.
3359 * -ETIME: object is still busy after timeout
3360 * -ERESTARTSYS: signal interrupted the wait
3361 * -ENOENT: object doesn't exist
3362 * Also possible, but rare:
3363 * -EAGAIN: GPU wedged
3364 * -ENOMEM: damn
3365 * -ENODEV: Internal IRQ fail
3366 * -E?: The add request failed
3367 *
3368 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
3369 * non-zero timeout parameter the wait ioctl will wait for the given number of
3370 * nanoseconds on an object becoming unbusy. Since the wait itself does so
3371 * without holding struct_mutex the object may become re-busied before this
3372 * function completes. A similar but shorter * race condition exists in the busy
3373 * ioctl
3374 */
3375 int
3376 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3377 {
3378 struct drm_i915_private *dev_priv = dev->dev_private;
3379 struct drm_i915_gem_wait *args = data;
3380 struct drm_gem_object *gobj;
3381 struct drm_i915_gem_object *obj;
3382 struct intel_ring_buffer *ring = NULL;
3383 struct timespec timeout_stack, *timeout = NULL;
3384 unsigned reset_counter;
3385 u32 seqno = 0;
3386 int ret = 0;
3387
3388 if (args->timeout_ns >= 0) {
3389 timeout_stack = ns_to_timespec(args->timeout_ns);
3390 timeout = &timeout_stack;
3391 }
3392
3393 ret = i915_mutex_lock_interruptible(dev);
3394 if (ret)
3395 return ret;
3396
3397 gobj = drm_gem_object_lookup(dev, file, args->bo_handle);
3398 if (gobj == NULL) {
3399 mutex_unlock(&dev->struct_mutex);
3400 return -ENOENT;
3401 }
3402 obj = to_intel_bo(gobj);
3403
3404 /* Need to make sure the object gets inactive eventually. */
3405 ret = i915_gem_object_flush_active(obj);
3406 if (ret)
3407 goto out;
3408
3409 if (obj->active) {
3410 seqno = obj->last_read_seqno;
3411 ring = obj->ring;
3412 }
3413
3414 if (seqno == 0)
3415 goto out;
3416
3417 /* Do this after OLR check to make sure we make forward progress polling
3418 * on this IOCTL with a 0 timeout (like busy ioctl)
3419 */
3420 if (!args->timeout_ns) {
3421 ret = -ETIME;
3422 goto out;
3423 }
3424
3425 drm_gem_object_unreference(&obj->base);
3426 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
3427 mutex_unlock(&dev->struct_mutex);
3428
3429 ret = __wait_seqno(ring, seqno, reset_counter, true, timeout, file->driver_priv);
3430 if (timeout)
3431 args->timeout_ns = timespec_to_ns(timeout);
3432 return ret;
3433
3434 out:
3435 drm_gem_object_unreference(&obj->base);
3436 mutex_unlock(&dev->struct_mutex);
3437 return ret;
3438 }
3439
3440 /**
3441 * i915_gem_object_sync - sync an object to a ring.
3442 *
3443 * @obj: object which may be in use on another ring.
3444 * @to: ring we wish to use the object on. May be NULL.
3445 *
3446 * This code is meant to abstract object synchronization with the GPU.
3447 * Calling with NULL implies synchronizing the object with the CPU
3448 * rather than a particular GPU ring.
3449 *
3450 * Returns 0 if successful, else propagates up the lower layer error.
3451 */
3452 int
3453 i915_gem_object_sync(struct drm_i915_gem_object *obj,
3454 struct intel_ring_buffer *to)
3455 {
3456 struct intel_ring_buffer *from = obj->ring;
3457 u32 seqno;
3458 int ret, idx;
3459
3460 if (from == NULL || to == from)
3461 return 0;
3462
3463 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
3464 return i915_gem_object_wait_rendering(obj, false);
3465
3466 idx = intel_ring_sync_index(from, to);
3467
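	/*
	 * If the target ring already waits on a semaphore covering this
	 * object's last read seqno, no new sync needs to be emitted.
	 */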
3468 seqno = obj->last_read_seqno;
3469 if (seqno <= from->sync_seqno[idx])
3470 return 0;
3471
3472 ret = i915_gem_check_olr(obj->ring, seqno);
3473 if (ret)
3474 return ret;
3475
3476 trace_i915_gem_ring_sync_to(from, to, seqno);
3477 ret = to->sync_to(to, from, seqno);
3478 if (!ret)
3479 /* We use last_read_seqno because sync_to()
3480 * might have just caused seqno wrap under
3481 * the radar.
3482 */
3483 from->sync_seqno[idx] = obj->last_read_seqno;
3484
3485 return ret;
3486 }
3487
3488 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
3489 {
3490 u32 old_write_domain, old_read_domains;
3491
3492 /* Force a pagefault for domain tracking on next user access */
3493 i915_gem_release_mmap(obj);
3494
3495 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3496 return;
3497
3498 /* Wait for any direct GTT access to complete */
3499 mb();
3500
3501 old_read_domains = obj->base.read_domains;
3502 old_write_domain = obj->base.write_domain;
3503
3504 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
3505 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
3506
3507 trace_i915_gem_object_change_domain(obj,
3508 old_read_domains,
3509 old_write_domain);
3510 }
3511
3512 int i915_vma_unbind(struct i915_vma *vma)
3513 {
3514 struct drm_i915_gem_object *obj = vma->obj;
3515 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3516 int ret;
3517
3518 if (list_empty(&vma->vma_link))
3519 return 0;
3520
3521 if (!drm_mm_node_allocated(&vma->node)) {
3522 i915_gem_vma_destroy(vma);
3523 return 0;
3524 }
3525
3526 if (vma->pin_count)
3527 return -EBUSY;
3528
3529 BUG_ON(obj->pages == NULL);
3530
3531 ret = i915_gem_object_finish_gpu(obj);
3532 if (ret)
3533 return ret;
3534 /* Continue on if we fail due to EIO, the GPU is hung so we
3535 * should be safe and we need to cleanup or else we might
3536 * cause memory corruption through use-after-free.
3537 */
3538
3539 i915_gem_object_finish_gtt(obj);
3540
3541 /* release the fence reg _after_ flushing */
3542 ret = i915_gem_object_put_fence(obj);
3543 if (ret)
3544 return ret;
3545
3546 trace_i915_vma_unbind(vma);
3547
3548 vma->unbind_vma(vma);
3549
3550 i915_gem_gtt_finish_object(obj);
3551
3552 list_del_init(&vma->mm_list);
3553 /* Avoid an unnecessary call to unbind on rebind. */
3554 if (i915_is_ggtt(vma->vm))
3555 obj->map_and_fenceable = true;
3556
3557 drm_mm_remove_node(&vma->node);
3558 i915_gem_vma_destroy(vma);
3559
3560 /* Since the unbound list is global, only move to that list if
3561 * no more VMAs exist. */
3562 if (list_empty(&obj->vma_list))
3563 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
3564
3565 /* And finally now the object is completely decoupled from this vma,
3566 * we can drop its hold on the backing storage and allow it to be
3567 * reaped by the shrinker.
3568 */
3569 i915_gem_object_unpin_pages(obj);
3570
3571 return 0;
3572 }
3573
3574 int i915_gpu_idle(struct drm_device *dev)
3575 {
3576 struct drm_i915_private *dev_priv = dev->dev_private;
3577 struct intel_ring_buffer *ring;
3578 int ret, i;
3579
3580 /* Flush everything onto the inactive list. */
3581 for_each_ring(ring, dev_priv, i) {
3582 ret = i915_switch_context(ring, ring->default_context);
3583 if (ret)
3584 return ret;
3585
3586 ret = intel_ring_idle(ring);
3587 if (ret)
3588 return ret;
3589 }
3590
3591 return 0;
3592 }
3593
3594 static void i965_write_fence_reg(struct drm_device *dev, int reg,
3595 struct drm_i915_gem_object *obj)
3596 {
3597 struct drm_i915_private *dev_priv = dev->dev_private;
3598 int fence_reg;
3599 int fence_pitch_shift;
3600
3601 if (INTEL_INFO(dev)->gen >= 6) {
3602 fence_reg = FENCE_REG_SANDYBRIDGE_0;
3603 fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT;
3604 } else {
3605 fence_reg = FENCE_REG_965_0;
3606 fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
3607 }
3608
3609 fence_reg += reg * 8;
3610
3611 /* To w/a incoherency with non-atomic 64-bit register updates,
3612 * we split the 64-bit update into two 32-bit writes. In order
3613 * for a partial fence not to be evaluated between writes, we
3614 * precede the update with write to turn off the fence register,
3615 * and only enable the fence as the last step.
3616 *
3617 * For extra levels of paranoia, we make sure each step lands
3618 * before applying the next step.
3619 */
3620 I915_WRITE(fence_reg, 0);
3621 POSTING_READ(fence_reg);
3622
3623 if (obj) {
3624 u32 size = i915_gem_obj_ggtt_size(obj);
3625 uint64_t val;
3626
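		/*
		 * Pack the fence: the (4K-aligned) address of the
		 * object's last page goes in the upper dword, the start
		 * address in the lower dword, together with the pitch
		 * (in 128-byte units, less one), the Y-tiling flag and
		 * the valid bit.
		 */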
3627 val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
3628 0xfffff000) << 32;
3629 val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
3630 val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
3631 if (obj->tiling_mode == I915_TILING_Y)
3632 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
3633 val |= I965_FENCE_REG_VALID;
3634
3635 I915_WRITE(fence_reg + 4, val >> 32);
3636 POSTING_READ(fence_reg + 4);
3637
3638 I915_WRITE(fence_reg + 0, val);
3639 POSTING_READ(fence_reg);
3640 } else {
3641 I915_WRITE(fence_reg + 4, 0);
3642 POSTING_READ(fence_reg + 4);
3643 }
3644 }
3645
3646 static void i915_write_fence_reg(struct drm_device *dev, int reg,
3647 struct drm_i915_gem_object *obj)
3648 {
3649 struct drm_i915_private *dev_priv = dev->dev_private;
3650 u32 val;
3651
3652 if (obj) {
3653 u32 size = i915_gem_obj_ggtt_size(obj);
3654 int pitch_val;
3655 int tile_width;
3656
3657 WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) ||
3658 (size & -size) != size ||
3659 (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
3660 "object 0x%08lx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
3661 i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);
3662
3663 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
3664 tile_width = 128;
3665 else
3666 tile_width = 512;
3667
3668 /* Note: pitch better be a power of two tile widths */
3669 pitch_val = obj->stride / tile_width;
3670 pitch_val = ffs(pitch_val) - 1;
3671
3672 val = i915_gem_obj_ggtt_offset(obj);
3673 if (obj->tiling_mode == I915_TILING_Y)
3674 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
3675 val |= I915_FENCE_SIZE_BITS(size);
3676 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
3677 val |= I830_FENCE_REG_VALID;
3678 } else
3679 val = 0;
3680
3681 if (reg < 8)
3682 reg = FENCE_REG_830_0 + reg * 4;
3683 else
3684 reg = FENCE_REG_945_8 + (reg - 8) * 4;
3685
3686 I915_WRITE(reg, val);
3687 POSTING_READ(reg);
3688 }
3689
3690 static void i830_write_fence_reg(struct drm_device *dev, int reg,
3691 struct drm_i915_gem_object *obj)
3692 {
3693 struct drm_i915_private *dev_priv = dev->dev_private;
3694 uint32_t val;
3695
3696 if (obj) {
3697 u32 size = i915_gem_obj_ggtt_size(obj);
3698 uint32_t pitch_val;
3699
3700 WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) ||
3701 (size & -size) != size ||
3702 (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
3703 "object 0x%08lx not 512K or pot-size 0x%08x aligned\n",
3704 i915_gem_obj_ggtt_offset(obj), size);
3705
3706 pitch_val = obj->stride / 128;
3707 pitch_val = ffs(pitch_val) - 1;
3708
3709 val = i915_gem_obj_ggtt_offset(obj);
3710 if (obj->tiling_mode == I915_TILING_Y)
3711 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
3712 val |= I830_FENCE_SIZE_BITS(size);
3713 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
3714 val |= I830_FENCE_REG_VALID;
3715 } else
3716 val = 0;
3717
3718 I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
3719 POSTING_READ(FENCE_REG_830_0 + reg * 4);
3720 }
3721
3722 inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
3723 {
3724 return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
3725 }
3726
3727 static void i915_gem_write_fence(struct drm_device *dev, int reg,
3728 struct drm_i915_gem_object *obj)
3729 {
3730 struct drm_i915_private *dev_priv = dev->dev_private;
3731
3732 /* Ensure that all CPU reads are completed before installing a fence
3733 * and all writes before removing the fence.
3734 */
3735 if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
3736 mb();
3737
3738 WARN(obj && (!obj->stride || !obj->tiling_mode),
3739 "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
3740 obj->stride, obj->tiling_mode);
3741
3742 switch (INTEL_INFO(dev)->gen) {
3743 case 8:
3744 case 7:
3745 case 6:
3746 case 5:
3747 case 4: i965_write_fence_reg(dev, reg, obj); break;
3748 case 3: i915_write_fence_reg(dev, reg, obj); break;
3749 case 2: i830_write_fence_reg(dev, reg, obj); break;
3750 default: BUG();
3751 }
3752
3753 /* And similarly be paranoid that no direct access to this region
3754 * is reordered to before the fence is installed.
3755 */
3756 if (i915_gem_object_needs_mb(obj))
3757 mb();
3758 }
3759
3760 static inline int fence_number(struct drm_i915_private *dev_priv,
3761 struct drm_i915_fence_reg *fence)
3762 {
3763 return fence - dev_priv->fence_regs;
3764 }
3765
3766 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
3767 struct drm_i915_fence_reg *fence,
3768 bool enable)
3769 {
3770 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3771 int reg = fence_number(dev_priv, fence);
3772
3773 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
3774
3775 if (enable) {
3776 obj->fence_reg = reg;
3777 fence->obj = obj;
3778 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
3779 } else {
3780 obj->fence_reg = I915_FENCE_REG_NONE;
3781 fence->obj = NULL;
3782 list_del_init(&fence->lru_list);
3783 }
3784 obj->fence_dirty = false;
3785 }
3786
3787 static int
3788 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
3789 {
3790 if (obj->last_fenced_seqno) {
3791 int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
3792 if (ret)
3793 return ret;
3794
3795 obj->last_fenced_seqno = 0;
3796 }
3797
3798 obj->fenced_gpu_access = false;
3799 return 0;
3800 }
3801
3802 int
3803 i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
3804 {
3805 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3806 struct drm_i915_fence_reg *fence;
3807 int ret;
3808
3809 ret = i915_gem_object_wait_fence(obj);
3810 if (ret)
3811 return ret;
3812
3813 if (obj->fence_reg == I915_FENCE_REG_NONE)
3814 return 0;
3815
3816 fence = &dev_priv->fence_regs[obj->fence_reg];
3817
3818 i915_gem_object_fence_lost(obj);
3819 i915_gem_object_update_fence(obj, fence, false);
3820
3821 return 0;
3822 }
3823
3824 static struct drm_i915_fence_reg *
3825 i915_find_fence_reg(struct drm_device *dev)
3826 {
3827 struct drm_i915_private *dev_priv = dev->dev_private;
3828 struct drm_i915_fence_reg *reg, *avail;
3829 int i;
3830
3831 /* First try to find a free reg */
3832 avail = NULL;
3833 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
3834 reg = &dev_priv->fence_regs[i];
3835 if (!reg->obj)
3836 return reg;
3837
3838 if (!reg->pin_count)
3839 avail = reg;
3840 }
3841
3842 if (avail == NULL)
3843 goto deadlock;
3844
3845 /* None available, try to steal one or wait for a user to finish */
3846 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
3847 if (reg->pin_count)
3848 continue;
3849
3850 return reg;
3851 }
3852
3853 deadlock:
3854 /* Wait for completion of pending flips which consume fences */
3855 if (intel_has_pending_fb_unpin(dev))
3856 return ERR_PTR(-EAGAIN);
3857
3858 return ERR_PTR(-EDEADLK);
3859 }
3860
3861 /**
3862 * i915_gem_object_get_fence - set up fencing for an object
3863 * @obj: object to map through a fence reg
3864 *
3865 * When mapping objects through the GTT, userspace wants to be able to write
3866 * to them without having to worry about swizzling if the object is tiled.
3867 * This function walks the fence regs looking for a free one for @obj,
3868 * stealing one if it can't find any.
3869 *
3870 * It then sets up the reg based on the object's properties: address, pitch
3871 * and tiling format.
3872 *
3873 * For an untiled surface, this removes any existing fence.
3874 */
3875 int
3876 i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
3877 {
3878 struct drm_device *dev = obj->base.dev;
3879 struct drm_i915_private *dev_priv = dev->dev_private;
3880 bool enable = obj->tiling_mode != I915_TILING_NONE;
3881 struct drm_i915_fence_reg *reg;
3882 int ret;
3883
3884 /* Have the tiling parameters of the object been updated, such that
3885 * we need to serialise the write to the associated fence register?
3886 */
3887 if (obj->fence_dirty) {
3888 ret = i915_gem_object_wait_fence(obj);
3889 if (ret)
3890 return ret;
3891 }
3892
3893 /* Just update our place in the LRU if our fence is getting reused. */
3894 if (obj->fence_reg != I915_FENCE_REG_NONE) {
3895 reg = &dev_priv->fence_regs[obj->fence_reg];
3896 if (!obj->fence_dirty) {
3897 list_move_tail(&reg->lru_list,
3898 &dev_priv->mm.fence_list);
3899 return 0;
3900 }
3901 } else if (enable) {
3902 reg = i915_find_fence_reg(dev);
3903 if (IS_ERR(reg))
3904 return PTR_ERR(reg);
3905
3906 if (reg->obj) {
3907 struct drm_i915_gem_object *old = reg->obj;
3908
3909 ret = i915_gem_object_wait_fence(old);
3910 if (ret)
3911 return ret;
3912
3913 i915_gem_object_fence_lost(old);
3914 }
3915 } else
3916 return 0;
3917
3918 i915_gem_object_update_fence(obj, reg, enable);
3919
3920 return 0;
3921 }
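/*
 * Illustrative caller sketch (an editorial addition, not code from this
 * driver): a typical fence user pins the object into the mappable GTT,
 * asks for a fence, performs its tiled access through the aperture and
 * then releases both again.  The names below are the helpers used
 * elsewhere in this file; error handling is abbreviated and the caller
 * is assumed to hold dev->struct_mutex.
 *
 *	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
 *	if (ret == 0)
 *		ret = i915_gem_object_get_fence(obj);
 *	if (ret == 0) {
 *		... tiled CPU access through the GTT aperture ...
 *		i915_gem_object_put_fence(obj);
 *	}
 *	i915_gem_object_ggtt_unpin(obj);
 */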
3922
3923 static bool i915_gem_valid_gtt_space(struct drm_device *dev,
3924 struct drm_mm_node *gtt_space,
3925 unsigned long cache_level)
3926 {
3927 struct drm_mm_node *other;
3928
3929 /* On non-LLC machines we have to be careful when putting differing
3930 * types of snoopable memory together to avoid the prefetcher
3931 * crossing memory domains and dying.
3932 */
3933 if (HAS_LLC(dev))
3934 return true;
3935
3936 if (!drm_mm_node_allocated(gtt_space))
3937 return true;
3938
3939 if (list_empty(&gtt_space->node_list))
3940 return true;
3941
3942 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
3943 if (other->allocated && !other->hole_follows && other->color != cache_level)
3944 return false;
3945
3946 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
3947 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
3948 return false;
3949
3950 return true;
3951 }
3952
3953 static void i915_gem_verify_gtt(struct drm_device *dev)
3954 {
3955 #if WATCH_GTT
3956 struct drm_i915_private *dev_priv = dev->dev_private;
3957 struct drm_i915_gem_object *obj;
3958 int err = 0;
3959
3960 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
3961 if (obj->gtt_space == NULL) {
3962 printk(KERN_ERR "object found on GTT list with no space reserved\n");
3963 err++;
3964 continue;
3965 }
3966
3967 if (obj->cache_level != obj->gtt_space->color) {
3968 printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n",
3969 i915_gem_obj_ggtt_offset(obj),
3970 i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj),
3971 obj->cache_level,
3972 obj->gtt_space->color);
3973 err++;
3974 continue;
3975 }
3976
3977 if (!i915_gem_valid_gtt_space(dev,
3978 obj->gtt_space,
3979 obj->cache_level)) {
3980 printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n",
3981 i915_gem_obj_ggtt_offset(obj),
3982 i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj),
3983 obj->cache_level);
3984 err++;
3985 continue;
3986 }
3987 }
3988
3989 WARN_ON(err);
3990 #endif
3991 }
3992
3993 /**
3994 * Finds free space in the GTT aperture and binds the object there.
3995 */
3996 static struct i915_vma *
3997 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
3998 struct i915_address_space *vm,
3999 unsigned alignment,
4000 uint64_t flags)
4001 {
4002 struct drm_device *dev = obj->base.dev;
4003 struct drm_i915_private *dev_priv = dev->dev_private;
4004 u32 size, fence_size, fence_alignment, unfenced_alignment;
4005 unsigned long start =
4006 flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
4007 unsigned long end =
4008 flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total;
4009 struct i915_vma *vma;
4010 int ret;
4011
4012 fence_size = i915_gem_get_gtt_size(dev,
4013 obj->base.size,
4014 obj->tiling_mode);
4015 fence_alignment = i915_gem_get_gtt_alignment(dev,
4016 obj->base.size,
4017 obj->tiling_mode, true);
4018 unfenced_alignment =
4019 i915_gem_get_gtt_alignment(dev,
4020 obj->base.size,
4021 obj->tiling_mode, false);
4022
4023 if (alignment == 0)
4024 alignment = flags & PIN_MAPPABLE ? fence_alignment :
4025 unfenced_alignment;
4026 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
4027 DRM_DEBUG("Invalid object alignment requested %u\n", alignment);
4028 return ERR_PTR(-EINVAL);
4029 }
4030
4031 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
4032
4033 /* If the object is bigger than the entire aperture, reject it early
4034 * before evicting everything in a vain attempt to find space.
4035 */
4036 if (obj->base.size > end) {
4037 DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%lu\n",
4038 obj->base.size,
4039 flags & PIN_MAPPABLE ? "mappable" : "total",
4040 end);
4041 return ERR_PTR(-E2BIG);
4042 }
4043
4044 ret = i915_gem_object_get_pages(obj);
4045 if (ret)
4046 return ERR_PTR(ret);
4047
4048 i915_gem_object_pin_pages(obj);
4049
4050 vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
4051 if (IS_ERR(vma))
4052 goto err_unpin;
4053
4054 search_free:
4055 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
4056 size, alignment,
4057 obj->cache_level,
4058 start, end,
4059 DRM_MM_SEARCH_DEFAULT,
4060 DRM_MM_CREATE_DEFAULT);
4061 if (ret) {
4062 ret = i915_gem_evict_something(dev, vm, size, alignment,
4063 obj->cache_level,
4064 start, end,
4065 flags);
4066 if (ret == 0)
4067 goto search_free;
4068
4069 goto err_free_vma;
4070 }
4071 if (WARN_ON(!i915_gem_valid_gtt_space(dev, &vma->node,
4072 obj->cache_level))) {
4073 ret = -EINVAL;
4074 goto err_remove_node;
4075 }
4076
4077 ret = i915_gem_gtt_prepare_object(obj);
4078 if (ret)
4079 goto err_remove_node;
4080
4081 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
4082 list_add_tail(&vma->mm_list, &vm->inactive_list);
4083
4084 if (i915_is_ggtt(vm)) {
4085 bool mappable, fenceable;
4086
4087 fenceable = (vma->node.size == fence_size &&
4088 (vma->node.start & (fence_alignment - 1)) == 0);
4089
4090 mappable = (vma->node.start + obj->base.size <=
4091 dev_priv->gtt.mappable_end);
4092
4093 obj->map_and_fenceable = mappable && fenceable;
4094 }
4095
4096 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
4097
4098 trace_i915_vma_bind(vma, flags);
4099 vma->bind_vma(vma, obj->cache_level,
4100 flags & (PIN_MAPPABLE | PIN_GLOBAL) ? GLOBAL_BIND : 0);
4101
4102 i915_gem_verify_gtt(dev);
4103 return vma;
4104
4105 err_remove_node:
4106 drm_mm_remove_node(&vma->node);
4107 err_free_vma:
4108 i915_gem_vma_destroy(vma);
4109 vma = ERR_PTR(ret);
4110 err_unpin:
4111 i915_gem_object_unpin_pages(obj);
4112 return vma;
4113 }
4114
4115 bool
4116 i915_gem_clflush_object(struct drm_i915_gem_object *obj,
4117 bool force)
4118 {
4119 /* If we don't have a page list set up, then we're not pinned
4120 * to GPU, and we can ignore the cache flush because it'll happen
4121 * again at bind time.
4122 */
4123 if (obj->pages == NULL)
4124 return false;
4125
4126 /*
4127 * Stolen memory is always coherent with the GPU as it is explicitly
4128 * marked as wc by the system, or the system is cache-coherent.
4129 */
4130 if (obj->stolen)
4131 return false;
4132
4133 /* If the GPU is snooping the contents of the CPU cache,
4134 * we do not need to manually clear the CPU cache lines. However,
4135 * the caches are only snooped when the render cache is
4136 * flushed/invalidated. As we always have to emit invalidations
4137 * and flushes when moving into and out of the RENDER domain, correct
4138 * snooping behaviour occurs naturally as the result of our domain
4139 * tracking.
4140 */
4141 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
4142 return false;
4143
4144 trace_i915_gem_object_clflush(obj);
4145 #ifdef __NetBSD__
4146 drm_clflush_pglist(&obj->igo_pageq);
4147 #else
4148 drm_clflush_sg(obj->pages);
4149 #endif
4150
4151 return true;
4152 }
4153
4154 /** Flushes the GTT write domain for the object if it's dirty. */
4155 static void
4156 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
4157 {
4158 uint32_t old_write_domain;
4159
4160 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
4161 return;
4162
4163 /* No actual flushing is required for the GTT write domain. Writes
4164 * to it immediately go to main memory as far as we know, so there's
4165 * no chipset flush. It also doesn't land in render cache.
4166 *
4167 * However, we do have to enforce the order so that all writes through
4168 * the GTT land before any writes to the device, such as updates to
4169 * the GATT itself.
4170 */
4171 wmb();
4172
4173 old_write_domain = obj->base.write_domain;
4174 obj->base.write_domain = 0;
4175
4176 trace_i915_gem_object_change_domain(obj,
4177 obj->base.read_domains,
4178 old_write_domain);
4179 }
4180
4181 /** Flushes the CPU write domain for the object if it's dirty. */
4182 static void
4183 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
4184 bool force)
4185 {
4186 uint32_t old_write_domain;
4187
4188 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
4189 return;
4190
4191 if (i915_gem_clflush_object(obj, force))
4192 i915_gem_chipset_flush(obj->base.dev);
4193
4194 old_write_domain = obj->base.write_domain;
4195 obj->base.write_domain = 0;
4196
4197 trace_i915_gem_object_change_domain(obj,
4198 obj->base.read_domains,
4199 old_write_domain);
4200 }
4201
4202 /**
4203 * Moves a single object to the GTT read, and possibly write domain.
4204 *
4205 * This function returns when the move is complete, including waiting on
4206 * flushes to occur.
4207 */
4208 int
4209 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
4210 {
4211 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
4212 uint32_t old_write_domain, old_read_domains;
4213 int ret;
4214
4215 /* Not valid to be called on unbound objects. */
4216 if (!i915_gem_obj_bound_any(obj))
4217 return -EINVAL;
4218
4219 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
4220 return 0;
4221
4222 ret = i915_gem_object_wait_rendering(obj, !write);
4223 if (ret)
4224 return ret;
4225
4226 i915_gem_object_flush_cpu_write_domain(obj, false);
4227
4228 /* Serialise direct access to this object with the barriers for
4229 * coherent writes from the GPU, by effectively invalidating the
4230 * GTT domain upon first access.
4231 */
4232 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
4233 mb();
4234
4235 old_write_domain = obj->base.write_domain;
4236 old_read_domains = obj->base.read_domains;
4237
4238 /* It should now be out of any other write domains, and we can update
4239 * the domain values for our changes.
4240 */
4241 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
4242 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
4243 if (write) {
4244 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
4245 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
4246 obj->dirty = 1;
4247 }
4248
4249 trace_i915_gem_object_change_domain(obj,
4250 old_read_domains,
4251 old_write_domain);
4252
4253 /* And bump the LRU for this access */
4254 if (i915_gem_object_is_inactive(obj)) {
4255 struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
4256 if (vma)
4257 list_move_tail(&vma->mm_list,
4258 &dev_priv->gtt.base.inactive_list);
4259
4260 }
4261
4262 return 0;
4263 }
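/*
 * Illustrative sketch (editorial, not part of this driver): a caller that
 * wants to write to an object through a GTT mapping typically binds it and
 * moves it to the GTT write domain first, so that the clflush and memory
 * barriers above are applied on its behalf; struct_mutex is assumed held.
 *
 *	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
 *	if (ret == 0)
 *		ret = i915_gem_object_set_to_gtt_domain(obj, true);
 */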
4264
4265 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
4266 enum i915_cache_level cache_level)
4267 {
4268 struct drm_device *dev = obj->base.dev;
4269 struct i915_vma *vma, *next;
4270 int ret;
4271
4272 if (obj->cache_level == cache_level)
4273 return 0;
4274
4275 if (i915_gem_obj_is_pinned(obj)) {
4276 DRM_DEBUG("can not change the cache level of pinned objects\n");
4277 return -EBUSY;
4278 }
4279
4280 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
4281 if (!i915_gem_valid_gtt_space(dev, &vma->node, cache_level)) {
4282 ret = i915_vma_unbind(vma);
4283 if (ret)
4284 return ret;
4285 }
4286 }
4287
4288 if (i915_gem_obj_bound_any(obj)) {
4289 ret = i915_gem_object_finish_gpu(obj);
4290 if (ret)
4291 return ret;
4292
4293 i915_gem_object_finish_gtt(obj);
4294
4295 /* Before SandyBridge, you could not use tiling or fence
4296 * registers with snooped memory, so relinquish any fences
4297 * currently pointing to our region in the aperture.
4298 */
4299 if (INTEL_INFO(dev)->gen < 6) {
4300 ret = i915_gem_object_put_fence(obj);
4301 if (ret)
4302 return ret;
4303 }
4304
4305 list_for_each_entry(vma, &obj->vma_list, vma_link)
4306 if (drm_mm_node_allocated(&vma->node))
4307 vma->bind_vma(vma, cache_level,
4308 obj->has_global_gtt_mapping ? GLOBAL_BIND : 0);
4309 }
4310
4311 list_for_each_entry(vma, &obj->vma_list, vma_link)
4312 vma->node.color = cache_level;
4313 obj->cache_level = cache_level;
4314
4315 if (cpu_write_needs_clflush(obj)) {
4316 u32 old_read_domains, old_write_domain;
4317
4318 /* If we're coming from LLC cached, then we haven't
4319 * actually been tracking whether the data is in the
4320 * CPU cache or not, since we only allow one bit set
4321 * in obj->write_domain and have been skipping the clflushes.
4322 * Just set it to the CPU cache for now.
4323 */
4324 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
4325
4326 old_read_domains = obj->base.read_domains;
4327 old_write_domain = obj->base.write_domain;
4328
4329 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4330 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4331
4332 trace_i915_gem_object_change_domain(obj,
4333 old_read_domains,
4334 old_write_domain);
4335 }
4336
4337 i915_gem_verify_gtt(dev);
4338 return 0;
4339 }
4340
4341 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
4342 struct drm_file *file)
4343 {
4344 struct drm_i915_gem_caching *args = data;
4345 struct drm_gem_object *gobj;
4346 struct drm_i915_gem_object *obj;
4347 int ret;
4348
4349 ret = i915_mutex_lock_interruptible(dev);
4350 if (ret)
4351 return ret;
4352
4353 gobj = drm_gem_object_lookup(dev, file, args->handle);
4354 if (gobj == NULL) {
4355 ret = -ENOENT;
4356 goto unlock;
4357 }
4358 obj = to_intel_bo(gobj);
4359
4360 switch (obj->cache_level) {
4361 case I915_CACHE_LLC:
4362 case I915_CACHE_L3_LLC:
4363 args->caching = I915_CACHING_CACHED;
4364 break;
4365
4366 case I915_CACHE_WT:
4367 args->caching = I915_CACHING_DISPLAY;
4368 break;
4369
4370 default:
4371 args->caching = I915_CACHING_NONE;
4372 break;
4373 }
4374
4375 drm_gem_object_unreference(&obj->base);
4376 unlock:
4377 mutex_unlock(&dev->struct_mutex);
4378 return ret;
4379 }
4380
4381 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
4382 struct drm_file *file)
4383 {
4384 struct drm_i915_gem_caching *args = data;
4385 struct drm_gem_object *gobj;
4386 struct drm_i915_gem_object *obj;
4387 enum i915_cache_level level;
4388 int ret;
4389
4390 switch (args->caching) {
4391 case I915_CACHING_NONE:
4392 level = I915_CACHE_NONE;
4393 break;
4394 case I915_CACHING_CACHED:
4395 level = I915_CACHE_LLC;
4396 break;
4397 case I915_CACHING_DISPLAY:
4398 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
4399 break;
4400 default:
4401 return -EINVAL;
4402 }
4403
4404 ret = i915_mutex_lock_interruptible(dev);
4405 if (ret)
4406 return ret;
4407
4408 gobj = drm_gem_object_lookup(dev, file, args->handle);
4409 if (gobj == NULL) {
4410 ret = -ENOENT;
4411 goto unlock;
4412 }
4413 obj = to_intel_bo(gobj);
4414
4415 ret = i915_gem_object_set_cache_level(obj, level);
4416
4417 drm_gem_object_unreference(&obj->base);
4418 unlock:
4419 mutex_unlock(&dev->struct_mutex);
4420 return ret;
4421 }
4422
4423 static bool is_pin_display(struct drm_i915_gem_object *obj)
4424 {
4425 /* There are 3 sources that pin objects:
4426 * 1. The display engine (scanouts, sprites, cursors);
4427 * 2. Reservations for execbuffer;
4428 * 3. The user.
4429 *
4430 * We can ignore reservations as we hold the struct_mutex and
4431 * are only called outside of the reservation path. The user
4432 * can only increment pin_count once, and so if after
4433 * subtracting the potential reference by the user, any pin_count
4434 * remains, it must be due to another use by the display engine.
4435 */
4436 return i915_gem_obj_to_ggtt(obj)->pin_count - !!obj->user_pin_count;
4437 }
4438
4439 /*
4440 * Prepare buffer for display plane (scanout, cursors, etc).
4441 * Can be called from an uninterruptible phase (modesetting) and allows
4442 * any flushes to be pipelined (for pageflips).
4443 */
4444 int
4445 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
4446 u32 alignment,
4447 struct intel_ring_buffer *pipelined)
4448 {
4449 u32 old_read_domains, old_write_domain;
4450 int ret;
4451
4452 if (pipelined != obj->ring) {
4453 ret = i915_gem_object_sync(obj, pipelined);
4454 if (ret)
4455 return ret;
4456 }
4457
4458 /* Mark the pin_display early so that we account for the
4459 * display coherency whilst setting up the cache domains.
4460 */
4461 obj->pin_display = true;
4462
4463 /* The display engine is not coherent with the LLC cache on gen6. As
4464 * a result, we make sure that the pinning that is about to occur is
4465 * done with uncached PTEs. This is lowest common denominator for all
4466 * chipsets.
4467 *
4468 * However for gen6+, we could do better by using the GFDT bit instead
4469 * of uncaching, which would allow us to flush all the LLC-cached data
4470 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
4471 */
4472 ret = i915_gem_object_set_cache_level(obj,
4473 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
4474 if (ret)
4475 goto err_unpin_display;
4476
4477 /* As the user may map the buffer once pinned in the display plane
4478 * (e.g. libkms for the bootup splash), we have to ensure that we
4479 * always use map_and_fenceable for all scanout buffers.
4480 */
4481 ret = i915_gem_obj_ggtt_pin(obj, alignment, PIN_MAPPABLE);
4482 if (ret)
4483 goto err_unpin_display;
4484
4485 i915_gem_object_flush_cpu_write_domain(obj, true);
4486
4487 old_write_domain = obj->base.write_domain;
4488 old_read_domains = obj->base.read_domains;
4489
4490 /* It should now be out of any other write domains, and we can update
4491 * the domain values for our changes.
4492 */
4493 obj->base.write_domain = 0;
4494 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
4495
4496 trace_i915_gem_object_change_domain(obj,
4497 old_read_domains,
4498 old_write_domain);
4499
4500 return 0;
4501
4502 err_unpin_display:
4503 obj->pin_display = is_pin_display(obj);
4504 return ret;
4505 }
4506
4507 void
4508 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj)
4509 {
4510 i915_gem_object_ggtt_unpin(obj);
4511 obj->pin_display = is_pin_display(obj);
4512 }
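/*
 * Illustrative scanout sketch (editorial, not part of this driver): the
 * modesetting code is expected to bracket a framebuffer's time on a plane
 * with the pair of helpers above, programming the plane from the resulting
 * GTT offset in between; the NULL @pipelined is an assumption for the
 * non-flip case.
 *
 *	ret = i915_gem_object_pin_to_display_plane(obj, alignment, NULL);
 *	if (ret == 0) {
 *		... write i915_gem_obj_ggtt_offset(obj) to the plane registers ...
 *		i915_gem_object_unpin_from_display_plane(obj);
 *	}
 */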
4513
4514 int
4515 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
4516 {
4517 int ret;
4518
4519 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
4520 return 0;
4521
4522 ret = i915_gem_object_wait_rendering(obj, false);
4523 if (ret)
4524 return ret;
4525
4526 /* Ensure that we invalidate the GPU's caches and TLBs. */
4527 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
4528 return 0;
4529 }
4530
4531 /**
4532 * Moves a single object to the CPU read, and possibly write domain.
4533 *
4534 * This function returns when the move is complete, including waiting on
4535 * flushes to occur.
4536 */
4537 int
4538 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
4539 {
4540 uint32_t old_write_domain, old_read_domains;
4541 int ret;
4542
4543 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
4544 return 0;
4545
4546 ret = i915_gem_object_wait_rendering(obj, !write);
4547 if (ret)
4548 return ret;
4549
4550 i915_gem_object_flush_gtt_write_domain(obj);
4551
4552 old_write_domain = obj->base.write_domain;
4553 old_read_domains = obj->base.read_domains;
4554
4555 /* Flush the CPU cache if it's still invalid. */
4556 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
4557 i915_gem_clflush_object(obj, false);
4558
4559 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
4560 }
4561
4562 /* It should now be out of any other write domains, and we can update
4563 * the domain values for our changes.
4564 */
4565 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
4566
4567 /* If we're writing through the CPU, then the GPU read domains will
4568 * need to be invalidated at next use.
4569 */
4570 if (write) {
4571 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4572 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4573 }
4574
4575 trace_i915_gem_object_change_domain(obj,
4576 old_read_domains,
4577 old_write_domain);
4578
4579 return 0;
4580 }
4581
4582 /* Throttle our rendering by waiting until the ring has completed our requests
4583 * emitted over 20 msec ago.
4584 *
4585 * Note that if we were to use the current jiffies each time around the loop,
4586 * we wouldn't escape the function with any frames outstanding if the time to
4587 * render a frame was over 20ms.
4588 *
4589 * This should get us reasonable parallelism between CPU and GPU but also
4590 * relatively low latency when blocking on a particular request to finish.
4591 */
4592 static int
4593 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
4594 {
4595 struct drm_i915_private *dev_priv = dev->dev_private;
4596 struct drm_i915_file_private *file_priv = file->driver_priv;
4597 unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
4598 struct drm_i915_gem_request *request;
4599 struct intel_ring_buffer *ring = NULL;
4600 unsigned reset_counter;
4601 u32 seqno = 0;
4602 int ret;
4603
4604 ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
4605 if (ret)
4606 return ret;
4607
4608 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false);
4609 if (ret)
4610 return ret;
4611
4612 spin_lock(&file_priv->mm.lock);
4613 list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
4614 if (time_after_eq(request->emitted_jiffies, recent_enough))
4615 break;
4616
4617 ring = request->ring;
4618 seqno = request->seqno;
4619 }
4620 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
4621 spin_unlock(&file_priv->mm.lock);
4622
4623 if (seqno == 0)
4624 return 0;
4625
4626 ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, NULL);
4627 if (ret == 0)
4628 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
4629
4630 return ret;
4631 }
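/*
 * Worked illustration of the throttle window (editorial; the timings are
 * hypothetical): with requests emitted at t, t+5ms and t+25ms and a call at
 * t+30ms, recent_enough is t+10ms, so the loop stops at the t+25ms request
 * and waits on the one from t+5ms.  Because seqnos are ordered, waiting on
 * the newest request older than 20ms guarantees that everything older has
 * retired before the caller may queue more rendering.
 */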
4632
4633 static bool
4634 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
4635 {
4636 struct drm_i915_gem_object *obj = vma->obj;
4637
4638 if (alignment &&
4639 vma->node.start & (alignment - 1))
4640 return true;
4641
4642 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
4643 return true;
4644
4645 if (flags & PIN_OFFSET_BIAS &&
4646 vma->node.start < (flags & PIN_OFFSET_MASK))
4647 return true;
4648
4649 return false;
4650 }
4651
4652 int
4653 i915_gem_object_pin(struct drm_i915_gem_object *obj,
4654 struct i915_address_space *vm,
4655 uint32_t alignment,
4656 uint64_t flags)
4657 {
4658 struct i915_vma *vma;
4659 int ret;
4660
4661 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
4662 return -EINVAL;
4663
4664 vma = i915_gem_obj_to_vma(obj, vm);
4665 if (vma) {
4666 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
4667 return -EBUSY;
4668
4669 if (i915_vma_misplaced(vma, alignment, flags)) {
4670 WARN(vma->pin_count,
4671 "bo is already pinned with incorrect alignment:"
4672 " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d,"
4673 " obj->map_and_fenceable=%d\n",
4674 i915_gem_obj_offset(obj, vm), alignment,
4675 !!(flags & PIN_MAPPABLE),
4676 obj->map_and_fenceable);
4677 ret = i915_vma_unbind(vma);
4678 if (ret)
4679 return ret;
4680
4681 vma = NULL;
4682 }
4683 }
4684
4685 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
4686 vma = i915_gem_object_bind_to_vm(obj, vm, alignment, flags);
4687 if (IS_ERR(vma))
4688 return PTR_ERR(vma);
4689 }
4690
4691 if (flags & PIN_GLOBAL && !obj->has_global_gtt_mapping)
4692 vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
4693
4694 vma->pin_count++;
4695 if (flags & PIN_MAPPABLE)
4696 obj->pin_mappable |= true;
4697
4698 return 0;
4699 }
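/*
 * Illustrative sketch (editorial, not part of this driver): pinning for CPU
 * access through the aperture versus pinning for a GPU-only global mapping
 * differs only in the flags handed to this function.  The global GTT as the
 * target VM and the 4096-byte alignment are illustrative assumptions.
 *
 *	i915_gem_object_pin(obj, &dev_priv->gtt.base, 4096, PIN_MAPPABLE);
 *	i915_gem_object_pin(obj, &dev_priv->gtt.base, 0, PIN_GLOBAL);
 */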
4700
4701 void
4702 i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj)
4703 {
4704 struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
4705
4706 BUG_ON(!vma);
4707 BUG_ON(vma->pin_count == 0);
4708 BUG_ON(!i915_gem_obj_ggtt_bound(obj));
4709
4710 if (--vma->pin_count == 0)
4711 obj->pin_mappable = false;
4712 }
4713
4714 int
4715 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
4716 struct drm_file *file)
4717 {
4718 struct drm_i915_gem_pin *args = data;
4719 struct drm_gem_object *gobj;
4720 struct drm_i915_gem_object *obj;
4721 int ret;
4722
4723 if (INTEL_INFO(dev)->gen >= 6)
4724 return -ENODEV;
4725
4726 ret = i915_mutex_lock_interruptible(dev);
4727 if (ret)
4728 return ret;
4729
4730 gobj = drm_gem_object_lookup(dev, file, args->handle);
4731 if (gobj == NULL) {
4732 ret = -ENOENT;
4733 goto unlock;
4734 }
4735 obj = to_intel_bo(gobj);
4736
4737 if (obj->madv != I915_MADV_WILLNEED) {
4738 DRM_DEBUG("Attempting to pin a purgeable buffer\n");
4739 ret = -EFAULT;
4740 goto out;
4741 }
4742
4743 if (obj->pin_filp != NULL && obj->pin_filp != file) {
4744 DRM_DEBUG("Already pinned in i915_gem_pin_ioctl(): %d\n",
4745 args->handle);
4746 ret = -EINVAL;
4747 goto out;
4748 }
4749
4750 if (obj->user_pin_count == ULONG_MAX) {
4751 ret = -EBUSY;
4752 goto out;
4753 }
4754
4755 if (obj->user_pin_count == 0) {
4756 ret = i915_gem_obj_ggtt_pin(obj, args->alignment, PIN_MAPPABLE);
4757 if (ret)
4758 goto out;
4759 }
4760
4761 obj->user_pin_count++;
4762 obj->pin_filp = file;
4763
4764 args->offset = i915_gem_obj_ggtt_offset(obj);
4765 out:
4766 drm_gem_object_unreference(&obj->base);
4767 unlock:
4768 mutex_unlock(&dev->struct_mutex);
4769 return ret;
4770 }
4771
4772 int
4773 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
4774 struct drm_file *file)
4775 {
4776 struct drm_i915_gem_pin *args = data;
4777 struct drm_gem_object *gobj;
4778 struct drm_i915_gem_object *obj;
4779 int ret;
4780
4781 ret = i915_mutex_lock_interruptible(dev);
4782 if (ret)
4783 return ret;
4784
4785 gobj = drm_gem_object_lookup(dev, file, args->handle);
4786 if (gobj == NULL) {
4787 ret = -ENOENT;
4788 goto unlock;
4789 }
4790 obj = to_intel_bo(gobj);
4791
4792 if (obj->pin_filp != file) {
4793 DRM_DEBUG("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
4794 args->handle);
4795 ret = -EINVAL;
4796 goto out;
4797 }
4798 obj->user_pin_count--;
4799 if (obj->user_pin_count == 0) {
4800 obj->pin_filp = NULL;
4801 i915_gem_object_ggtt_unpin(obj);
4802 }
4803
4804 out:
4805 drm_gem_object_unreference(&obj->base);
4806 unlock:
4807 mutex_unlock(&dev->struct_mutex);
4808 return ret;
4809 }
4810
4811 int
4812 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4813 struct drm_file *file)
4814 {
4815 struct drm_i915_gem_busy *args = data;
4816 struct drm_gem_object *gobj;
4817 struct drm_i915_gem_object *obj;
4818 int ret;
4819
4820 ret = i915_mutex_lock_interruptible(dev);
4821 if (ret)
4822 return ret;
4823
4824 gobj = drm_gem_object_lookup(dev, file, args->handle);
4825 if (gobj == NULL) {
4826 ret = -ENOENT;
4827 goto unlock;
4828 }
4829 obj = to_intel_bo(gobj);
4830
4831 /* Count all active objects as busy, even if they are currently not used
4832 * by the gpu. Users of this interface expect objects to eventually
4833 * become non-busy without any further actions, therefore emit any
4834 * necessary flushes here.
4835 */
4836 ret = i915_gem_object_flush_active(obj);
4837
4838 args->busy = obj->active;
4839 if (obj->ring) {
4840 BUILD_BUG_ON(I915_NUM_RINGS > 16);
4841 args->busy |= intel_ring_flag(obj->ring) << 16;
4842 }
4843
4844 drm_gem_object_unreference(&obj->base);
4845 unlock:
4846 mutex_unlock(&dev->struct_mutex);
4847 return ret;
4848 }
4849
4850 int
4851 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4852 struct drm_file *file_priv)
4853 {
4854 return i915_gem_ring_throttle(dev, file_priv);
4855 }
4856
4857 int
4858 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4859 struct drm_file *file_priv)
4860 {
4861 struct drm_i915_gem_madvise *args = data;
4862 struct drm_gem_object *gobj;
4863 struct drm_i915_gem_object *obj;
4864 int ret;
4865
4866 switch (args->madv) {
4867 case I915_MADV_DONTNEED:
4868 case I915_MADV_WILLNEED:
4869 break;
4870 default:
4871 return -EINVAL;
4872 }
4873
4874 ret = i915_mutex_lock_interruptible(dev);
4875 if (ret)
4876 return ret;
4877
4878 gobj = drm_gem_object_lookup(dev, file_priv, args->handle);
4879 if (gobj == NULL) {
4880 ret = -ENOENT;
4881 goto unlock;
4882 }
4883 obj = to_intel_bo(gobj);
4884
4885 if (i915_gem_obj_is_pinned(obj)) {
4886 ret = -EINVAL;
4887 goto out;
4888 }
4889
4890 if (obj->madv != __I915_MADV_PURGED)
4891 obj->madv = args->madv;
4892
4893 /* if the object is no longer attached, discard its backing storage */
4894 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL)
4895 i915_gem_object_truncate(obj);
4896
4897 args->retained = obj->madv != __I915_MADV_PURGED;
4898
4899 out:
4900 drm_gem_object_unreference(&obj->base);
4901 unlock:
4902 mutex_unlock(&dev->struct_mutex);
4903 return ret;
4904 }
4905
4906 void i915_gem_object_init(struct drm_i915_gem_object *obj,
4907 const struct drm_i915_gem_object_ops *ops)
4908 {
4909 INIT_LIST_HEAD(&obj->global_list);
4910 INIT_LIST_HEAD(&obj->ring_list);
4911 INIT_LIST_HEAD(&obj->obj_exec_link);
4912 INIT_LIST_HEAD(&obj->vma_list);
4913
4914 obj->ops = ops;
4915
4916 obj->fence_reg = I915_FENCE_REG_NONE;
4917 obj->madv = I915_MADV_WILLNEED;
4918 /* Avoid an unnecessary call to unbind on the first bind. */
4919 obj->map_and_fenceable = true;
4920
4921 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
4922 }
4923
4924 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
4925 .get_pages = i915_gem_object_get_pages_gtt,
4926 .put_pages = i915_gem_object_put_pages_gtt,
4927 };
4928
4929 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
4930 size_t size)
4931 {
4932 #ifdef __NetBSD__
4933 struct drm_i915_private *const dev_priv = dev->dev_private;
4934 #endif
4935 struct drm_i915_gem_object *obj;
4936 #ifndef __NetBSD__
4937 struct address_space *mapping;
4938 gfp_t mask;
4939 #endif
4940
4941 obj = i915_gem_object_alloc(dev);
4942 if (obj == NULL)
4943 return NULL;
4944
4945 if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4946 i915_gem_object_free(obj);
4947 return NULL;
4948 }
4949
4950 #ifdef __NetBSD__
4951 uao_set_pgfl(obj->base.gemo_shm_uao, dev_priv->gtt.pgfl);
4952 #else
4953 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
4954 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
4955 /* 965gm cannot relocate objects above 4GiB. */
4956 mask &= ~__GFP_HIGHMEM;
4957 mask |= __GFP_DMA32;
4958 }
4959
4960 mapping = file_inode(obj->base.filp)->i_mapping;
4961 mapping_set_gfp_mask(mapping, mask);
4962 #endif
4963
4964 i915_gem_object_init(obj, &i915_gem_object_ops);
4965
4966 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4967 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4968
4969 if (HAS_LLC(dev)) {
4970 /* On some devices, we can have the GPU use the LLC (the CPU
4971 * cache) for about a 10% performance improvement
4972 * compared to uncached. Graphics requests other than
4973 * display scanout are coherent with the CPU in
4974 * accessing this cache. This means in this mode we
4975 * don't need to clflush on the CPU side, and on the
4976 * GPU side we only need to flush internal caches to
4977 * get data visible to the CPU.
4978 *
4979 * However, we maintain the display planes as UC, and so
4980 * need to rebind when first used as such.
4981 */
4982 obj->cache_level = I915_CACHE_LLC;
4983 } else
4984 obj->cache_level = I915_CACHE_NONE;
4985
4986 trace_i915_gem_object_create(obj);
4987
4988 return obj;
4989 }
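/*
 * Illustrative sketch (editorial, not part of this driver): a minimal
 * kernel-internal user allocates an object, binds it into the global GTT and
 * finally drops its reference; PAGE_SIZE is only an example size and
 * struct_mutex is assumed held.
 *
 *	obj = i915_gem_alloc_object(dev, PAGE_SIZE);
 *	if (obj != NULL) {
 *		if (i915_gem_obj_ggtt_pin(obj, 0, PIN_GLOBAL) == 0) {
 *			... use i915_gem_obj_ggtt_offset(obj) ...
 *			i915_gem_object_ggtt_unpin(obj);
 *		}
 *		drm_gem_object_unreference(&obj->base);
 *	}
 */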
4990
4991 void i915_gem_free_object(struct drm_gem_object *gem_obj)
4992 {
4993 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4994 struct drm_device *dev = obj->base.dev;
4995 struct drm_i915_private *dev_priv = dev->dev_private;
4996 struct i915_vma *vma, *next;
4997
4998 intel_runtime_pm_get(dev_priv);
4999
5000 trace_i915_gem_object_destroy(obj);
5001
5002 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
5003 int ret;
5004
5005 vma->pin_count = 0;
5006 ret = i915_vma_unbind(vma);
5007 if (WARN_ON(ret == -ERESTARTSYS)) {
5008 bool was_interruptible;
5009
5010 was_interruptible = dev_priv->mm.interruptible;
5011 dev_priv->mm.interruptible = false;
5012
5013 WARN_ON(i915_vma_unbind(vma));
5014
5015 dev_priv->mm.interruptible = was_interruptible;
5016 }
5017 }
5018
5019 i915_gem_object_detach_phys(obj);
5020
5021 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up
5022 * before progressing. */
5023 if (obj->stolen)
5024 i915_gem_object_unpin_pages(obj);
5025
5026 if (WARN_ON(obj->pages_pin_count))
5027 obj->pages_pin_count = 0;
5028 i915_gem_object_put_pages(obj);
5029 i915_gem_object_free_mmap_offset(obj);
5030 i915_gem_object_release_stolen(obj);
5031
5032 BUG_ON(obj->pages);
5033
5034 #ifndef __NetBSD__ /* XXX drm prime */
5035 if (obj->base.import_attach)
5036 drm_prime_gem_destroy(&obj->base, NULL);
5037 #endif
5038
5039 drm_gem_object_release(&obj->base);
5040 i915_gem_info_remove_obj(dev_priv, obj->base.size);
5041
5042 kfree(obj->bit_17);
5043 i915_gem_object_free(obj);
5044
5045 intel_runtime_pm_put(dev_priv);
5046 }
5047
5048 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
5049 struct i915_address_space *vm)
5050 {
5051 struct i915_vma *vma;
5052 list_for_each_entry(vma, &obj->vma_list, vma_link)
5053 if (vma->vm == vm)
5054 return vma;
5055
5056 return NULL;
5057 }
5058
5059 void i915_gem_vma_destroy(struct i915_vma *vma)
5060 {
5061 WARN_ON(vma->node.allocated);
5062
5063 /* Keep the vma as a placeholder in the execbuffer reservation lists */
5064 if (!list_empty(&vma->exec_list))
5065 return;
5066
5067 list_del(&vma->vma_link);
5068
5069 kfree(vma);
5070 }
5071
5072 int
5073 i915_gem_suspend(struct drm_device *dev)
5074 {
5075 struct drm_i915_private *dev_priv = dev->dev_private;
5076 int ret = 0;
5077
5078 mutex_lock(&dev->struct_mutex);
5079 if (dev_priv->ums.mm_suspended)
5080 goto err;
5081
5082 ret = i915_gpu_idle(dev);
5083 if (ret)
5084 goto err;
5085
5086 i915_gem_retire_requests(dev);
5087
5088 /* Under UMS, be paranoid and evict. */
5089 if (!drm_core_check_feature(dev, DRIVER_MODESET))
5090 i915_gem_evict_everything(dev);
5091
5092 i915_kernel_lost_context(dev);
5093 i915_gem_cleanup_ringbuffer(dev);
5094
5095 /* Hack! Don't let anybody do execbuf while we don't control the chip.
5096 * We need to replace this with a semaphore, or something.
5097 * And not confound ums.mm_suspended!
5098 */
5099 dev_priv->ums.mm_suspended = !drm_core_check_feature(dev,
5100 DRIVER_MODESET);
5101 mutex_unlock(&dev->struct_mutex);
5102
5103 del_timer_sync(&dev_priv->gpu_error.hangcheck_timer);
5104 cancel_delayed_work_sync(&dev_priv->mm.retire_work);
5105 cancel_delayed_work_sync(&dev_priv->mm.idle_work);
5106
5107 return 0;
5108
5109 err:
5110 mutex_unlock(&dev->struct_mutex);
5111 return ret;
5112 }
5113
5114 int i915_gem_l3_remap(struct intel_ring_buffer *ring, int slice)
5115 {
5116 struct drm_device *dev = ring->dev;
5117 struct drm_i915_private *dev_priv = dev->dev_private;
5118 u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
5119 u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
5120 int i, ret;
5121
5122 if (!HAS_L3_DPF(dev) || !remap_info)
5123 return 0;
5124
5125 ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3);
5126 if (ret)
5127 return ret;
5128
5129 /*
5130 * Note: We do not worry about the concurrent register cacheline hang
5131 * here because no other code should access these registers other than
5132 * at initialization time.
5133 */
5134 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
5135 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
5136 intel_ring_emit(ring, reg_base + i);
5137 intel_ring_emit(ring, remap_info[i/4]);
5138 }
5139
5140 intel_ring_advance(ring);
5141
5142 return ret;
5143 }
5144
5145 void i915_gem_init_swizzling(struct drm_device *dev)
5146 {
5147 struct drm_i915_private *dev_priv = dev->dev_private;
5148
5149 if (INTEL_INFO(dev)->gen < 5 ||
5150 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
5151 return;
5152
5153 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
5154 DISP_TILE_SURFACE_SWIZZLING);
5155
5156 if (IS_GEN5(dev))
5157 return;
5158
5159 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
5160 if (IS_GEN6(dev))
5161 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
5162 else if (IS_GEN7(dev))
5163 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
5164 else if (IS_GEN8(dev))
5165 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
5166 else
5167 BUG();
5168 }
5169
5170 static bool
5171 intel_enable_blt(struct drm_device *dev)
5172 {
5173 if (!HAS_BLT(dev))
5174 return false;
5175
5176 /* The blitter was dysfunctional on early prototypes */
5177 if (IS_GEN6(dev) && dev->pdev->revision < 8) {
5178 DRM_INFO("BLT not supported on this pre-production hardware;"
5179 " graphics performance will be degraded.\n");
5180 return false;
5181 }
5182
5183 return true;
5184 }
5185
5186 static int i915_gem_init_rings(struct drm_device *dev)
5187 {
5188 struct drm_i915_private *dev_priv = dev->dev_private;
5189 int ret;
5190
5191 ret = intel_init_render_ring_buffer(dev);
5192 if (ret)
5193 return ret;
5194
5195 if (HAS_BSD(dev)) {
5196 ret = intel_init_bsd_ring_buffer(dev);
5197 if (ret)
5198 goto cleanup_render_ring;
5199 }
5200
5201 if (intel_enable_blt(dev)) {
5202 ret = intel_init_blt_ring_buffer(dev);
5203 if (ret)
5204 goto cleanup_bsd_ring;
5205 }
5206
5207 if (HAS_VEBOX(dev)) {
5208 ret = intel_init_vebox_ring_buffer(dev);
5209 if (ret)
5210 goto cleanup_blt_ring;
5211 }
5212
5213
5214 ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
5215 if (ret)
5216 goto cleanup_vebox_ring;
5217
5218 return 0;
5219
5220 cleanup_vebox_ring:
5221 intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
5222 cleanup_blt_ring:
5223 intel_cleanup_ring_buffer(&dev_priv->ring[BCS]);
5224 cleanup_bsd_ring:
5225 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
5226 cleanup_render_ring:
5227 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
5228
5229 return ret;
5230 }
5231
5232 int
5233 i915_gem_init_hw(struct drm_device *dev)
5234 {
5235 struct drm_i915_private *dev_priv = dev->dev_private;
5236 int ret, i;
5237
5238 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
5239 return -EIO;
5240
5241 if (dev_priv->ellc_size)
5242 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
5243
5244 if (IS_HASWELL(dev))
5245 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
5246 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
5247
5248 if (HAS_PCH_NOP(dev)) {
5249 if (IS_IVYBRIDGE(dev)) {
5250 u32 temp = I915_READ(GEN7_MSG_CTL);
5251 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
5252 I915_WRITE(GEN7_MSG_CTL, temp);
5253 } else if (INTEL_INFO(dev)->gen >= 7) {
5254 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
5255 temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
5256 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
5257 }
5258 }
5259
5260 i915_gem_init_swizzling(dev);
5261
5262 ret = i915_gem_init_rings(dev);
5263 if (ret)
5264 return ret;
5265
5266 for (i = 0; i < NUM_L3_SLICES(dev); i++)
5267 i915_gem_l3_remap(&dev_priv->ring[RCS], i);
5268
5269 /*
5270 * XXX: Contexts should only be initialized once. Doing a switch to the
5271 * default context switch however is something we'd like to do after
5272 * reset or thaw (the latter may not actually be necessary for HW, but
5273 * goes with our code better). Context switching requires rings (for
5274 * the do_switch), but before enabling PPGTT. So don't move this.
5275 */
5276 ret = i915_gem_context_enable(dev_priv);
5277 if (ret) {
5278 DRM_ERROR("Context enable failed %d\n", ret);
5279 goto err_out;
5280 }
5281
5282 return 0;
5283
5284 err_out:
5285 i915_gem_cleanup_ringbuffer(dev);
5286 return ret;
5287 }
5288
5289 int i915_gem_init(struct drm_device *dev)
5290 {
5291 struct drm_i915_private *dev_priv = dev->dev_private;
5292 int ret;
5293
5294 mutex_lock(&dev->struct_mutex);
5295
5296 if (IS_VALLEYVIEW(dev)) {
5297 /* VLVA0 (potential hack), BIOS isn't actually waking us */
5298 I915_WRITE(VLV_GTLC_WAKE_CTRL, 1);
5299 if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 1) == 1, 10))
5300 DRM_DEBUG_DRIVER("allow wake ack timed out\n");
5301 }
5302 i915_gem_init_global_gtt(dev);
5303
5304 ret = i915_gem_context_init(dev);
5305 if (ret) {
5306 mutex_unlock(&dev->struct_mutex);
5307 return ret;
5308 }
5309
5310 ret = i915_gem_init_hw(dev);
5311 mutex_unlock(&dev->struct_mutex);
5312 if (ret) {
5313 WARN_ON(dev_priv->mm.aliasing_ppgtt);
5314 i915_gem_context_fini(dev);
5315 drm_mm_takedown(&dev_priv->gtt.base.mm);
5316 return ret;
5317 }
5318
5319 /* Allow hardware batchbuffers unless told otherwise, but not for KMS. */
5320 if (!drm_core_check_feature(dev, DRIVER_MODESET))
5321 dev_priv->dri1.allow_batchbuffer = 1;
5322 return 0;
5323 }
5324
5325 void
5326 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
5327 {
5328 struct drm_i915_private *dev_priv = dev->dev_private;
5329 struct intel_ring_buffer *ring;
5330 int i;
5331
5332 for_each_ring(ring, dev_priv, i)
5333 intel_cleanup_ring_buffer(ring);
5334 }
5335
5336 int
5337 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
5338 struct drm_file *file_priv)
5339 {
5340 struct drm_i915_private *dev_priv = dev->dev_private;
5341 int ret;
5342
5343 if (drm_core_check_feature(dev, DRIVER_MODESET))
5344 return 0;
5345
5346 if (i915_reset_in_progress(&dev_priv->gpu_error)) {
5347 DRM_ERROR("Reenabling wedged hardware, good luck\n");
5348 atomic_set(&dev_priv->gpu_error.reset_counter, 0);
5349 }
5350
5351 mutex_lock(&dev->struct_mutex);
5352 dev_priv->ums.mm_suspended = 0;
5353
5354 ret = i915_gem_init_hw(dev);
5355 if (ret != 0) {
5356 mutex_unlock(&dev->struct_mutex);
5357 return ret;
5358 }
5359
5360 BUG_ON(!list_empty(&dev_priv->gtt.base.active_list));
5361 mutex_unlock(&dev->struct_mutex);
5362
5363 ret = drm_irq_install(dev);
5364 if (ret)
5365 goto cleanup_ringbuffer;
5366
5367 return 0;
5368
5369 cleanup_ringbuffer:
5370 mutex_lock(&dev->struct_mutex);
5371 i915_gem_cleanup_ringbuffer(dev);
5372 dev_priv->ums.mm_suspended = 1;
5373 mutex_unlock(&dev->struct_mutex);
5374
5375 return ret;
5376 }
5377
5378 int
5379 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
5380 struct drm_file *file_priv)
5381 {
5382 if (drm_core_check_feature(dev, DRIVER_MODESET))
5383 return 0;
5384
5385 drm_irq_uninstall(dev);
5386
5387 return i915_gem_suspend(dev);
5388 }
5389
5390 void
5391 i915_gem_lastclose(struct drm_device *dev)
5392 {
5393 int ret;
5394
5395 if (drm_core_check_feature(dev, DRIVER_MODESET))
5396 return;
5397
5398 ret = i915_gem_suspend(dev);
5399 if (ret)
5400 DRM_ERROR("failed to idle hardware: %d\n", ret);
5401 }
5402
5403 static void
5404 init_ring_lists(struct intel_ring_buffer *ring)
5405 {
5406 INIT_LIST_HEAD(&ring->active_list);
5407 INIT_LIST_HEAD(&ring->request_list);
5408 }
5409
5410 void i915_init_vm(struct drm_i915_private *dev_priv,
5411 struct i915_address_space *vm)
5412 {
5413 if (!i915_is_ggtt(vm))
5414 drm_mm_init(&vm->mm, vm->start, vm->total);
5415 vm->dev = dev_priv->dev;
5416 INIT_LIST_HEAD(&vm->active_list);
5417 INIT_LIST_HEAD(&vm->inactive_list);
5418 INIT_LIST_HEAD(&vm->global_link);
5419 list_add_tail(&vm->global_link, &dev_priv->vm_list);
5420 }
5421
5422 void
5423 i915_gem_load(struct drm_device *dev)
5424 {
5425 struct drm_i915_private *dev_priv = dev->dev_private;
5426 int i;
5427
5428 dev_priv->slab =
5429 kmem_cache_create("i915_gem_object",
5430 sizeof(struct drm_i915_gem_object), 0,
5431 SLAB_HWCACHE_ALIGN,
5432 NULL);
5433
5434 INIT_LIST_HEAD(&dev_priv->vm_list);
5435 i915_init_vm(dev_priv, &dev_priv->gtt.base);
5436
5437 INIT_LIST_HEAD(&dev_priv->context_list);
5438 INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
5439 INIT_LIST_HEAD(&dev_priv->mm.bound_list);
5440 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
5441 for (i = 0; i < I915_NUM_RINGS; i++)
5442 init_ring_lists(&dev_priv->ring[i]);
5443 for (i = 0; i < I915_MAX_NUM_FENCES; i++)
5444 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
5445 INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
5446 i915_gem_retire_work_handler);
5447 INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
5448 i915_gem_idle_work_handler);
5449 #ifdef __NetBSD__
5450 spin_lock_init(&dev_priv->gpu_error.reset_lock);
5451 DRM_INIT_WAITQUEUE(&dev_priv->gpu_error.reset_queue, "i915errst");
5452 #else
5453 init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
5454 #endif
5455
5456 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
5457 if (IS_GEN3(dev)) {
5458 I915_WRITE(MI_ARB_STATE,
5459 _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
5460 }
5461
5462 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
5463
5464 /* Old X drivers will take 0-2 for front, back, depth buffers */
5465 if (!drm_core_check_feature(dev, DRIVER_MODESET))
5466 dev_priv->fence_reg_start = 3;
5467
5468 if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev))
5469 dev_priv->num_fence_regs = 32;
5470 else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
5471 dev_priv->num_fence_regs = 16;
5472 else
5473 dev_priv->num_fence_regs = 8;
5474
5475 /* Initialize fence registers to zero */
5476 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
5477 i915_gem_restore_fences(dev);
5478
5479 i915_gem_detect_bit_6_swizzle(dev);
5480 #ifdef __NetBSD__
5481 DRM_INIT_WAITQUEUE(&dev_priv->pending_flip_queue, "i915flip");
5482 spin_lock_init(&dev_priv->pending_flip_lock);
5483 #else
5484 init_waitqueue_head(&dev_priv->pending_flip_queue);
5485 #endif
5486
5487 dev_priv->mm.interruptible = true;
5488
5489 dev_priv->mm.inactive_shrinker.scan_objects = i915_gem_inactive_scan;
5490 dev_priv->mm.inactive_shrinker.count_objects = i915_gem_inactive_count;
5491 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
5492 register_shrinker(&dev_priv->mm.inactive_shrinker);
5493 }
5494
5495 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
5496 {
5497 struct drm_i915_file_private *file_priv = file->driver_priv;
5498
5499 cancel_delayed_work_sync(&file_priv->mm.idle_work);
5500
5501 /* Clean up our request list when the client is going away, so that
5502 * later retire_requests won't dereference our soon-to-be-gone
5503 * file_priv.
5504 */
5505 spin_lock(&file_priv->mm.lock);
5506 while (!list_empty(&file_priv->mm.request_list)) {
5507 struct drm_i915_gem_request *request;
5508
5509 request = list_first_entry(&file_priv->mm.request_list,
5510 struct drm_i915_gem_request,
5511 client_list);
5512 list_del(&request->client_list);
5513 request->file_priv = NULL;
5514 }
5515 spin_unlock(&file_priv->mm.lock);
5516 }
5517
5518 static void
5519 i915_gem_file_idle_work_handler(struct work_struct *work)
5520 {
5521 struct drm_i915_file_private *file_priv =
5522 container_of(work, typeof(*file_priv), mm.idle_work.work);
5523
5524 atomic_set(&file_priv->rps_wait_boost, false);
5525 }
5526
5527 int i915_gem_open(struct drm_device *dev, struct drm_file *file)
5528 {
5529 struct drm_i915_file_private *file_priv;
5530 int ret;
5531
5532 DRM_DEBUG_DRIVER("\n");
5533
5534 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
5535 if (!file_priv)
5536 return -ENOMEM;
5537
5538 file->driver_priv = file_priv;
5539 file_priv->dev_priv = dev->dev_private;
5540 file_priv->file = file;
5541
5542 spin_lock_init(&file_priv->mm.lock);
5543 INIT_LIST_HEAD(&file_priv->mm.request_list);
5544 INIT_DELAYED_WORK(&file_priv->mm.idle_work,
5545 i915_gem_file_idle_work_handler);
5546
5547 ret = i915_gem_context_open(dev, file);
5548 if (ret)
5549 kfree(file_priv);
5550
5551 return ret;
5552 }
5553
5554 #ifndef __NetBSD__
5555 static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
5556 {
5557 if (!mutex_is_locked(mutex))
5558 return false;
5559
5560 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)
5561 return mutex->owner == task;
5562 #else
5563 /* Since UP may be pre-empted, we cannot assume that we own the lock */
5564 return false;
5565 #endif
5566 }
5567 #endif
5568
5569 static unsigned long
5570 i915_gem_inactive_count(struct shrinker *shrinker, struct shrink_control *sc)
5571 {
5572 #ifdef __NetBSD__ /* XXX shrinkers */
5573 return 0;
5574 #else
5575 struct drm_i915_private *dev_priv =
5576 container_of(shrinker,
5577 struct drm_i915_private,
5578 mm.inactive_shrinker);
5579 struct drm_device *dev = dev_priv->dev;
5580 struct drm_i915_gem_object *obj;
5581 bool unlock = true;
5582 unsigned long count;
5583
5584 if (!mutex_trylock(&dev->struct_mutex)) {
5585 if (!mutex_is_locked_by(&dev->struct_mutex, current))
5586 return 0;
5587
5588 if (dev_priv->mm.shrinker_no_lock_stealing)
5589 return 0;
5590
5591 unlock = false;
5592 }
5593
5594 count = 0;
5595 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list)
5596 if (obj->pages_pin_count == 0)
5597 count += obj->base.size >> PAGE_SHIFT;
5598
5599 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
5600 if (obj->active)
5601 continue;
5602
5603 if (!i915_gem_obj_is_pinned(obj) && obj->pages_pin_count == 0)
5604 count += obj->base.size >> PAGE_SHIFT;
5605 }
5606
5607 if (unlock)
5608 mutex_unlock(&dev->struct_mutex);
5609
5610 return count;
5611 #endif
5612 }
5613
5614 /* All the new VM stuff */
5615 unsigned long i915_gem_obj_offset(struct drm_i915_gem_object *o,
5616 struct i915_address_space *vm)
5617 {
5618 struct drm_i915_private *dev_priv = o->base.dev->dev_private;
5619 struct i915_vma *vma;
5620
5621 if (!dev_priv->mm.aliasing_ppgtt ||
5622 vm == &dev_priv->mm.aliasing_ppgtt->base)
5623 vm = &dev_priv->gtt.base;
5624
5625 BUG_ON(list_empty(&o->vma_list));
5626 list_for_each_entry(vma, &o->vma_list, vma_link) {
5627 if (vma->vm == vm)
5628 return vma->node.start;
5629
5630 }
5631 return -1;
5632 }
5633
5634 bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
5635 struct i915_address_space *vm)
5636 {
5637 struct i915_vma *vma;
5638
5639 list_for_each_entry(vma, &o->vma_list, vma_link)
5640 if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
5641 return true;
5642
5643 return false;
5644 }
5645
5646 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
5647 {
5648 struct i915_vma *vma;
5649
5650 list_for_each_entry(vma, &o->vma_list, vma_link)
5651 if (drm_mm_node_allocated(&vma->node))
5652 return true;
5653
5654 return false;
5655 }
5656
5657 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
5658 struct i915_address_space *vm)
5659 {
5660 struct drm_i915_private *dev_priv = o->base.dev->dev_private;
5661 struct i915_vma *vma;
5662
5663 if (!dev_priv->mm.aliasing_ppgtt ||
5664 vm == &dev_priv->mm.aliasing_ppgtt->base)
5665 vm = &dev_priv->gtt.base;
5666
5667 BUG_ON(list_empty(&o->vma_list));
5668
5669 list_for_each_entry(vma, &o->vma_list, vma_link)
5670 if (vma->vm == vm)
5671 return vma->node.size;
5672
5673 return 0;
5674 }
5675
5676 static unsigned long
5677 i915_gem_inactive_scan(struct shrinker *shrinker, struct shrink_control *sc)
5678 {
5679 #ifdef __NetBSD__ /* XXX shrinkers */
5680 return 0;
5681 #else
5682 struct drm_i915_private *dev_priv =
5683 container_of(shrinker,
5684 struct drm_i915_private,
5685 mm.inactive_shrinker);
5686 struct drm_device *dev = dev_priv->dev;
5687 unsigned long freed;
5688 bool unlock = true;
5689
5690 if (!mutex_trylock(&dev->struct_mutex)) {
5691 if (!mutex_is_locked_by(&dev->struct_mutex, current))
5692 return SHRINK_STOP;
5693
5694 if (dev_priv->mm.shrinker_no_lock_stealing)
5695 return SHRINK_STOP;
5696
5697 unlock = false;
5698 }
5699
5700 freed = i915_gem_purge(dev_priv, sc->nr_to_scan);
5701 if (freed < sc->nr_to_scan)
5702 freed += __i915_gem_shrink(dev_priv,
5703 sc->nr_to_scan - freed,
5704 false);
5705 if (freed < sc->nr_to_scan)
5706 freed += i915_gem_shrink_all(dev_priv);
5707
5708 if (unlock)
5709 mutex_unlock(&dev->struct_mutex);
5710
5711 return freed;
5712 #endif
5713 }
5714
5715 struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
5716 {
5717 struct i915_vma *vma;
5718
5719 if (WARN_ON(list_empty(&obj->vma_list)))
5720 return NULL;
5721
5722 vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link);
5723 if (vma->vm != obj_to_ggtt(obj))
5724 return NULL;
5725
5726 return vma;
5727 }
5728