1 /*
2 * Copyright 2008 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric (at) anholt.net>
25 *
26 */
27
28 #ifdef __NetBSD__
29 #if 0 /* XXX uvmhist option? */
30 #include "opt_uvmhist.h"
31 #endif
32
33 #include <sys/types.h>
34 #include <sys/param.h>
35
36 #include <uvm/uvm.h>
37 #include <uvm/uvm_extern.h>
38 #include <uvm/uvm_fault.h>
39 #include <uvm/uvm_page.h>
40 #include <uvm/uvm_pmap.h>
41 #include <uvm/uvm_prot.h>
42
43 #include <drm/bus_dma_hacks.h>
44 #endif
45
46 #include <drm/drmP.h>
47 #include <drm/drm_vma_manager.h>
48 #include <drm/i915_drm.h>
49 #include "i915_drv.h"
50 #include "i915_trace.h"
51 #include "intel_drv.h"
52 #include <linux/shmem_fs.h>
53 #include <linux/slab.h>
54 #include <linux/swap.h>
55 #include <linux/pci.h>
56 #include <linux/dma-buf.h>
57 #include <linux/errno.h>
58 #include <linux/time.h>
59 #include <linux/err.h>
60 #include <linux/bitops.h>
61 #include <linux/printk.h>
62 #include <asm/param.h>
63 #include <asm/page.h>
64
65 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
66 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
67 bool force);
68 static __must_check int
69 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
70 bool readonly);
71
72 static void i915_gem_write_fence(struct drm_device *dev, int reg,
73 struct drm_i915_gem_object *obj);
74 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
75 struct drm_i915_fence_reg *fence,
76 bool enable);
77
78 static unsigned long i915_gem_inactive_count(struct shrinker *shrinker,
79 struct shrink_control *sc);
80 static unsigned long i915_gem_inactive_scan(struct shrinker *shrinker,
81 struct shrink_control *sc);
82 static unsigned long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
83 static unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv);
84 static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
85 static void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring);
86
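/*
 * CPU accesses are coherent with the GPU when the platform shares its LLC
 * with the GPU or the object is kept at a snoopable cache level (anything
 * other than I915_CACHE_NONE).
 */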
87 static bool cpu_cache_is_coherent(struct drm_device *dev,
88 enum i915_cache_level level)
89 {
90 return HAS_LLC(dev) || level != I915_CACHE_NONE;
91 }
92
93 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
94 {
95 if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
96 return true;
97
98 return obj->pin_display;
99 }
100
101 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
102 {
103 if (obj->tiling_mode)
104 i915_gem_release_mmap(obj);
105
106 /* As we do not have an associated fence register, we will force
107 * a tiling change if we ever need to acquire one.
108 */
109 obj->fence_dirty = false;
110 obj->fence_reg = I915_FENCE_REG_NONE;
111 }
112
113 /* some bookkeeping */
114 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
115 size_t size)
116 {
117 spin_lock(&dev_priv->mm.object_stat_lock);
118 dev_priv->mm.object_count++;
119 dev_priv->mm.object_memory += size;
120 spin_unlock(&dev_priv->mm.object_stat_lock);
121 }
122
123 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
124 size_t size)
125 {
126 spin_lock(&dev_priv->mm.object_stat_lock);
127 dev_priv->mm.object_count--;
128 dev_priv->mm.object_memory -= size;
129 spin_unlock(&dev_priv->mm.object_stat_lock);
130 }
131
132 static int
133 i915_gem_wait_for_error(struct i915_gpu_error *error)
134 {
135 int ret;
136
137 #define EXIT_COND (!i915_reset_in_progress(error) || \
138 i915_terminally_wedged(error))
139 if (EXIT_COND)
140 return 0;
141
142 /*
143 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
144 * userspace. If it takes that long something really bad is going on and
145 * we should simply try to bail out and fail as gracefully as possible.
146 */
147 #ifdef __NetBSD__
148 spin_lock(&error->reset_lock);
149 DRM_SPIN_TIMED_WAIT_UNTIL(ret, &error->reset_queue, &error->reset_lock,
150 10*HZ, EXIT_COND);
151 spin_unlock(&error->reset_lock);
152 #else
153 ret = wait_event_interruptible_timeout(error->reset_queue,
154 EXIT_COND,
155 10*HZ);
156 #endif
157 if (ret == 0) {
158 DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
159 return -EIO;
160 } else if (ret < 0) {
161 return ret;
162 }
163 #undef EXIT_COND
164
165 return 0;
166 }
167
168 int i915_mutex_lock_interruptible(struct drm_device *dev)
169 {
170 struct drm_i915_private *dev_priv = dev->dev_private;
171 int ret;
172
173 ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
174 if (ret)
175 return ret;
176
177 ret = mutex_lock_interruptible(&dev->struct_mutex);
178 if (ret)
179 return ret;
180
181 WARN_ON(i915_verify_lists(dev));
182 return 0;
183 }
184
185 static inline bool
186 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
187 {
188 return i915_gem_obj_bound_any(obj) && !obj->active;
189 }
190
191 int
192 i915_gem_init_ioctl(struct drm_device *dev, void *data,
193 struct drm_file *file)
194 {
195 struct drm_i915_private *dev_priv = dev->dev_private;
196 struct drm_i915_gem_init *args = data;
197
198 if (drm_core_check_feature(dev, DRIVER_MODESET))
199 return -ENODEV;
200
201 if (args->gtt_start >= args->gtt_end ||
202 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
203 return -EINVAL;
204
205 /* GEM with user mode setting was never supported on ilk and later. */
206 if (INTEL_INFO(dev)->gen >= 5)
207 return -ENODEV;
208
209 mutex_lock(&dev->struct_mutex);
210 i915_gem_setup_global_gtt(dev, args->gtt_start, args->gtt_end,
211 args->gtt_end);
212 dev_priv->gtt.mappable_end = args->gtt_end;
213 mutex_unlock(&dev->struct_mutex);
214
215 return 0;
216 }
217
218 int
219 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
220 struct drm_file *file)
221 {
222 struct drm_i915_private *dev_priv = dev->dev_private;
223 struct drm_i915_gem_get_aperture *args = data;
224 struct drm_i915_gem_object *obj;
225 size_t pinned;
226
227 pinned = 0;
228 mutex_lock(&dev->struct_mutex);
229 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
230 if (i915_gem_obj_is_pinned(obj))
231 pinned += i915_gem_obj_ggtt_size(obj);
232 mutex_unlock(&dev->struct_mutex);
233
234 args->aper_size = dev_priv->gtt.base.total;
235 args->aper_available_size = args->aper_size - pinned;
236
237 return 0;
238 }
239
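/*
 * Detach a legacy "phys" object: if the pages are still wanted, copy the
 * contiguous DMA buffer back into the object's backing pages (flushing each
 * page), then free the DMA buffer.
 */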
240 static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
241 {
242 drm_dma_handle_t *phys = obj->phys_handle;
243
244 if (!phys)
245 return;
246
247 if (obj->madv == I915_MADV_WILLNEED) {
248 #ifdef __NetBSD__
249 const char *vaddr = phys->vaddr;
250 unsigned i;
251
252 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
253 struct pglist pages;
254 int error;
255
256 TAILQ_INIT(&pages);
257 error = uvm_obj_wirepages(obj->base.gemo_shm_uao,
258 i*PAGE_SIZE, (i+1)*PAGE_SIZE, &pages);
259 if (error)
260 continue;
261
262 struct vm_page *const vm_page = TAILQ_FIRST(&pages);
263 struct page *const page = container_of(vm_page,
264 struct page, p_vmp);
265 char *const dst = kmap_atomic(page);
266 (void)memcpy(dst, vaddr + (i*PAGE_SIZE), PAGE_SIZE);
267 drm_clflush_virt_range(dst, PAGE_SIZE);
268 kunmap_atomic(dst);
269
270 vm_page->flags &= ~PG_CLEAN;
271 /* XXX mark page accessed */
272 uvm_obj_unwirepages(obj->base.gemo_shm_uao,
273 i*PAGE_SIZE, (i+1)*PAGE_SIZE);
274 }
275 #else
276 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
277 char *vaddr = phys->vaddr;
278 int i;
279
280 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
281 struct page *page = shmem_read_mapping_page(mapping, i);
282 if (!IS_ERR(page)) {
283 char *dst = kmap_atomic(page);
284 memcpy(dst, vaddr, PAGE_SIZE);
285 drm_clflush_virt_range(dst, PAGE_SIZE);
286 kunmap_atomic(dst);
287
288 set_page_dirty(page);
289 mark_page_accessed(page);
290 page_cache_release(page);
291 }
292 vaddr += PAGE_SIZE;
293 }
294 #endif
295 i915_gem_chipset_flush(obj->base.dev);
296 }
297
298 #ifndef __NetBSD__
299 #ifdef CONFIG_X86
300 set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
301 #endif
302 #endif
303 drm_pci_free(obj->base.dev, phys);
304 obj->phys_handle = NULL;
305 }
306
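/*
 * Attach a physically contiguous DMA buffer to the object, suitably aligned,
 * and seed it with the current contents of the object's backing pages.
 * Fails if the object is not marked WILLNEED or has no shmem/uao backing.
 */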
307 int
308 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
309 int align)
310 {
311 drm_dma_handle_t *phys;
312 #ifndef __NetBSD__
313 struct address_space *mapping;
314 #endif
315 char *vaddr;
316 int i;
317
318 if (obj->phys_handle) {
319 if ((unsigned long)obj->phys_handle->vaddr & (align - 1))
320 return -EBUSY;
321
322 return 0;
323 }
324
325 if (obj->madv != I915_MADV_WILLNEED)
326 return -EFAULT;
327
328 #ifdef __NetBSD__
329 if (obj->base.gemo_shm_uao == NULL)
330 return -EINVAL;
331 #else
332 if (obj->base.filp == NULL)
333 return -EINVAL;
334 #endif
335
336 /* create a new object */
337 phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
338 if (!phys)
339 return -ENOMEM;
340
341 vaddr = phys->vaddr;
342 #ifndef __NetBSD__
343 #ifdef CONFIG_X86
344 set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
345 #endif
346 mapping = file_inode(obj->base.filp)->i_mapping;
347 #endif
348 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
349 struct page *page;
350 char *src;
351
352 #ifdef __NetBSD__
353 struct pglist pages;
354 int ret;
355
356 TAILQ_INIT(&pages);
357
358 /* XXX errno NetBSD->Linux */
359 ret = -uvm_obj_wirepages(obj->base.gemo_shm_uao, i*PAGE_SIZE,
360 (i+1)*PAGE_SIZE, &pages);
361 if (ret) {
362 drm_pci_free(obj->base.dev, phys);
363 return ret;
364 }
365 KASSERT(!TAILQ_EMPTY(&pages));
366 page = container_of(TAILQ_FIRST(&pages), struct page, p_vmp);
367 #else
368 page = shmem_read_mapping_page(mapping, i);
369 if (IS_ERR(page)) {
370 #ifdef CONFIG_X86
371 set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
372 #endif
373 drm_pci_free(obj->base.dev, phys);
374 return PTR_ERR(page);
375 }
376 #endif /* defined(__NetBSD__) */
377
378 src = kmap_atomic(page);
379 memcpy(vaddr, src, PAGE_SIZE);
380 kunmap_atomic(src);
381
382 #ifndef __NetBSD__
383 mark_page_accessed(page);
384 page_cache_release(page);
385 #endif
386
387 vaddr += PAGE_SIZE;
388 }
389
390 obj->phys_handle = phys;
391 return 0;
392 }
393
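/*
 * pwrite into a phys object's contiguous buffer. Try an atomic uncached copy
 * first; if that faults, drop struct_mutex and retry with a sleeping
 * copy_from_user, which is safe because the phys buffer stays fixed for the
 * lifetime of the object.
 */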
394 static int
395 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
396 struct drm_i915_gem_pwrite *args,
397 struct drm_file *file_priv)
398 {
399 struct drm_device *dev = obj->base.dev;
400 void *vaddr = (char *)obj->phys_handle->vaddr + args->offset;
401 char __user *user_data = to_user_ptr(args->data_ptr);
402
403 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
404 unsigned long unwritten;
405
406 /* The physical object once assigned is fixed for the lifetime
407 * of the obj, so we can safely drop the lock and continue
408 * to access vaddr.
409 */
410 mutex_unlock(&dev->struct_mutex);
411 unwritten = copy_from_user(vaddr, user_data, args->size);
412 mutex_lock(&dev->struct_mutex);
413 if (unwritten)
414 return -EFAULT;
415 }
416
417 i915_gem_chipset_flush(dev);
418 return 0;
419 }
420
421 void *i915_gem_object_alloc(struct drm_device *dev)
422 {
423 struct drm_i915_private *dev_priv = dev->dev_private;
424 return kmem_cache_zalloc(dev_priv->slab, GFP_KERNEL);
425 }
426
427 void i915_gem_object_free(struct drm_i915_gem_object *obj)
428 {
429 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
430 kmem_cache_free(dev_priv->slab, obj);
431 }
432
433 static int
434 i915_gem_create(struct drm_file *file,
435 struct drm_device *dev,
436 uint64_t size,
437 uint32_t *handle_p)
438 {
439 struct drm_i915_gem_object *obj;
440 int ret;
441 u32 handle;
442
443 size = roundup(size, PAGE_SIZE);
444 if (size == 0)
445 return -EINVAL;
446
447 /* Allocate the new object */
448 obj = i915_gem_alloc_object(dev, size);
449 if (obj == NULL)
450 return -ENOMEM;
451
452 ret = drm_gem_handle_create(file, &obj->base, &handle);
453 /* drop reference from allocate - handle holds it now */
454 drm_gem_object_unreference_unlocked(&obj->base);
455 if (ret)
456 return ret;
457
458 *handle_p = handle;
459 return 0;
460 }
461
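/*
 * Dumb buffers: derive a 64-byte-aligned pitch from the requested width and
 * bits-per-pixel, size the object as pitch * height, and create it.
 */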
462 int
463 i915_gem_dumb_create(struct drm_file *file,
464 struct drm_device *dev,
465 struct drm_mode_create_dumb *args)
466 {
467 /* have to work out size/pitch and return them */
468 #ifdef __NetBSD__ /* ALIGN means something else. */
469 args->pitch = round_up(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
470 #else
471 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
472 #endif
473 args->size = args->pitch * args->height;
474 return i915_gem_create(file, dev,
475 args->size, &args->handle);
476 }
477
478 /**
479 * Creates a new mm object and returns a handle to it.
480 */
481 int
482 i915_gem_create_ioctl(struct drm_device *dev, void *data,
483 struct drm_file *file)
484 {
485 struct drm_i915_gem_create *args = data;
486
487 return i915_gem_create(file, dev,
488 args->size, &args->handle);
489 }
490
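/*
 * Copy out of a bit-17-swizzled backing page: adjacent 64-byte cachelines are
 * swapped, so walk the source one cacheline at a time and XOR the GPU offset
 * with 64 to select the swizzled source line.
 */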
491 static inline int
492 __copy_to_user_swizzled(char __user *cpu_vaddr,
493 const char *gpu_vaddr, int gpu_offset,
494 int length)
495 {
496 int ret, cpu_offset = 0;
497
498 while (length > 0) {
499 #ifdef __NetBSD__ /* XXX ALIGN means something else. */
500 int cacheline_end = round_up(gpu_offset + 1, 64);
501 #else
502 int cacheline_end = ALIGN(gpu_offset + 1, 64);
503 #endif
504 int this_length = min(cacheline_end - gpu_offset, length);
505 int swizzled_gpu_offset = gpu_offset ^ 64;
506
507 ret = __copy_to_user(cpu_vaddr + cpu_offset,
508 gpu_vaddr + swizzled_gpu_offset,
509 this_length);
510 if (ret)
511 return ret + length;
512
513 cpu_offset += this_length;
514 gpu_offset += this_length;
515 length -= this_length;
516 }
517
518 return 0;
519 }
520
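/* Inverse of __copy_to_user_swizzled: copy user data into a bit-17-swizzled page. */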
521 static inline int
522 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
523 const char __user *cpu_vaddr,
524 int length)
525 {
526 int ret, cpu_offset = 0;
527
528 while (length > 0) {
529 #ifdef __NetBSD__ /* XXX ALIGN means something else. */
530 int cacheline_end = round_up(gpu_offset + 1, 64);
531 #else
532 int cacheline_end = ALIGN(gpu_offset + 1, 64);
533 #endif
534 int this_length = min(cacheline_end - gpu_offset, length);
535 int swizzled_gpu_offset = gpu_offset ^ 64;
536
537 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
538 cpu_vaddr + cpu_offset,
539 this_length);
540 if (ret)
541 return ret + length;
542
543 cpu_offset += this_length;
544 gpu_offset += this_length;
545 length -= this_length;
546 }
547
548 return 0;
549 }
550
551 /*
552 * Pins the specified object's pages and synchronizes the object with
553 * GPU accesses. Sets needs_clflush to non-zero if the caller should
554 * flush the object from the CPU cache.
555 */
556 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
557 int *needs_clflush)
558 {
559 int ret;
560
561 *needs_clflush = 0;
562
563 #ifdef __NetBSD__
564 if (obj->base.gemo_shm_uao == NULL)
565 return -EINVAL;
566 #else
567 if (!obj->base.filp)
568 return -EINVAL;
569 #endif
570
571 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
572 /* If we're not in the cpu read domain, set ourselves into the gtt
573 * read domain and manually flush cachelines (if required). This
574 * optimizes for the case when the gpu will dirty the data
575 * anyway again before the next pread happens. */
576 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
577 obj->cache_level);
578 ret = i915_gem_object_wait_rendering(obj, true);
579 if (ret)
580 return ret;
581 }
582
583 ret = i915_gem_object_get_pages(obj);
584 if (ret)
585 return ret;
586
587 i915_gem_object_pin_pages(obj);
588
589 return ret;
590 }
591
592 /* Per-page copy function for the shmem pread fastpath.
593 * Flushes invalid cachelines before reading the target if
594 * needs_clflush is set. */
595 static int
596 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
597 char __user *user_data,
598 bool page_do_bit17_swizzling, bool needs_clflush)
599 {
600 #ifdef __NetBSD__ /* XXX atomic shmem fast path */
601 return -EFAULT;
602 #else
603 char *vaddr;
604 int ret;
605
606 if (unlikely(page_do_bit17_swizzling))
607 return -EINVAL;
608
609 vaddr = kmap_atomic(page);
610 if (needs_clflush)
611 drm_clflush_virt_range(vaddr + shmem_page_offset,
612 page_length);
613 ret = __copy_to_user_inatomic(user_data,
614 vaddr + shmem_page_offset,
615 page_length);
616 kunmap_atomic(vaddr);
617
618 return ret ? -EFAULT : 0;
619 #endif
620 }
621
622 static void
623 shmem_clflush_swizzled_range(char *addr, unsigned long length,
624 bool swizzled)
625 {
626 if (unlikely(swizzled)) {
627 unsigned long start = (unsigned long) addr;
628 unsigned long end = (unsigned long) addr + length;
629
630 /* For swizzling simply ensure that we always flush both
631 * channels. Lame, but simple and it works. Swizzled
632 * pwrite/pread is far from a hotpath - current userspace
633 * doesn't use it at all. */
634 start = round_down(start, 128);
635 end = round_up(end, 128);
636
637 drm_clflush_virt_range((void *)start, end - start);
638 } else {
639 drm_clflush_virt_range(addr, length);
640 }
641
642 }
643
644 /* The only difference from the fast-path function is that this one can handle
645 * bit17 swizzling and uses non-atomic copy and kmap functions. */
646 static int
647 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
648 char __user *user_data,
649 bool page_do_bit17_swizzling, bool needs_clflush)
650 {
651 char *vaddr;
652 int ret;
653
654 vaddr = kmap(page);
655 if (needs_clflush)
656 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
657 page_length,
658 page_do_bit17_swizzling);
659
660 if (page_do_bit17_swizzling)
661 ret = __copy_to_user_swizzled(user_data,
662 vaddr, shmem_page_offset,
663 page_length);
664 else
665 ret = __copy_to_user(user_data,
666 vaddr + shmem_page_offset,
667 page_length);
668 kunmap(page);
669
670 return ret ? -EFAULT : 0;
671 }
672
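/*
 * Slow-path pread through the object's backing pages using CPU copies,
 * clflushing stale cachelines first when the object is not coherent. Each
 * page is tried with the atomic fast path and falls back to the sleeping
 * slow path (dropping struct_mutex) if that faults.
 */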
673 static int
674 i915_gem_shmem_pread(struct drm_device *dev,
675 struct drm_i915_gem_object *obj,
676 struct drm_i915_gem_pread *args,
677 struct drm_file *file)
678 {
679 char __user *user_data;
680 ssize_t remain;
681 loff_t offset;
682 int shmem_page_offset, page_length, ret = 0;
683 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
684 #ifndef __NetBSD__ /* XXX */
685 int prefaulted = 0;
686 #endif
687 int needs_clflush = 0;
688 #ifndef __NetBSD__
689 struct sg_page_iter sg_iter;
690 #endif
691
692 user_data = to_user_ptr(args->data_ptr);
693 remain = args->size;
694
695 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
696
697 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
698 if (ret)
699 return ret;
700
701 offset = args->offset;
702
703 #ifdef __NetBSD__
704 /*
705 * XXX This is a big #ifdef with a lot of duplicated code, but
706 * factoring out the loop head -- which is all that
707 * substantially differs -- is probably more trouble than it's
708 * worth at the moment.
709 */
710 while (0 < remain) {
711 /* Get the next page. */
712 shmem_page_offset = offset_in_page(offset);
713 KASSERT(shmem_page_offset < PAGE_SIZE);
714 page_length = MIN(remain, (PAGE_SIZE - shmem_page_offset));
715 struct page *const page = i915_gem_object_get_page(obj,
716 atop(offset));
717
718 /* Decide whether to swizzle bit 17. */
719 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
720 (page_to_phys(page) & (1 << 17)) != 0;
721
722 /* Try the fast path. */
723 ret = shmem_pread_fast(page, shmem_page_offset, page_length,
724 user_data, page_do_bit17_swizzling, needs_clflush);
725 if (ret == 0)
726 goto next_page;
727
728 /* Fast path failed. Try the slow path. */
729 mutex_unlock(&dev->struct_mutex);
730 /* XXX prefault */
731 ret = shmem_pread_slow(page, shmem_page_offset, page_length,
732 user_data, page_do_bit17_swizzling, needs_clflush);
733 mutex_lock(&dev->struct_mutex);
734 if (ret)
735 goto out;
736
737 next_page: KASSERT(page_length <= remain);
738 remain -= page_length;
739 user_data += page_length;
740 offset += page_length;
741 }
742 #else
743 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
744 offset >> PAGE_SHIFT) {
745 struct page *page = sg_page_iter_page(&sg_iter);
746
747 if (remain <= 0)
748 break;
749
750 /* Operation in this page
751 *
752 * shmem_page_offset = offset within page in shmem file
753 * page_length = bytes to copy for this page
754 */
755 shmem_page_offset = offset_in_page(offset);
756 page_length = remain;
757 if ((shmem_page_offset + page_length) > PAGE_SIZE)
758 page_length = PAGE_SIZE - shmem_page_offset;
759
760 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
761 (page_to_phys(page) & (1 << 17)) != 0;
762
763 ret = shmem_pread_fast(page, shmem_page_offset, page_length,
764 user_data, page_do_bit17_swizzling,
765 needs_clflush);
766 if (ret == 0)
767 goto next_page;
768
769 mutex_unlock(&dev->struct_mutex);
770
771 if (likely(!i915.prefault_disable) && !prefaulted) {
772 ret = fault_in_multipages_writeable(user_data, remain);
773 /* Userspace is tricking us, but we've already clobbered
774 * its pages with the prefault and promised to write the
775 * data up to the first fault. Hence ignore any errors
776 * and just continue. */
777 (void)ret;
778 prefaulted = 1;
779 }
780
781 ret = shmem_pread_slow(page, shmem_page_offset, page_length,
782 user_data, page_do_bit17_swizzling,
783 needs_clflush);
784
785 mutex_lock(&dev->struct_mutex);
786
787 if (ret)
788 goto out;
789
790 next_page:
791 remain -= page_length;
792 user_data += page_length;
793 offset += page_length;
794 }
795 #endif
796
797 out:
798 i915_gem_object_unpin_pages(obj);
799
800 return ret;
801 }
802
803 /**
804 * Reads data from the object referenced by handle.
805 *
806 * On error, the contents of *data are undefined.
807 */
808 int
809 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
810 struct drm_file *file)
811 {
812 struct drm_i915_gem_pread *args = data;
813 struct drm_gem_object *gobj;
814 struct drm_i915_gem_object *obj;
815 int ret = 0;
816
817 if (args->size == 0)
818 return 0;
819
820 if (!access_ok(VERIFY_WRITE,
821 to_user_ptr(args->data_ptr),
822 args->size))
823 return -EFAULT;
824
825 ret = i915_mutex_lock_interruptible(dev);
826 if (ret)
827 return ret;
828
829 gobj = drm_gem_object_lookup(dev, file, args->handle);
830 if (gobj == NULL) {
831 ret = -ENOENT;
832 goto unlock;
833 }
834 obj = to_intel_bo(gobj);
835
836 /* Bounds check source. */
837 if (args->offset > obj->base.size ||
838 args->size > obj->base.size - args->offset) {
839 ret = -EINVAL;
840 goto out;
841 }
842
843 /* prime objects have no backing filp to GEM pread/pwrite
844 * pages from.
845 */
846 #ifdef __NetBSD__
847 /* Also stolen objects. */
848 if (obj->base.gemo_shm_uao == NULL) {
849 ret = -EINVAL;
850 goto out;
851 }
852 #else
853 if (!obj->base.filp) {
854 ret = -EINVAL;
855 goto out;
856 }
857 #endif
858
859 trace_i915_gem_object_pread(obj, args->offset, args->size);
860
861 ret = i915_gem_shmem_pread(dev, obj, args, file);
862
863 out:
864 drm_gem_object_unreference(&obj->base);
865 unlock:
866 mutex_unlock(&dev->struct_mutex);
867 return ret;
868 }
869
870 /* This is the fast write path which cannot handle
871 * page faults in the source data
872 */
873
874 static inline int
875 fast_user_write(struct io_mapping *mapping,
876 loff_t page_base, int page_offset,
877 char __user *user_data,
878 int length)
879 {
880 #ifdef __NetBSD__ /* XXX atomic shmem fast path */
881 return -EFAULT;
882 #else
883 void __iomem *vaddr_atomic;
884 void *vaddr;
885 unsigned long unwritten;
886
887 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
888 /* We can use the cpu mem copy function because this is X86. */
889 vaddr = (void __force*)vaddr_atomic + page_offset;
890 unwritten = __copy_from_user_inatomic_nocache(vaddr,
891 user_data, length);
892 io_mapping_unmap_atomic(vaddr_atomic);
893 return unwritten;
894 #endif
895 }
896
897 /**
898 * This is the fast pwrite path, where we copy the data directly from the
899 * user into the GTT, uncached.
900 */
901 static int
902 i915_gem_gtt_pwrite_fast(struct drm_device *dev,
903 struct drm_i915_gem_object *obj,
904 struct drm_i915_gem_pwrite *args,
905 struct drm_file *file)
906 {
907 struct drm_i915_private *dev_priv = dev->dev_private;
908 ssize_t remain;
909 loff_t offset, page_base;
910 char __user *user_data;
911 int page_offset, page_length, ret;
912
913 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
914 if (ret)
915 goto out;
916
917 ret = i915_gem_object_set_to_gtt_domain(obj, true);
918 if (ret)
919 goto out_unpin;
920
921 ret = i915_gem_object_put_fence(obj);
922 if (ret)
923 goto out_unpin;
924
925 user_data = to_user_ptr(args->data_ptr);
926 remain = args->size;
927
928 offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
929
930 while (remain > 0) {
931 /* Operation in this page
932 *
933 * page_base = page offset within aperture
934 * page_offset = offset within page
935 * page_length = bytes to copy for this page
936 */
937 page_base = offset & PAGE_MASK;
938 page_offset = offset_in_page(offset);
939 page_length = remain;
940 if ((page_offset + remain) > PAGE_SIZE)
941 page_length = PAGE_SIZE - page_offset;
942
943 /* If we get a fault while copying data, then (presumably) our
944 * source page isn't available. Return the error and we'll
945 * retry in the slow path.
946 */
947 if (fast_user_write(dev_priv->gtt.mappable, page_base,
948 page_offset, user_data, page_length)) {
949 ret = -EFAULT;
950 goto out_unpin;
951 }
952
953 remain -= page_length;
954 user_data += page_length;
955 offset += page_length;
956 }
957
958 out_unpin:
959 i915_gem_object_ggtt_unpin(obj);
960 out:
961 return ret;
962 }
963
964 /* Per-page copy function for the shmem pwrite fastpath.
965 * Flushes invalid cachelines before writing to the target if
966 * needs_clflush_before is set and flushes out any written cachelines after
967 * writing if needs_clflush_after is set. */
968 static int
969 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
970 char __user *user_data,
971 bool page_do_bit17_swizzling,
972 bool needs_clflush_before,
973 bool needs_clflush_after)
974 {
975 #ifdef __NetBSD__
976 return -EFAULT;
977 #else
978 char *vaddr;
979 int ret;
980
981 if (unlikely(page_do_bit17_swizzling))
982 return -EINVAL;
983
984 vaddr = kmap_atomic(page);
985 if (needs_clflush_before)
986 drm_clflush_virt_range(vaddr + shmem_page_offset,
987 page_length);
988 ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
989 user_data, page_length);
990 if (needs_clflush_after)
991 drm_clflush_virt_range(vaddr + shmem_page_offset,
992 page_length);
993 kunmap_atomic(vaddr);
994
995 return ret ? -EFAULT : 0;
996 #endif
997 }
998
999 /* The only difference from the fast-path function is that this one can handle
1000 * bit17 swizzling and uses non-atomic copy and kmap functions. */
1001 static int
1002 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
1003 char __user *user_data,
1004 bool page_do_bit17_swizzling,
1005 bool needs_clflush_before,
1006 bool needs_clflush_after)
1007 {
1008 char *vaddr;
1009 int ret;
1010
1011 vaddr = kmap(page);
1012 if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
1013 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
1014 page_length,
1015 page_do_bit17_swizzling);
1016 if (page_do_bit17_swizzling)
1017 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
1018 user_data,
1019 page_length);
1020 else
1021 ret = __copy_from_user(vaddr + shmem_page_offset,
1022 user_data,
1023 page_length);
1024 if (needs_clflush_after)
1025 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
1026 page_length,
1027 page_do_bit17_swizzling);
1028 kunmap(page);
1029
1030 return ret ? -EFAULT : 0;
1031 }
1032
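/*
 * Slow-path pwrite through the object's backing pages using CPU copies,
 * flushing cachelines before and/or after each copy as the object's cache
 * coherency requires. Falls back from the atomic per-page fast path to a
 * sleeping slow path that drops struct_mutex.
 */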
1033 static int
1034 i915_gem_shmem_pwrite(struct drm_device *dev,
1035 struct drm_i915_gem_object *obj,
1036 struct drm_i915_gem_pwrite *args,
1037 struct drm_file *file)
1038 {
1039 ssize_t remain;
1040 loff_t offset;
1041 char __user *user_data;
1042 int shmem_page_offset, page_length, ret = 0;
1043 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
1044 int hit_slowpath = 0;
1045 int needs_clflush_after = 0;
1046 int needs_clflush_before = 0;
1047 #ifndef __NetBSD__
1048 struct sg_page_iter sg_iter;
1049 #endif
1050
1051 user_data = to_user_ptr(args->data_ptr);
1052 remain = args->size;
1053
1054 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
1055
1056 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1057 /* If we're not in the cpu write domain, set ourselves into the gtt
1058 * write domain and manually flush cachelines (if required). This
1059 * optimizes for the case when the gpu will use the data
1060 * right away and we therefore have to clflush anyway. */
1061 needs_clflush_after = cpu_write_needs_clflush(obj);
1062 ret = i915_gem_object_wait_rendering(obj, false);
1063 if (ret)
1064 return ret;
1065 }
1066 /* Same trick applies to invalidate partially written cachelines read
1067 * before writing. */
1068 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
1069 needs_clflush_before =
1070 !cpu_cache_is_coherent(dev, obj->cache_level);
1071
1072 ret = i915_gem_object_get_pages(obj);
1073 if (ret)
1074 return ret;
1075
1076 i915_gem_object_pin_pages(obj);
1077
1078 offset = args->offset;
1079 obj->dirty = 1;
1080
1081 #ifdef __NetBSD__
1082 while (0 < remain) {
1083 /* Get the next page. */
1084 shmem_page_offset = offset_in_page(offset);
1085 KASSERT(shmem_page_offset < PAGE_SIZE);
1086 page_length = MIN(remain, (PAGE_SIZE - shmem_page_offset));
1087 struct page *const page = i915_gem_object_get_page(obj,
1088 atop(offset));
1089
1090 /* Decide whether to flush the cache or swizzle bit 17. */
1091 const bool partial_cacheline_write = needs_clflush_before &&
1092 ((shmem_page_offset | page_length)
1093 & (cpu_info_primary.ci_cflush_lsize - 1));
1094 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
1095 (page_to_phys(page) & (1 << 17)) != 0;
1096
1097 /* Try the fast path. */
1098 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
1099 user_data, page_do_bit17_swizzling,
1100 partial_cacheline_write, needs_clflush_after);
1101 if (ret == 0)
1102 goto next_page;
1103
1104 /* Fast path failed. Try the slow path. */
1105 hit_slowpath = 1;
1106 mutex_unlock(&dev->struct_mutex);
1107 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
1108 user_data, page_do_bit17_swizzling,
1109 partial_cacheline_write, needs_clflush_after);
1110 mutex_lock(&dev->struct_mutex);
1111 if (ret)
1112 goto out;
1113
1114 next_page: KASSERT(page_length <= remain);
1115 remain -= page_length;
1116 user_data += page_length;
1117 offset += page_length;
1118 }
1119 #else
1120 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
1121 offset >> PAGE_SHIFT) {
1122 struct page *page = sg_page_iter_page(&sg_iter);
1123 int partial_cacheline_write;
1124
1125 if (remain <= 0)
1126 break;
1127
1128 /* Operation in this page
1129 *
1130 * shmem_page_offset = offset within page in shmem file
1131 * page_length = bytes to copy for this page
1132 */
1133 shmem_page_offset = offset_in_page(offset);
1134
1135 page_length = remain;
1136 if ((shmem_page_offset + page_length) > PAGE_SIZE)
1137 page_length = PAGE_SIZE - shmem_page_offset;
1138
1139 /* If we don't overwrite a cacheline completely we need to be
1140 * careful to have up-to-date data by first clflushing. Don't
1141 * overcomplicate things and flush the entire page. */
1142 partial_cacheline_write = needs_clflush_before &&
1143 ((shmem_page_offset | page_length)
1144 & (boot_cpu_data.x86_clflush_size - 1));
1145
1146 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
1147 (page_to_phys(page) & (1 << 17)) != 0;
1148
1149 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
1150 user_data, page_do_bit17_swizzling,
1151 partial_cacheline_write,
1152 needs_clflush_after);
1153 if (ret == 0)
1154 goto next_page;
1155
1156 hit_slowpath = 1;
1157 mutex_unlock(&dev->struct_mutex);
1158 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
1159 user_data, page_do_bit17_swizzling,
1160 partial_cacheline_write,
1161 needs_clflush_after);
1162
1163 mutex_lock(&dev->struct_mutex);
1164
1165 if (ret)
1166 goto out;
1167
1168 next_page:
1169 remain -= page_length;
1170 user_data += page_length;
1171 offset += page_length;
1172 }
1173 #endif
1174
1175 out:
1176 i915_gem_object_unpin_pages(obj);
1177
1178 if (hit_slowpath) {
1179 /*
1180 * Fixup: Flush cpu caches in case we didn't flush the dirty
1181 * cachelines in-line while writing and the object moved
1182 * out of the cpu write domain while we've dropped the lock.
1183 */
1184 if (!needs_clflush_after &&
1185 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1186 if (i915_gem_clflush_object(obj, obj->pin_display))
1187 i915_gem_chipset_flush(dev);
1188 }
1189 }
1190
1191 if (needs_clflush_after)
1192 i915_gem_chipset_flush(dev);
1193
1194 return ret;
1195 }
1196
1197 /**
1198 * Writes data to the object referenced by handle.
1199 *
1200 * On error, the contents of the buffer that were to be modified are undefined.
1201 */
1202 int
1203 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1204 struct drm_file *file)
1205 {
1206 struct drm_i915_gem_pwrite *args = data;
1207 struct drm_gem_object *gobj;
1208 struct drm_i915_gem_object *obj;
1209 int ret;
1210
1211 if (args->size == 0)
1212 return 0;
1213
1214 if (!access_ok(VERIFY_READ,
1215 to_user_ptr(args->data_ptr),
1216 args->size))
1217 return -EFAULT;
1218
1219 #ifndef __NetBSD__ /* XXX prefault */
1220 if (likely(!i915.prefault_disable)) {
1221 ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
1222 args->size);
1223 if (ret)
1224 return -EFAULT;
1225 }
1226 #endif
1227
1228 ret = i915_mutex_lock_interruptible(dev);
1229 if (ret)
1230 return ret;
1231
1232 gobj = drm_gem_object_lookup(dev, file, args->handle);
1233 if (gobj == NULL) {
1234 ret = -ENOENT;
1235 goto unlock;
1236 }
1237 obj = to_intel_bo(gobj);
1238
1239 /* Bounds check destination. */
1240 if (args->offset > obj->base.size ||
1241 args->size > obj->base.size - args->offset) {
1242 ret = -EINVAL;
1243 goto out;
1244 }
1245
1246 /* prime objects have no backing filp to GEM pread/pwrite
1247 * pages from.
1248 */
1249 #ifdef __NetBSD__
1250 /* Also stolen objects. */
1251 if (obj->base.gemo_shm_uao == NULL) {
1252 ret = -EINVAL;
1253 goto out;
1254 }
1255 #else
1256 if (!obj->base.filp) {
1257 ret = -EINVAL;
1258 goto out;
1259 }
1260 #endif
1261
1262 trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1263
1264 ret = -EFAULT;
1265 /* We can only do the GTT pwrite on untiled buffers, as otherwise
1266 * it would end up going through the fenced access, and we'll get
1267 * different detiling behavior between reading and writing.
1268 * pread/pwrite currently are reading and writing from the CPU
1269 * perspective, requiring manual detiling by the client.
1270 */
1271 if (obj->phys_handle) {
1272 ret = i915_gem_phys_pwrite(obj, args, file);
1273 goto out;
1274 }
1275
1276 if (obj->tiling_mode == I915_TILING_NONE &&
1277 obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
1278 cpu_write_needs_clflush(obj)) {
1279 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
1280 /* Note that the gtt paths might fail with non-page-backed user
1281 * pointers (e.g. gtt mappings when moving data between
1282 * textures). Fall back to the shmem path in that case. */
1283 }
1284
1285 if (ret == -EFAULT || ret == -ENOSPC)
1286 ret = i915_gem_shmem_pwrite(dev, obj, args, file);
1287
1288 out:
1289 drm_gem_object_unreference(&obj->base);
1290 unlock:
1291 mutex_unlock(&dev->struct_mutex);
1292 return ret;
1293 }
1294
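/*
 * Report the state of a pending GPU reset: 0 if none is in progress, -EIO if
 * the caller cannot handle -EAGAIN or the GPU is terminally wedged, and
 * -EAGAIN if an interruptible caller should back off and retry.
 */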
1295 int
1296 i915_gem_check_wedge(struct i915_gpu_error *error,
1297 bool interruptible)
1298 {
1299 if (i915_reset_in_progress(error)) {
1300 /* Non-interruptible callers can't handle -EAGAIN, hence return
1301 * -EIO unconditionally for these. */
1302 if (!interruptible)
1303 return -EIO;
1304
1305 /* Recovery complete, but the reset failed ... */
1306 if (i915_terminally_wedged(error))
1307 return -EIO;
1308
1309 return -EAGAIN;
1310 }
1311
1312 return 0;
1313 }
1314
1315 /*
1316 * Compare seqno against outstanding lazy request. Emit a request if they are
1317 * equal.
1318 */
1319 static int
1320 i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
1321 {
1322 int ret;
1323
1324 BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
1325
1326 ret = 0;
1327 if (seqno == ring->outstanding_lazy_seqno)
1328 ret = i915_add_request(ring, NULL);
1329
1330 return ret;
1331 }
1332
1333 #ifndef __NetBSD__
1334 static void fake_irq(unsigned long data)
1335 {
1336 wake_up_process((struct task_struct *)data);
1337 }
1338 #endif
1339
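/*
 * True if this ring has been observed to miss interrupts; waiters then fall
 * back to short timeouts instead of relying on the IRQ alone.
 */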
1340 static bool missed_irq(struct drm_i915_private *dev_priv,
1341 struct intel_ring_buffer *ring)
1342 {
1343 return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
1344 }
1345
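/*
 * Only the first outstanding wait per file may request an RPS boost:
 * atomically mark the boost as taken and report whether it was previously
 * clear. Kernel-internal waits (NULL file_priv) may always boost.
 */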
1346 static bool can_wait_boost(struct drm_i915_file_private *file_priv)
1347 {
1348 if (file_priv == NULL)
1349 return true;
1350
1351 return !atomic_xchg(&file_priv->rps_wait_boost, true);
1352 }
1353
1354 /**
1355 * __wait_seqno - wait until execution of seqno has finished
1356 * @ring: the ring expected to report seqno
1357 * @seqno: duh!
1358 * @reset_counter: reset sequence associated with the given seqno
1359 * @interruptible: do an interruptible wait (normally yes)
1360 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
1361 *
1362 * Note: It is of utmost importance that the passed in seqno and reset_counter
1363 * values have been read by the caller in an smp safe manner. Where read-side
1364 * locks are involved, it is sufficient to read the reset_counter before
1365 * unlocking the lock that protects the seqno. For lockless tricks, the
1366 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
1367 * inserted.
1368 *
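 * A typical caller samples the counter under struct_mutex and drops the lock
 * before waiting, e.g. (sketch):
 *
 *	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
 *	mutex_unlock(&dev->struct_mutex);
 *	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file_priv);
 *	mutex_lock(&dev->struct_mutex);
 *
 * as in i915_gem_object_wait_rendering__nonblocking() below.
 *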
1369 * Returns 0 if the seqno was found within the allotted time. Else returns the
1370 * errno with remaining time filled in timeout argument.
1371 */
1372 #ifdef __NetBSD__
1373 static int
1374 __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, unsigned reset_counter,
1375 bool interruptible, struct timespec *timeout,
1376 struct drm_i915_file_private *file_priv)
1377 {
1378 struct drm_device *dev = ring->dev;
1379 struct drm_i915_private *dev_priv = dev->dev_private;
1380 bool irq_test_in_progress;
1381 struct timespec before, after;
1382 int ticks;
1383 bool wedged;
1384 int ret;
1385
1386 irq_test_in_progress = (dev_priv->gpu_error.test_irq_rings &
1387 intel_ring_flag(ring));
1388 __insn_barrier();
1389
1390 if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
1391 return 0;
1392
1393 if (timeout)
1394 ticks = mstohz(timespec_to_ns(timeout) / 1000000);
1395 else
1396 ticks = 1;
1397
1398 if (INTEL_INFO(dev)->gen >= 6 && can_wait_boost(file_priv)) {
1399 gen6_rps_boost(dev_priv);
1400 if (file_priv)
1401 mod_delayed_work(dev_priv->wq,
1402 &file_priv->mm.idle_work,
1403 msecs_to_jiffies(100));
1404 }
1405
1406 if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring)))
1407 return -ENODEV;
1408
1409 nanotime(&before);
1410 spin_lock(&dev_priv->irq_lock);
1411 #define EXIT_COND \
1412 (((reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) \
1413 ? wedged = true : false) || \
1414 i915_seqno_passed(ring->get_seqno(ring, false), \
1415 seqno))
1416
1417 if (timeout) {
1418 /*
1419 * XXX This missed_irq business smells like unlocked
1420 * Linux waitqueue nonsense.
1421 */
1422 if (missed_irq(dev_priv, ring))
1423 ticks = 1;
1424 if (interruptible)
1425 DRM_SPIN_TIMED_WAIT_UNTIL(ret, &ring->irq_queue,
1426 &dev_priv->irq_lock, ticks, EXIT_COND);
1427 else
1428 DRM_SPIN_TIMED_WAIT_NOINTR_UNTIL(ret, &ring->irq_queue,
1429 &dev_priv->irq_lock, ticks, EXIT_COND);
1430 } else {
1431 if (interruptible)
1432 DRM_SPIN_WAIT_UNTIL(ret, &ring->irq_queue,
1433 &dev_priv->irq_lock, EXIT_COND);
1434 else
1435 DRM_SPIN_WAIT_NOINTR_UNTIL(ret, &ring->irq_queue,
1436 &dev_priv->irq_lock, EXIT_COND);
1437 }
1438 #undef EXIT_COND
1439 spin_unlock(&dev_priv->irq_lock);
1440 nanotime(&after);
1441
1442 if (!irq_test_in_progress)
1443 ring->irq_put(ring);
1444 if (timeout)
1445 timespecsub(&after, &before, timeout);
1446 return MAX(ret, 0); /* ignore remaining ticks */
1447 }
1448 #else
1449 static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
1450 unsigned reset_counter,
1451 bool interruptible,
1452 struct timespec *timeout,
1453 struct drm_i915_file_private *file_priv)
1454 {
1455 struct drm_device *dev = ring->dev;
1456 struct drm_i915_private *dev_priv = dev->dev_private;
1457 const bool irq_test_in_progress =
1458 ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
1459 struct timespec before, now;
1460 DEFINE_WAIT(wait);
1461 unsigned long timeout_expire;
1462 int ret;
1463
1464 WARN(dev_priv->pm.irqs_disabled, "IRQs disabled\n");
1465
1466 if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
1467 return 0;
1468
1469 timeout_expire = timeout ? jiffies + timespec_to_jiffies_timeout(timeout) : 0;
1470
1471 if (INTEL_INFO(dev)->gen >= 6 && can_wait_boost(file_priv)) {
1472 gen6_rps_boost(dev_priv);
1473 if (file_priv)
1474 mod_delayed_work(dev_priv->wq,
1475 &file_priv->mm.idle_work,
1476 msecs_to_jiffies(100));
1477 }
1478
1479 if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring)))
1480 return -ENODEV;
1481
1482 /* Record current time in case interrupted by signal, or wedged */
1483 trace_i915_gem_request_wait_begin(ring, seqno);
1484 getrawmonotonic(&before);
1485 for (;;) {
1486 struct timer_list timer;
1487
1488 prepare_to_wait(&ring->irq_queue, &wait,
1489 interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
1490
1491 /* We need to check whether any gpu reset happened in between
1492 * the caller grabbing the seqno and now ... */
1493 if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
1494 /* ... but upgrade the -EAGAIN to an -EIO if the gpu
1495 * is truly gone. */
1496 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1497 if (ret == 0)
1498 ret = -EAGAIN;
1499 break;
1500 }
1501
1502 if (i915_seqno_passed(ring->get_seqno(ring, false), seqno)) {
1503 ret = 0;
1504 break;
1505 }
1506
1507 if (interruptible && signal_pending(current)) {
1508 ret = -ERESTARTSYS;
1509 break;
1510 }
1511
1512 if (timeout && time_after_eq(jiffies, timeout_expire)) {
1513 ret = -ETIME;
1514 break;
1515 }
1516
1517 timer.function = NULL;
1518 if (timeout || missed_irq(dev_priv, ring)) {
1519 unsigned long expire;
1520
1521 setup_timer_on_stack(&timer, fake_irq, (unsigned long)current);
1522 expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire;
1523 mod_timer(&timer, expire);
1524 }
1525
1526 io_schedule();
1527
1528 if (timer.function) {
1529 del_singleshot_timer_sync(&timer);
1530 destroy_timer_on_stack(&timer);
1531 }
1532 }
1533 getrawmonotonic(&now);
1534 trace_i915_gem_request_wait_end(ring, seqno);
1535
1536 if (!irq_test_in_progress)
1537 ring->irq_put(ring);
1538
1539 finish_wait(&ring->irq_queue, &wait);
1540
1541 if (timeout) {
1542 struct timespec sleep_time = timespec_sub(now, before);
1543 *timeout = timespec_sub(*timeout, sleep_time);
1544 if (!timespec_valid(timeout)) /* i.e. negative time remains */
1545 set_normalized_timespec(timeout, 0, 0);
1546 }
1547
1548 return ret;
1549 }
1550 #endif
1551
1552 /**
1553 * Waits for a sequence number to be signaled, and cleans up the
1554 * request and object lists appropriately for that event.
1555 */
1556 int
1557 i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
1558 {
1559 struct drm_device *dev = ring->dev;
1560 struct drm_i915_private *dev_priv = dev->dev_private;
1561 bool interruptible = dev_priv->mm.interruptible;
1562 int ret;
1563
1564 BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1565 BUG_ON(seqno == 0);
1566
1567 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1568 if (ret)
1569 return ret;
1570
1571 ret = i915_gem_check_olr(ring, seqno);
1572 if (ret)
1573 return ret;
1574
1575 return __wait_seqno(ring, seqno,
1576 atomic_read(&dev_priv->gpu_error.reset_counter),
1577 interruptible, NULL, NULL);
1578 }
1579
1580 static int
1581 i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
1582 struct intel_ring_buffer *ring)
1583 {
1584 i915_gem_retire_requests_ring(ring);
1585
1586 /* Manually manage the write flush as we may have not yet
1587 * retired the buffer.
1588 *
1589 * Note that the last_write_seqno is always the earlier of
1590 * the two (read/write) seqnos, so if we have successfully waited,
1591 * we know we have passed the last write.
1592 */
1593 obj->last_write_seqno = 0;
1594 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
1595
1596 return 0;
1597 }
1598
1599 /**
1600 * Ensures that all rendering to the object has completed and the object is
1601 * safe to unbind from the GTT or access from the CPU.
1602 */
1603 static __must_check int
1604 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
1605 bool readonly)
1606 {
1607 struct intel_ring_buffer *ring = obj->ring;
1608 u32 seqno;
1609 int ret;
1610
1611 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
1612 if (seqno == 0)
1613 return 0;
1614
1615 ret = i915_wait_seqno(ring, seqno);
1616 if (ret)
1617 return ret;
1618
1619 return i915_gem_object_wait_rendering__tail(obj, ring);
1620 }
1621
1622 /* A nonblocking variant of the above wait. This is a highly dangerous routine
1623 * as the object state may change during this call.
1624 */
1625 static __must_check int
1626 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
1627 struct drm_i915_file_private *file_priv,
1628 bool readonly)
1629 {
1630 struct drm_device *dev = obj->base.dev;
1631 struct drm_i915_private *dev_priv = dev->dev_private;
1632 struct intel_ring_buffer *ring = obj->ring;
1633 unsigned reset_counter;
1634 u32 seqno;
1635 int ret;
1636
1637 BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1638 BUG_ON(!dev_priv->mm.interruptible);
1639
1640 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
1641 if (seqno == 0)
1642 return 0;
1643
1644 ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
1645 if (ret)
1646 return ret;
1647
1648 ret = i915_gem_check_olr(ring, seqno);
1649 if (ret)
1650 return ret;
1651
1652 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
1653 mutex_unlock(&dev->struct_mutex);
1654 ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file_priv);
1655 mutex_lock(&dev->struct_mutex);
1656 if (ret)
1657 return ret;
1658
1659 return i915_gem_object_wait_rendering__tail(obj, ring);
1660 }
1661
1662 /**
1663 * Called when user space prepares to use an object with the CPU, either
1664 * through the mmap ioctl's mapping or a GTT mapping.
1665 */
1666 int
1667 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1668 struct drm_file *file)
1669 {
1670 struct drm_i915_gem_set_domain *args = data;
1671 struct drm_gem_object *gobj;
1672 struct drm_i915_gem_object *obj;
1673 uint32_t read_domains = args->read_domains;
1674 uint32_t write_domain = args->write_domain;
1675 int ret;
1676
1677 /* Only handle setting domains to types used by the CPU. */
1678 if (write_domain & I915_GEM_GPU_DOMAINS)
1679 return -EINVAL;
1680
1681 if (read_domains & I915_GEM_GPU_DOMAINS)
1682 return -EINVAL;
1683
1684 /* Having something in the write domain implies it's in the read
1685 * domain, and only that read domain. Enforce that in the request.
1686 */
1687 if (write_domain != 0 && read_domains != write_domain)
1688 return -EINVAL;
1689
1690 ret = i915_mutex_lock_interruptible(dev);
1691 if (ret)
1692 return ret;
1693
1694 gobj = drm_gem_object_lookup(dev, file, args->handle);
1695 if (gobj == NULL) {
1696 ret = -ENOENT;
1697 goto unlock;
1698 }
1699 obj = to_intel_bo(gobj);
1700
1701 /* Try to flush the object off the GPU without holding the lock.
1702 * We will repeat the flush holding the lock in the normal manner
1703 * to catch cases where we are gazumped.
1704 */
1705 ret = i915_gem_object_wait_rendering__nonblocking(obj,
1706 file->driver_priv,
1707 !write_domain);
1708 if (ret)
1709 goto unref;
1710
1711 if (read_domains & I915_GEM_DOMAIN_GTT) {
1712 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1713
1714 /* Silently promote "you're not bound, there was nothing to do"
1715 * to success, since the client was just asking us to
1716 * make sure everything was done.
1717 */
1718 if (ret == -EINVAL)
1719 ret = 0;
1720 } else {
1721 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1722 }
1723
1724 unref:
1725 drm_gem_object_unreference(&obj->base);
1726 unlock:
1727 mutex_unlock(&dev->struct_mutex);
1728 return ret;
1729 }
1730
1731 /**
1732 * Called when user space has done writes to this buffer
1733 */
1734 int
1735 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1736 struct drm_file *file)
1737 {
1738 struct drm_i915_gem_sw_finish *args = data;
1739 struct drm_gem_object *gobj;
1740 struct drm_i915_gem_object *obj;
1741 int ret = 0;
1742
1743 ret = i915_mutex_lock_interruptible(dev);
1744 if (ret)
1745 return ret;
1746
1747 gobj = drm_gem_object_lookup(dev, file, args->handle);
1748 if (gobj == NULL) {
1749 ret = -ENOENT;
1750 goto unlock;
1751 }
1752 obj = to_intel_bo(gobj);
1753
1754 /* Pinned buffers may be scanout, so flush the cache */
1755 if (obj->pin_display)
1756 i915_gem_object_flush_cpu_write_domain(obj, true);
1757
1758 drm_gem_object_unreference(&obj->base);
1759 unlock:
1760 mutex_unlock(&dev->struct_mutex);
1761 return ret;
1762 }
1763
1764 /**
1765 * Maps the contents of an object, returning the address it is mapped
1766 * into.
1767 *
1768 * While the mapping holds a reference on the contents of the object, it doesn't
1769 * imply a ref on the object itself.
1770 */
1771 int
1772 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1773 struct drm_file *file)
1774 {
1775 struct drm_i915_gem_mmap *args = data;
1776 struct drm_gem_object *obj;
1777 unsigned long addr;
1778 #ifdef __NetBSD__
1779 int ret;
1780 #endif
1781
1782 obj = drm_gem_object_lookup(dev, file, args->handle);
1783 if (obj == NULL)
1784 return -ENOENT;
1785
1786 /* prime objects have no backing filp to GEM mmap
1787 * pages from.
1788 */
1789 #ifdef __NetBSD__
1790 /* Also stolen objects (XXX can we get them here?) */
1791 if (obj->gemo_shm_uao == NULL) {
1792 drm_gem_object_unreference_unlocked(obj);
1793 return -EINVAL;
1794 }
1795 #else
1796 if (!obj->filp) {
1797 drm_gem_object_unreference_unlocked(obj);
1798 return -EINVAL;
1799 }
1800 #endif
1801
1802 #ifdef __NetBSD__
1803 addr = (*curproc->p_emul->e_vm_default_addr)(curproc,
1804 (vaddr_t)curproc->p_vmspace->vm_daddr, args->size);
1805 /* XXX errno NetBSD->Linux */
1806 ret = -uvm_map(&curproc->p_vmspace->vm_map, &addr, args->size,
1807 obj->gemo_shm_uao, args->offset, 0,
1808 UVM_MAPFLAG((VM_PROT_READ | VM_PROT_WRITE),
1809 (VM_PROT_READ | VM_PROT_WRITE), UVM_INH_COPY, UVM_ADV_NORMAL,
1810 0));
1811 if (ret) {
1812 drm_gem_object_unreference_unlocked(obj);
1813 return ret;
1814 }
1815 uao_reference(obj->gemo_shm_uao);
1816 drm_gem_object_unreference_unlocked(obj);
1817 #else
1818 addr = vm_mmap(obj->filp, 0, args->size,
1819 PROT_READ | PROT_WRITE, MAP_SHARED,
1820 args->offset);
1821 drm_gem_object_unreference_unlocked(obj);
1822 if (IS_ERR((void *)addr))
1823 return addr;
1824 #endif
1825
1826 args->addr_ptr = (uint64_t) addr;
1827
1828 return 0;
1829 }
1830
1831 #ifdef __NetBSD__ /* XXX gem gtt fault */
1832 static int i915_udv_fault(struct uvm_faultinfo *, vaddr_t,
1833 struct vm_page **, int, int, vm_prot_t, int, paddr_t);
1834
1835 int
1836 i915_gem_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps,
1837 int npages, int centeridx, vm_prot_t access_type, int flags)
1838 {
1839 struct uvm_object *uobj = ufi->entry->object.uvm_obj;
1840 struct drm_gem_object *gem_obj =
1841 container_of(uobj, struct drm_gem_object, gemo_uvmobj);
1842 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
1843 struct drm_device *dev = obj->base.dev;
1844 struct drm_i915_private *dev_priv = dev->dev_private;
1845 voff_t byte_offset;
1846 pgoff_t page_offset;
1847 int ret = 0;
1848 bool write = ISSET(access_type, VM_PROT_WRITE) ? 1 : 0;
1849
1850 byte_offset = (ufi->entry->offset + (vaddr - ufi->entry->start));
1851 KASSERT(byte_offset <= obj->base.size);
1852 page_offset = (byte_offset >> PAGE_SHIFT);
1853
1854 intel_runtime_pm_get(dev_priv);
1855
1856 /* Thanks, uvm, but we don't need this lock. */
1857 mutex_exit(uobj->vmobjlock);
1858
1859 ret = i915_mutex_lock_interruptible(dev);
1860 if (ret)
1861 goto out;
1862
1863 trace_i915_gem_object_fault(obj, page_offset, true, write);
1864
1865 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
1866 if (ret)
1867 goto unlock;
1868
1869 if ((obj->cache_level != I915_CACHE_NONE) && !HAS_LLC(dev)) {
1870 ret = -EINVAL;
1871 goto unlock;
1872 }
1873
1874 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
1875 if (ret)
1876 goto unlock;
1877
1878 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1879 if (ret)
1880 goto unpin;
1881
1882 ret = i915_gem_object_get_fence(obj);
1883 if (ret)
1884 goto unpin;
1885
1886 obj->fault_mappable = true;
1887
1888 /* XXX errno NetBSD->Linux */
1889 ret = -i915_udv_fault(ufi, vaddr, pps, npages, centeridx, access_type,
1890 flags,
1891 (dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj)));
1892 unpin:
1893 i915_gem_object_ggtt_unpin(obj);
1894 unlock:
1895 mutex_unlock(&dev->struct_mutex);
1896 out:
1897 mutex_enter(uobj->vmobjlock);
1898 uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj);
1899 if (ret == -ERESTART)
1900 uvm_wait("i915flt");
1901 /* XXX Deal with GPU hangs here... */
1902 intel_runtime_pm_put(dev_priv);
1903 /* XXX errno Linux->NetBSD */
1904 return -ret;
1905 }
1906
1907 /*
1908 * XXX i915_udv_fault is copypasta of udv_fault from uvm_device.c.
1909 *
1910 * XXX pmap_enter_default instead of pmap_enter because of a problem
1911 * with using weak aliases in kernel modules or something.
1912 */
1913 int pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, unsigned);
1914
1915 static int
1916 i915_udv_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps,
1917 int npages, int centeridx, vm_prot_t access_type, int flags,
1918 paddr_t gtt_paddr)
1919 {
1920 struct vm_map_entry *entry = ufi->entry;
1921 vaddr_t curr_va;
1922 off_t curr_offset;
1923 paddr_t paddr;
1924 u_int mmapflags;
1925 int lcv, retval;
1926 vm_prot_t mapprot;
1927 UVMHIST_FUNC("i915_udv_fault"); UVMHIST_CALLED(maphist);
1928 UVMHIST_LOG(maphist," flags=%d", flags,0,0,0);
1929
1930 /*
1931 * we do not allow device mappings to be mapped copy-on-write
1932 * so we kill any attempt to do so here.
1933 */
1934
1935 if (UVM_ET_ISCOPYONWRITE(entry)) {
1936 UVMHIST_LOG(maphist, "<- failed -- COW entry (etype=0x%x)",
1937 entry->etype, 0,0,0);
1938 return(EIO);
1939 }
1940
1941 /*
1942 * now we must determine the offset in udv to use and the VA to
1943 * use for pmap_enter. note that we always use orig_map's pmap
1944 * for pmap_enter (even if we have a submap). since virtual
1945 * addresses in a submap must match the main map, this is ok.
1946 */
1947
1948 /* udv offset = (offset from start of entry) + entry's offset */
1949 curr_offset = entry->offset + (vaddr - entry->start);
1950 /* pmap va = vaddr (virtual address of pps[0]) */
1951 curr_va = vaddr;
1952
1953 /*
1954 * loop over the page range entering in as needed
1955 */
1956
1957 retval = 0;
1958 for (lcv = 0 ; lcv < npages ; lcv++, curr_offset += PAGE_SIZE,
1959 curr_va += PAGE_SIZE) {
1960 if ((flags & PGO_ALLPAGES) == 0 && lcv != centeridx)
1961 continue;
1962
1963 if (pps[lcv] == PGO_DONTCARE)
1964 continue;
1965
1966 paddr = (gtt_paddr + curr_offset);
1967 mmapflags = 0;
1968 mapprot = ufi->entry->protection;
1969 UVMHIST_LOG(maphist,
1970 " MAPPING: device: pm=0x%x, va=0x%x, pa=0x%lx, at=%d",
1971 ufi->orig_map->pmap, curr_va, paddr, mapprot);
1972 if (pmap_enter_default(ufi->orig_map->pmap, curr_va, paddr, mapprot,
1973 PMAP_CANFAIL | mapprot | mmapflags) != 0) {
1974 /*
1975 * pmap_enter() didn't have the resource to
1976 * enter this mapping. Unlock everything,
1977 * wait for the pagedaemon to free up some
1978 * pages, and then tell uvm_fault() to start
1979 * the fault again.
1980 *
1981 * XXX Needs some rethinking for the PGO_ALLPAGES
1982 * XXX case.
1983 */
1984 pmap_update(ufi->orig_map->pmap); /* sync what we have so far */
1985 return (ERESTART);
1986 }
1987 }
1988
1989 pmap_update(ufi->orig_map->pmap);
1990 return (retval);
1991 }
1992 #else
1993 /**
1994 * i915_gem_fault - fault a page into the GTT
1995 * @vma: VMA in question
1996 * @vmf: fault info
1997 *
1998 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1999 * from userspace. The fault handler takes care of binding the object to
2000 * the GTT (if needed), allocating and programming a fence register (again,
2001 * only if needed based on whether the old reg is still valid or the object
2002 * is tiled) and inserting a new PTE into the faulting process.
2003 *
2004 * Note that the faulting process may involve evicting existing objects
2005 * from the GTT and/or fence registers to make room. So performance may
2006 * suffer if the GTT working set is large or there are few fence registers
2007 * left.
2008 */
2009 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
2010 {
2011 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
2012 struct drm_device *dev = obj->base.dev;
2013 struct drm_i915_private *dev_priv = dev->dev_private;
2014 pgoff_t page_offset;
2015 unsigned long pfn;
2016 int ret = 0;
2017 bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
2018
2019 intel_runtime_pm_get(dev_priv);
2020
2021 /* We don't use vmf->pgoff since that has the fake offset */
2022 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
2023 PAGE_SHIFT;
2024
2025 ret = i915_mutex_lock_interruptible(dev);
2026 if (ret)
2027 goto out;
2028
2029 trace_i915_gem_object_fault(obj, page_offset, true, write);
2030
2031 /* Try to flush the object off the GPU first without holding the lock.
2032 * Upon reacquiring the lock, we will perform our sanity checks and then
2033 * repeat the flush holding the lock in the normal manner to catch cases
2034 * where we are gazumped.
2035 */
2036 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
2037 if (ret)
2038 goto unlock;
2039
2040 /* Access to snoopable pages through the GTT is incoherent. */
2041 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
2042 ret = -EINVAL;
2043 goto unlock;
2044 }
2045
2046 /* Now bind it into the GTT if needed */
2047 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
2048 if (ret)
2049 goto unlock;
2050
2051 ret = i915_gem_object_set_to_gtt_domain(obj, write);
2052 if (ret)
2053 goto unpin;
2054
2055 ret = i915_gem_object_get_fence(obj);
2056 if (ret)
2057 goto unpin;
2058
2059 obj->fault_mappable = true;
2060
2061 pfn = dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj);
2062 pfn >>= PAGE_SHIFT;
2063 pfn += page_offset;
2064
2065 /* Finally, remap it using the new GTT offset */
2066 ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
2067 unpin:
2068 i915_gem_object_ggtt_unpin(obj);
2069 unlock:
2070 mutex_unlock(&dev->struct_mutex);
2071 out:
2072 switch (ret) {
2073 case -EIO:
2074 /* If this -EIO is due to a gpu hang, give the reset code a
2075 * chance to clean up the mess. Otherwise return the proper
2076 * SIGBUS. */
2077 if (i915_terminally_wedged(&dev_priv->gpu_error)) {
2078 ret = VM_FAULT_SIGBUS;
2079 break;
2080 }
2081 case -EAGAIN:
2082 /*
2083 * EAGAIN means the gpu is hung and we'll wait for the error
2084 * handler to reset everything when re-faulting in
2085 * i915_mutex_lock_interruptible.
2086 */
2087 case 0:
2088 case -ERESTARTSYS:
2089 case -EINTR:
2090 case -EBUSY:
2091 /*
2092 * EBUSY is ok: this just means that another thread
2093 * already did the job.
2094 */
2095 ret = VM_FAULT_NOPAGE;
2096 break;
2097 case -ENOMEM:
2098 ret = VM_FAULT_OOM;
2099 break;
2100 case -ENOSPC:
2101 case -EFAULT:
2102 ret = VM_FAULT_SIGBUS;
2103 break;
2104 default:
2105 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
2106 ret = VM_FAULT_SIGBUS;
2107 break;
2108 }
2109
2110 intel_runtime_pm_put(dev_priv);
2111 return ret;
2112 }
2113
2114 void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
2115 {
2116 struct i915_vma *vma;
2117
2118 /*
2119 * Only the global gtt is relevant for gtt memory mappings, so restrict
2120 * list traversal to objects bound into the global address space. Note
2121 * that the active list should be empty, but better safe than sorry.
2122 */
2123 WARN_ON(!list_empty(&dev_priv->gtt.base.active_list));
2124 list_for_each_entry(vma, &dev_priv->gtt.base.active_list, mm_list)
2125 i915_gem_release_mmap(vma->obj);
2126 list_for_each_entry(vma, &dev_priv->gtt.base.inactive_list, mm_list)
2127 i915_gem_release_mmap(vma->obj);
2128 }
2129 #endif
2130
2131 /**
2132 * i915_gem_release_mmap - remove physical page mappings
2133 * @obj: obj in question
2134 *
2135 * Preserve the reservation of the mmapping with the DRM core code, but
2136 * relinquish ownership of the pages back to the system.
2137 *
2138 * It is vital that we remove the page mapping if we have mapped a tiled
2139 * object through the GTT and then lose the fence register due to
2140 * resource pressure. Similarly if the object has been moved out of the
2141 * aperture, then pages mapped into userspace must be revoked. Removing the
2142 * mapping will then trigger a page fault on the next user access, allowing
2143 * fixup by i915_gem_fault().
2144 */
2145 void
2146 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
2147 {
2148 if (!obj->fault_mappable)
2149 return;
2150
2151 #ifdef __NetBSD__ /* XXX gem gtt fault */
2152 {
2153 struct vm_page *page;
2154
2155 mutex_enter(obj->base.gemo_shm_uao->vmobjlock);
2156 KASSERT(obj->pages != NULL);
2157 /* Force a fresh fault for each page. */
2158 /*
2159 * XXX OOPS! This doesn't actually do what we want.
2160 * This causes a fresh fault for access to the backing
2161 * pages -- but nothing accesses the backing pages
2162 * directly! What is actually entered into CPU page
2163 * table entries is aperture addresses which have been
2164 * programmed by the GTT to refer to those backing
2165 * pages.
2166 *
2167 * We need to clear those page table entries, but
2168 * there's no good way to do that at the moment: nobody
2169 * records for us a map from either uvm objects or
2170 * physical device addresses to a list of all virtual
2171 * pages where they have been mapped. pmap(9) records
2172 * a map only from physical RAM addresses to virtual
2173 * pages; it does nothing for physical device
2174 * addresses.
2175 */
2176 TAILQ_FOREACH(page, &obj->igo_pageq, pageq.queue)
2177 pmap_page_protect(page, VM_PROT_NONE);
2178 mutex_exit(obj->base.gemo_shm_uao->vmobjlock);
2179 }
2180 #else
2181 drm_vma_node_unmap(&obj->base.vma_node,
2182 obj->base.dev->anon_inode->i_mapping);
2183 #endif
2184 obj->fault_mappable = false;
2185 }
2186
2187 uint32_t
2188 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
2189 {
2190 uint32_t gtt_size;
2191
2192 if (INTEL_INFO(dev)->gen >= 4 ||
2193 tiling_mode == I915_TILING_NONE)
2194 return size;
2195
2196 /* Previous chips need a power-of-two fence region when tiling */
2197 if (INTEL_INFO(dev)->gen == 3)
2198 gtt_size = 1024*1024;
2199 else
2200 gtt_size = 512*1024;
2201
2202 while (gtt_size < size)
2203 gtt_size <<= 1;
2204
2205 return gtt_size;
2206 }
2207
2208 /**
2209 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
2210 * @obj: object to check
2211 *
2212 * Return the required GTT alignment for an object, taking into account
2213 * potential fence register mapping.
2214 */
2215 uint32_t
2216 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
2217 int tiling_mode, bool fenced)
2218 {
2219 /*
2220 * Minimum alignment is 4k (GTT page size), but might be greater
2221 * if a fence register is needed for the object.
2222 */
2223 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
2224 tiling_mode == I915_TILING_NONE)
2225 return 4096;
2226
2227 /*
2228 * Previous chips need to be aligned to the size of the smallest
2229 * fence register that can contain the object.
2230 */
2231 return i915_gem_get_gtt_size(dev, size, tiling_mode);
2232 }
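
/*
 * Editor's note: a worked example of the two helpers above (illustrative
 * only).  On gen3 the minimum fence region is 1 MB, so a 600 KB tiled
 * object reports a 1 MB fence size and hence a 1 MB GTT alignment; on
 * gen2 the 512 KB minimum doubles once to cover the same object.  On
 * gen4+ (or for untiled objects) the helpers fall back to the object
 * size and 4 KB alignment respectively.
 */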
2233
2234 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2235 {
2236 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2237 int ret;
2238
2239 if (drm_vma_node_has_offset(&obj->base.vma_node))
2240 return 0;
2241
2242 dev_priv->mm.shrinker_no_lock_stealing = true;
2243
2244 ret = drm_gem_create_mmap_offset(&obj->base);
2245 if (ret != -ENOSPC)
2246 goto out;
2247
2248 /* Badly fragmented mmap space? The only way we can recover
2249 * space is by destroying unwanted objects. We can't randomly release
2250 * mmap_offsets as userspace expects them to be persistent for the
2251 * lifetime of the objects. The closest we can do is to release the
2252 * offsets on purgeable objects by truncating them and marking them purged,
2253 * which prevents userspace from ever using that object again.
2254 */
2255 i915_gem_purge(dev_priv, obj->base.size >> PAGE_SHIFT);
2256 ret = drm_gem_create_mmap_offset(&obj->base);
2257 if (ret != -ENOSPC)
2258 goto out;
2259
2260 i915_gem_shrink_all(dev_priv);
2261 ret = drm_gem_create_mmap_offset(&obj->base);
2262 out:
2263 dev_priv->mm.shrinker_no_lock_stealing = false;
2264
2265 return ret;
2266 }
2267
2268 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2269 {
2270 drm_gem_free_mmap_offset(&obj->base);
2271 }
2272
2273 int
2274 i915_gem_mmap_gtt(struct drm_file *file,
2275 struct drm_device *dev,
2276 uint32_t handle,
2277 uint64_t *offset)
2278 {
2279 struct drm_i915_private *dev_priv = dev->dev_private;
2280 struct drm_gem_object *gobj;
2281 struct drm_i915_gem_object *obj;
2282 int ret;
2283
2284 ret = i915_mutex_lock_interruptible(dev);
2285 if (ret)
2286 return ret;
2287
2288 gobj = drm_gem_object_lookup(dev, file, handle);
2289 if (gobj == NULL) {
2290 ret = -ENOENT;
2291 goto unlock;
2292 }
2293 obj = to_intel_bo(gobj);
2294
2295 if (obj->base.size > dev_priv->gtt.mappable_end) {
2296 ret = -E2BIG;
2297 goto out;
2298 }
2299
2300 if (obj->madv != I915_MADV_WILLNEED) {
2301 DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
2302 ret = -EFAULT;
2303 goto out;
2304 }
2305
2306 ret = i915_gem_object_create_mmap_offset(obj);
2307 if (ret)
2308 goto out;
2309
2310 *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
2311
2312 out:
2313 drm_gem_object_unreference(&obj->base);
2314 unlock:
2315 mutex_unlock(&dev->struct_mutex);
2316 return ret;
2317 }
2318
2319 /**
2320 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2321 * @dev: DRM device
2322 * @data: GTT mapping ioctl data
2323 * @file: GEM object info
2324 *
2325 * Simply returns the fake offset to userspace so it can mmap it.
2326 * The mmap call will end up in drm_gem_mmap(), which will set things
2327 * up so we can get faults in the handler above.
2328 *
2329 * The fault handler will take care of binding the object into the GTT
2330 * (since it may have been evicted to make room for something), allocating
2331 * a fence register, and mapping the appropriate aperture address into
2332 * userspace.
2333 */
2334 int
2335 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2336 struct drm_file *file)
2337 {
2338 struct drm_i915_gem_mmap_gtt *args = data;
2339
2340 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
2341 }
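
/*
 * Editor's sketch of the userspace side of the two-step GTT mmap dance
 * described above: fetch the fake offset with the ioctl, then mmap that
 * offset on the DRM fd so the fault handler can populate the PTEs on
 * first access.  Illustrative only, not part of the driver; it assumes a
 * libdrm-style drmIoctl() wrapper and that fd, handle and size already
 * describe an open DRM device and a GEM object.
 */
#if 0
struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
void *ptr;

if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg))
	err(1, "DRM_IOCTL_I915_GEM_MMAP_GTT");
ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, arg.offset);
if (ptr == MAP_FAILED)
	err(1, "mmap");
#endif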
2342
2343 /* Immediately discard the backing storage */
2344 static void
2345 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2346 {
2347 #ifndef __NetBSD__
2348 struct inode *inode;
2349 #endif
2350
2351 i915_gem_object_free_mmap_offset(obj);
2352
2353 #ifdef __NetBSD__
2354 if (obj->base.gemo_shm_uao == NULL)
2355 return;
2356
2357 {
2358 struct uvm_object *const uobj = obj->base.gemo_shm_uao;
2359
2360 if (uobj != NULL) {
2361 /* XXX Calling pgo_put like this is bogus. */
2362 mutex_enter(uobj->vmobjlock);
2363 (*uobj->pgops->pgo_put)(uobj, 0, obj->base.size,
2364 (PGO_ALLPAGES | PGO_FREE));
2365 }
2366 }
2367 #else
2368 if (obj->base.filp == NULL)
2369 return;
2370
2371 /* Our goal here is to return as much of the memory as
2372 * is possible back to the system as we are called from OOM.
2373 * To do this we must instruct the shmfs to drop all of its
2374 * backing pages, *now*.
2375 */
2376 inode = file_inode(obj->base.filp);
2377 shmem_truncate_range(inode, 0, (loff_t)-1);
2378 #endif
2379
2380 obj->madv = __I915_MADV_PURGED;
2381 }
2382
2383 static inline int
2384 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
2385 {
2386 return obj->madv == I915_MADV_DONTNEED;
2387 }
2388
2389 #ifdef __NetBSD__
2390 static void
2391 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
2392 {
2393 struct drm_device *const dev = obj->base.dev;
2394 int ret;
2395
2396 /* XXX Cargo-culted from the Linux code. */
2397 BUG_ON(obj->madv == __I915_MADV_PURGED);
2398
2399 ret = i915_gem_object_set_to_cpu_domain(obj, true);
2400 if (ret) {
2401 WARN_ON(ret != -EIO);
2402 i915_gem_clflush_object(obj, true);
2403 obj->base.read_domains = obj->base.write_domain =
2404 I915_GEM_DOMAIN_CPU;
2405 }
2406
2407 if (i915_gem_object_needs_bit17_swizzle(obj))
2408 i915_gem_object_save_bit_17_swizzle(obj);
2409
2410 /* XXX Maintain dirty flag? */
2411
2412 bus_dmamap_destroy(dev->dmat, obj->igo_dmamap);
2413 bus_dmamem_unwire_uvm_object(dev->dmat, obj->base.gemo_shm_uao, 0,
2414 obj->base.size, obj->pages, obj->igo_nsegs);
2415
2416 kfree(obj->pages);
2417 }
2418 #else
2419 static void
2420 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
2421 {
2422 struct sg_page_iter sg_iter;
2423 int ret;
2424
2425 BUG_ON(obj->madv == __I915_MADV_PURGED);
2426
2427 ret = i915_gem_object_set_to_cpu_domain(obj, true);
2428 if (ret) {
2429 /* In the event of a disaster, abandon all caches and
2430 * hope for the best.
2431 */
2432 WARN_ON(ret != -EIO);
2433 i915_gem_clflush_object(obj, true);
2434 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2435 }
2436
2437 if (i915_gem_object_needs_bit17_swizzle(obj))
2438 i915_gem_object_save_bit_17_swizzle(obj);
2439
2440 if (obj->madv == I915_MADV_DONTNEED)
2441 obj->dirty = 0;
2442
2443 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
2444 struct page *page = sg_page_iter_page(&sg_iter);
2445
2446 if (obj->dirty)
2447 set_page_dirty(page);
2448
2449 if (obj->madv == I915_MADV_WILLNEED)
2450 mark_page_accessed(page);
2451
2452 page_cache_release(page);
2453 }
2454 obj->dirty = 0;
2455
2456 sg_free_table(obj->pages);
2457 kfree(obj->pages);
2458 }
2459 #endif
2460
2461 int
2462 i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
2463 {
2464 const struct drm_i915_gem_object_ops *ops = obj->ops;
2465
2466 if (obj->pages == NULL)
2467 return 0;
2468
2469 if (obj->pages_pin_count)
2470 return -EBUSY;
2471
2472 BUG_ON(i915_gem_obj_bound_any(obj));
2473
2474 /* ->put_pages might need to allocate memory for the bit17 swizzle
2475 * array, hence protect them from being reaped by removing them from gtt
2476 * lists early. */
2477 list_del(&obj->global_list);
2478
2479 ops->put_pages(obj);
2480 obj->pages = NULL;
2481
2482 if (i915_gem_object_is_purgeable(obj))
2483 i915_gem_object_truncate(obj);
2484
2485 return 0;
2486 }
2487
2488 static unsigned long
2489 __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
2490 bool purgeable_only)
2491 {
2492 struct list_head still_bound_list;
2493 struct drm_i915_gem_object *obj, *next;
2494 unsigned long count = 0;
2495
2496 list_for_each_entry_safe(obj, next,
2497 &dev_priv->mm.unbound_list,
2498 global_list) {
2499 if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
2500 i915_gem_object_put_pages(obj) == 0) {
2501 count += obj->base.size >> PAGE_SHIFT;
2502 if (count >= target)
2503 return count;
2504 }
2505 }
2506
2507 /*
2508 * As we may completely rewrite the bound list whilst unbinding
2509 * (due to retiring requests) we have to strictly process only
2510 * one element of the list at a time, and recheck the list
2511 * on every iteration.
2512 */
2513 INIT_LIST_HEAD(&still_bound_list);
2514 while (count < target && !list_empty(&dev_priv->mm.bound_list)) {
2515 struct i915_vma *vma, *v;
2516
2517 obj = list_first_entry(&dev_priv->mm.bound_list,
2518 typeof(*obj), global_list);
2519 list_move_tail(&obj->global_list, &still_bound_list);
2520
2521 if (!i915_gem_object_is_purgeable(obj) && purgeable_only)
2522 continue;
2523
2524 /*
2525 * Hold a reference whilst we unbind this object, as we may
2526 * end up waiting for and retiring requests. This might
2527 * release the final reference (held by the active list)
2528 * and result in the object being freed from under us.
2530 *
2531 * Note 1: Shrinking the bound list is special since only active
2532 * (and hence bound) objects can contain such limbo objects, so
2533 * we don't need special tricks for shrinking the unbound list.
2534 * The only other place where we have to be careful with active
2535 * objects suddenly disappearing due to retiring requests is the
2536 * eviction code.
2537 *
2538 * Note 2: Even though the bound list doesn't hold a reference
2539 * to the object we can safely grab one here: The final object
2540 * unreferencing and the bound_list are both protected by the
2541 * dev->struct_mutex and so we won't ever be able to observe an
2542 * object on the bound_list with a reference count of 0.
2543 */
2544 drm_gem_object_reference(&obj->base);
2545
2546 list_for_each_entry_safe(vma, v, &obj->vma_list, vma_link)
2547 if (i915_vma_unbind(vma))
2548 break;
2549
2550 if (i915_gem_object_put_pages(obj) == 0)
2551 count += obj->base.size >> PAGE_SHIFT;
2552
2553 drm_gem_object_unreference(&obj->base);
2554 }
2555 list_splice(&still_bound_list, &dev_priv->mm.bound_list);
2556
2557 return count;
2558 }
2559
2560 static unsigned long
2561 i915_gem_purge(struct drm_i915_private *dev_priv, long target)
2562 {
2563 return __i915_gem_shrink(dev_priv, target, true);
2564 }
2565
2566 static unsigned long
2567 i915_gem_shrink_all(struct drm_i915_private *dev_priv)
2568 {
2569 struct drm_i915_gem_object *obj, *next;
2570 long freed = 0;
2571
2572 i915_gem_evict_everything(dev_priv->dev);
2573
2574 list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list,
2575 global_list) {
2576 if (i915_gem_object_put_pages(obj) == 0)
2577 freed += obj->base.size >> PAGE_SHIFT;
2578 }
2579 return freed;
2580 }
2581
2582 #ifdef __NetBSD__
2583 static int
2584 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2585 {
2586 struct drm_device *const dev = obj->base.dev;
2587 struct vm_page *page;
2588 int error;
2589
2590 /* XXX Cargo-culted from the Linux code. */
2591 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2592 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2593
2594 KASSERT(obj->pages == NULL);
2595 TAILQ_INIT(&obj->igo_pageq);
2596 obj->pages = kcalloc((obj->base.size / PAGE_SIZE),
2597 sizeof(obj->pages[0]), GFP_KERNEL);
2598 if (obj->pages == NULL) {
2599 error = -ENOMEM;
2600 goto fail0;
2601 }
2602
2603 /* XXX errno NetBSD->Linux */
2604 error = -bus_dmamem_wire_uvm_object(dev->dmat, obj->base.gemo_shm_uao,
2605 0, obj->base.size, &obj->igo_pageq, PAGE_SIZE, 0, obj->pages,
2606 (obj->base.size / PAGE_SIZE), &obj->igo_nsegs, BUS_DMA_NOWAIT);
2607 if (error)
2608 /* XXX Try i915_gem_purge, i915_gem_shrink_all. */
2609 goto fail1;
2610 KASSERT(0 < obj->igo_nsegs);
2611 KASSERT(obj->igo_nsegs <= (obj->base.size / PAGE_SIZE));
2612
2613 /*
2614 * Check that the paddrs will fit in 40 bits, or 32 bits on i965.
2615 *
2616 * XXX This is wrong; we ought to pass this constraint to
2617 * bus_dmamem_wire_uvm_object instead.
2618 */
2619 TAILQ_FOREACH(page, &obj->igo_pageq, pageq.queue) {
2620 const uint64_t mask =
2621 (IS_BROADWATER(dev) || IS_CRESTLINE(dev)?
2622 0xffffffffULL : 0xffffffffffULL);
2623 if (VM_PAGE_TO_PHYS(page) & ~mask) {
2624 DRM_ERROR("GEM physical address exceeds %u bits"
2625 ": %"PRIxMAX"\n",
2626 popcount64(mask),
2627 (uintmax_t)VM_PAGE_TO_PHYS(page));
2628 error = -EIO;
2629 goto fail2;
2630 }
2631 }
2632
2633 /* XXX Should create the DMA map when creating the object. */
2634
2635 /* XXX errno NetBSD->Linux */
2636 error = -bus_dmamap_create(dev->dmat, obj->base.size, obj->igo_nsegs,
2637 PAGE_SIZE, 0, BUS_DMA_NOWAIT, &obj->igo_dmamap);
2638 if (error)
2639 goto fail2;
2640
2641 /* XXX Cargo-culted from the Linux code. */
2642 if (i915_gem_object_needs_bit17_swizzle(obj))
2643 i915_gem_object_do_bit_17_swizzle(obj);
2644
2645 /* Success! */
2646 return 0;
2647
2648 fail2: bus_dmamem_unwire_uvm_object(dev->dmat, obj->base.gemo_shm_uao, 0,
2649 obj->base.size, obj->pages, (obj->base.size / PAGE_SIZE));
2650 fail1: kfree(obj->pages);
2651 obj->pages = NULL;
2652 fail0: KASSERT(error);
2653 return error;
2654 }
2655 #else
2656 static int
2657 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2658 {
2659 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2660 int page_count, i;
2661 struct address_space *mapping;
2662 struct sg_table *st;
2663 struct scatterlist *sg;
2664 struct sg_page_iter sg_iter;
2665 struct page *page;
2666 unsigned long last_pfn = 0; /* suppress gcc warning */
2667 gfp_t gfp;
2668
2669 /* Assert that the object is not currently in any GPU domain. As it
2670 * wasn't in the GTT, there shouldn't be any way it could have been in
2671 * a GPU cache
2672 */
2673 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2674 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2675
2676 st = kmalloc(sizeof(*st), GFP_KERNEL);
2677 if (st == NULL)
2678 return -ENOMEM;
2679
2680 page_count = obj->base.size / PAGE_SIZE;
2681 if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2682 kfree(st);
2683 return -ENOMEM;
2684 }
2685
2686 /* Get the list of pages out of our struct file. They'll be pinned
2687 * at this point until we release them.
2688 *
2689 * Fail silently without starting the shrinker
2690 */
2691 mapping = file_inode(obj->base.filp)->i_mapping;
2692 gfp = mapping_gfp_mask(mapping);
2693 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
2694 gfp &= ~(__GFP_IO | __GFP_WAIT);
2695 sg = st->sgl;
2696 st->nents = 0;
2697 for (i = 0; i < page_count; i++) {
2698 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2699 if (IS_ERR(page)) {
2700 i915_gem_purge(dev_priv, page_count);
2701 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2702 }
2703 if (IS_ERR(page)) {
2704 /* We've tried hard to allocate the memory by reaping
2705 * our own buffer, now let the real VM do its job and
2706 * go down in flames if truly OOM.
2707 */
2708 gfp &= ~(__GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD);
2709 gfp |= __GFP_IO | __GFP_WAIT;
2710
2711 i915_gem_shrink_all(dev_priv);
2712 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2713 if (IS_ERR(page))
2714 goto err_pages;
2715
2716 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
2717 gfp &= ~(__GFP_IO | __GFP_WAIT);
2718 }
2719 #ifdef CONFIG_SWIOTLB
2720 if (swiotlb_nr_tbl()) {
2721 st->nents++;
2722 sg_set_page(sg, page, PAGE_SIZE, 0);
2723 sg = sg_next(sg);
2724 continue;
2725 }
2726 #endif
2727 if (!i || page_to_pfn(page) != last_pfn + 1) {
2728 if (i)
2729 sg = sg_next(sg);
2730 st->nents++;
2731 sg_set_page(sg, page, PAGE_SIZE, 0);
2732 } else {
2733 sg->length += PAGE_SIZE;
2734 }
2735 last_pfn = page_to_pfn(page);
2736
2737 /* Check that the i965g/gm workaround works. */
2738 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
2739 }
2740 #ifdef CONFIG_SWIOTLB
2741 if (!swiotlb_nr_tbl())
2742 #endif
2743 sg_mark_end(sg);
2744 obj->pages = st;
2745
2746 if (i915_gem_object_needs_bit17_swizzle(obj))
2747 i915_gem_object_do_bit_17_swizzle(obj);
2748
2749 return 0;
2750
2751 err_pages:
2752 sg_mark_end(sg);
2753 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
2754 page_cache_release(sg_page_iter_page(&sg_iter));
2755 sg_free_table(st);
2756 kfree(st);
2757 return PTR_ERR(page);
2758 }
2759 #endif
2760
2761 /* Ensure that the associated pages are gathered from the backing storage
2762 * and pinned into our object. i915_gem_object_get_pages() may be called
2763 * multiple times before they are released by a single call to
2764 * i915_gem_object_put_pages() - once the pages are no longer referenced
2765 * either as a result of memory pressure (reaping pages under the shrinker)
2766 * or as the object is itself released.
2767 */
2768 int
2769 i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2770 {
2771 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2772 const struct drm_i915_gem_object_ops *ops = obj->ops;
2773 int ret;
2774
2775 if (obj->pages)
2776 return 0;
2777
2778 if (obj->madv != I915_MADV_WILLNEED) {
2779 DRM_DEBUG("Attempting to obtain a purgeable object\n");
2780 return -EFAULT;
2781 }
2782
2783 BUG_ON(obj->pages_pin_count);
2784
2785 ret = ops->get_pages(obj);
2786 if (ret)
2787 return ret;
2788
2789 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
2790 return 0;
2791 }
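
/*
 * Editor's sketch of the caller pattern implied by the comment above:
 * gather the pages, then keep them pinned for as long as they are in
 * use so that i915_gem_object_put_pages() returns -EBUSY underneath the
 * shrinker.  Illustrative only; error handling trimmed.
 */
#if 0
ret = i915_gem_object_get_pages(obj);
if (ret)
	return ret;
i915_gem_object_pin_pages(obj);
/* ... use obj->pages ... */
i915_gem_object_unpin_pages(obj);
#endif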
2792
2793 static void
2794 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
2795 struct intel_ring_buffer *ring)
2796 {
2797 struct drm_device *dev = obj->base.dev;
2798 struct drm_i915_private *dev_priv = dev->dev_private;
2799 u32 seqno = intel_ring_get_seqno(ring);
2800
2801 BUG_ON(ring == NULL);
2802 if (obj->ring != ring && obj->last_write_seqno) {
2803 /* Keep the seqno relative to the current ring */
2804 obj->last_write_seqno = seqno;
2805 }
2806 obj->ring = ring;
2807
2808 /* Add a reference if we're newly entering the active list. */
2809 if (!obj->active) {
2810 drm_gem_object_reference(&obj->base);
2811 obj->active = 1;
2812 }
2813
2814 list_move_tail(&obj->ring_list, &ring->active_list);
2815
2816 obj->last_read_seqno = seqno;
2817
2818 if (obj->fenced_gpu_access) {
2819 obj->last_fenced_seqno = seqno;
2820
2821 /* Bump MRU to take account of the delayed flush */
2822 if (obj->fence_reg != I915_FENCE_REG_NONE) {
2823 struct drm_i915_fence_reg *reg;
2824
2825 reg = &dev_priv->fence_regs[obj->fence_reg];
2826 list_move_tail(&reg->lru_list,
2827 &dev_priv->mm.fence_list);
2828 }
2829 }
2830 }
2831
2832 void i915_vma_move_to_active(struct i915_vma *vma,
2833 struct intel_ring_buffer *ring)
2834 {
2835 list_move_tail(&vma->mm_list, &vma->vm->active_list);
2836 return i915_gem_object_move_to_active(vma->obj, ring);
2837 }
2838
2839 static void
2840 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
2841 {
2842 struct drm_device *dev = obj->base.dev;
2843 struct drm_i915_private *dev_priv = dev->dev_private;
2844 struct i915_address_space *vm;
2845 struct i915_vma *vma;
2846
2847 if ((obj->base.write_domain & I915_GEM_DOMAIN_GTT) != 0) {
2848 #if 0
2849 printk(KERN_ERR "%s: %p 0x%x flushing gtt\n", __func__, obj,
2850 obj->base.write_domain);
2851 #endif
2852 i915_gem_object_flush_gtt_write_domain(obj);
2853 }
2854 if ((obj->base.write_domain & I915_GEM_DOMAIN_CPU) != 0) {
2855 #if 0
2856 printk(KERN_ERR "%s: %p 0x%x flushing cpu\n", __func__, obj,
2857 obj->base.write_domain);
2858 #endif
2859 i915_gem_object_flush_cpu_write_domain(obj, false);
2860 }
2861 BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
2862 BUG_ON(!obj->active);
2863
2864 list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
2865 vma = i915_gem_obj_to_vma(obj, vm);
2866 if (vma && !list_empty(&vma->mm_list))
2867 list_move_tail(&vma->mm_list, &vm->inactive_list);
2868 }
2869
2870 list_del_init(&obj->ring_list);
2871 obj->ring = NULL;
2872
2873 obj->last_read_seqno = 0;
2874 obj->last_write_seqno = 0;
2875 obj->base.write_domain = 0;
2876
2877 obj->last_fenced_seqno = 0;
2878 obj->fenced_gpu_access = false;
2879
2880 obj->active = 0;
2881 drm_gem_object_unreference(&obj->base);
2882
2883 WARN_ON(i915_verify_lists(dev));
2884 }
2885
2886 static int
2887 i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
2888 {
2889 struct drm_i915_private *dev_priv = dev->dev_private;
2890 struct intel_ring_buffer *ring;
2891 int ret, i, j;
2892
2893 /* Carefully retire all requests without writing to the rings */
2894 for_each_ring(ring, dev_priv, i) {
2895 ret = intel_ring_idle(ring);
2896 if (ret)
2897 return ret;
2898 }
2899 i915_gem_retire_requests(dev);
2900
2901 /* Finally reset hw state */
2902 for_each_ring(ring, dev_priv, i) {
2903 intel_ring_init_seqno(ring, seqno);
2904
2905 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
2906 ring->sync_seqno[j] = 0;
2907 }
2908
2909 return 0;
2910 }
2911
2912 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
2913 {
2914 struct drm_i915_private *dev_priv = dev->dev_private;
2915 int ret;
2916
2917 if (seqno == 0)
2918 return -EINVAL;
2919
2920 /* HWS page needs to be set less than what we
2921 * will inject to ring
2922 */
2923 ret = i915_gem_init_seqno(dev, seqno - 1);
2924 if (ret)
2925 return ret;
2926
2927 /* Carefully set the last_seqno value so that wrap
2928 * detection still works
2929 */
2930 dev_priv->next_seqno = seqno;
2931 dev_priv->last_seqno = seqno - 1;
2932 if (dev_priv->last_seqno == 0)
2933 dev_priv->last_seqno--;
2934
2935 return 0;
2936 }
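
/*
 * Editor's note: a worked example of the wrap handling above
 * (illustrative only).  i915_gem_set_seqno(dev, 1) leaves next_seqno == 1
 * and last_seqno == 0xffffffff, so the signed-difference test in
 * i915_seqno_passed() still treats the freshly emitted seqno 1 as newer
 * than the last one.
 */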
2937
2938 int
2939 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
2940 {
2941 struct drm_i915_private *dev_priv = dev->dev_private;
2942
2943 /* reserve 0 for non-seqno */
2944 if (dev_priv->next_seqno == 0) {
2945 int ret = i915_gem_init_seqno(dev, 0);
2946 if (ret)
2947 return ret;
2948
2949 dev_priv->next_seqno = 1;
2950 }
2951
2952 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
2953 return 0;
2954 }
2955
2956 int __i915_add_request(struct intel_ring_buffer *ring,
2957 struct drm_file *file,
2958 struct drm_i915_gem_object *obj,
2959 u32 *out_seqno)
2960 {
2961 struct drm_i915_private *dev_priv = ring->dev->dev_private;
2962 struct drm_i915_gem_request *request;
2963 u32 request_ring_position, request_start;
2964 int ret;
2965
2966 request_start = intel_ring_get_tail(ring);
2967 /*
2968 * Emit any outstanding flushes - execbuf can fail to emit the flush
2969 * after having emitted the batchbuffer command. Hence we need to fix
2970 * things up similar to emitting the lazy request. The difference here
2971 * is that the flush _must_ happen before the next request, no matter
2972 * what.
2973 */
2974 ret = intel_ring_flush_all_caches(ring);
2975 if (ret)
2976 return ret;
2977
2978 request = ring->preallocated_lazy_request;
2979 if (WARN_ON(request == NULL))
2980 return -ENOMEM;
2981
2982 /* Record the position of the start of the request so that
2983 * should we detect the updated seqno part-way through the
2984 * GPU processing the request, we never over-estimate the
2985 * position of the head.
2986 */
2987 request_ring_position = intel_ring_get_tail(ring);
2988
2989 ret = ring->add_request(ring);
2990 if (ret)
2991 return ret;
2992
2993 request->seqno = intel_ring_get_seqno(ring);
2994 request->ring = ring;
2995 request->head = request_start;
2996 request->tail = request_ring_position;
2997
2998 /* Whilst this request exists, batch_obj will be on the
2999 * active_list, and so will hold the active reference. Only when this
3000 * request is retired will the batch_obj be moved onto the
3001 * inactive_list and lose its active reference. Hence we do not need
3002 * to explicitly hold another reference here.
3003 */
3004 request->batch_obj = obj;
3005
3006 /* Hold a reference to the current context so that we can inspect
3007 * it later in case a hangcheck error event fires.
3008 */
3009 request->ctx = ring->last_context;
3010 if (request->ctx)
3011 i915_gem_context_reference(request->ctx);
3012
3013 request->emitted_jiffies = jiffies;
3014 list_add_tail(&request->list, &ring->request_list);
3015 request->file_priv = NULL;
3016
3017 if (file) {
3018 struct drm_i915_file_private *file_priv = file->driver_priv;
3019
3020 spin_lock(&file_priv->mm.lock);
3021 request->file_priv = file_priv;
3022 list_add_tail(&request->client_list,
3023 &file_priv->mm.request_list);
3024 spin_unlock(&file_priv->mm.lock);
3025 }
3026
3027 trace_i915_gem_request_add(ring, request->seqno);
3028 ring->outstanding_lazy_seqno = 0;
3029 ring->preallocated_lazy_request = NULL;
3030
3031 if (!dev_priv->ums.mm_suspended) {
3032 i915_queue_hangcheck(ring->dev);
3033
3034 cancel_delayed_work_sync(&dev_priv->mm.idle_work);
3035 queue_delayed_work(dev_priv->wq,
3036 &dev_priv->mm.retire_work,
3037 round_jiffies_up_relative(HZ));
3038 intel_mark_busy(dev_priv->dev);
3039 }
3040
3041 if (out_seqno)
3042 *out_seqno = request->seqno;
3043 return 0;
3044 }
3045
3046 static inline void
3047 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
3048 {
3049 struct drm_i915_file_private *file_priv = request->file_priv;
3050
3051 if (!file_priv)
3052 return;
3053
3054 spin_lock(&file_priv->mm.lock);
3055 list_del(&request->client_list);
3056 request->file_priv = NULL;
3057 spin_unlock(&file_priv->mm.lock);
3058 }
3059
3060 static bool i915_context_is_banned(struct drm_i915_private *dev_priv,
3061 const struct i915_hw_context *ctx)
3062 {
3063 unsigned long elapsed;
3064
3065 elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
3066
3067 if (ctx->hang_stats.banned)
3068 return true;
3069
3070 if (elapsed <= DRM_I915_CTX_BAN_PERIOD) {
3071 if (!i915_gem_context_is_default(ctx)) {
3072 DRM_DEBUG("context hanging too fast, banning!\n");
3073 return true;
3074 } else if (dev_priv->gpu_error.stop_rings == 0) {
3075 DRM_ERROR("gpu hanging too fast, banning!\n");
3076 return true;
3077 }
3078 }
3079
3080 return false;
3081 }
3082
3083 static void i915_set_reset_status(struct drm_i915_private *dev_priv,
3084 struct i915_hw_context *ctx,
3085 const bool guilty)
3086 {
3087 struct i915_ctx_hang_stats *hs;
3088
3089 if (WARN_ON(!ctx))
3090 return;
3091
3092 hs = &ctx->hang_stats;
3093
3094 if (guilty) {
3095 hs->banned = i915_context_is_banned(dev_priv, ctx);
3096 hs->batch_active++;
3097 hs->guilty_ts = get_seconds();
3098 } else {
3099 hs->batch_pending++;
3100 }
3101 }
3102
3103 static void i915_gem_free_request(struct drm_i915_gem_request *request)
3104 {
3105 list_del(&request->list);
3106 i915_gem_request_remove_from_client(request);
3107
3108 if (request->ctx)
3109 i915_gem_context_unreference(request->ctx);
3110
3111 kfree(request);
3112 }
3113
3114 struct drm_i915_gem_request *
3115 i915_gem_find_active_request(struct intel_ring_buffer *ring)
3116 {
3117 struct drm_i915_gem_request *request;
3118 u32 completed_seqno;
3119
3120 completed_seqno = ring->get_seqno(ring, false);
3121
3122 list_for_each_entry(request, &ring->request_list, list) {
3123 if (i915_seqno_passed(completed_seqno, request->seqno))
3124 continue;
3125
3126 return request;
3127 }
3128
3129 return NULL;
3130 }
3131
3132 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
3133 struct intel_ring_buffer *ring)
3134 {
3135 struct drm_i915_gem_request *request;
3136 bool ring_hung;
3137
3138 request = i915_gem_find_active_request(ring);
3139
3140 if (request == NULL)
3141 return;
3142
3143 ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
3144
3145 i915_set_reset_status(dev_priv, request->ctx, ring_hung);
3146
3147 list_for_each_entry_continue(request, &ring->request_list, list)
3148 i915_set_reset_status(dev_priv, request->ctx, false);
3149 }
3150
3151 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
3152 struct intel_ring_buffer *ring)
3153 {
3154 while (!list_empty(&ring->active_list)) {
3155 struct drm_i915_gem_object *obj;
3156
3157 obj = list_first_entry(&ring->active_list,
3158 struct drm_i915_gem_object,
3159 ring_list);
3160
3161 i915_gem_object_move_to_inactive(obj);
3162 }
3163
3164 /*
3165 * We must free the requests after all the corresponding objects have
3166 * been moved off active lists. Which is the same order as the normal
3167 * retire_requests function does. This is important if objects hold
3168 * implicit references on things like e.g. ppgtt address spaces through
3169 * the request.
3170 */
3171 while (!list_empty(&ring->request_list)) {
3172 struct drm_i915_gem_request *request;
3173
3174 request = list_first_entry(&ring->request_list,
3175 struct drm_i915_gem_request,
3176 list);
3177
3178 i915_gem_free_request(request);
3179 }
3180 }
3181
3182 void i915_gem_restore_fences(struct drm_device *dev)
3183 {
3184 struct drm_i915_private *dev_priv = dev->dev_private;
3185 int i;
3186
3187 for (i = 0; i < dev_priv->num_fence_regs; i++) {
3188 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
3189
3190 /*
3191 * Commit delayed tiling changes if we have an object still
3192 * attached to the fence, otherwise just clear the fence.
3193 */
3194 if (reg->obj) {
3195 i915_gem_object_update_fence(reg->obj, reg,
3196 reg->obj->tiling_mode);
3197 } else {
3198 i915_gem_write_fence(dev, i, NULL);
3199 }
3200 }
3201 }
3202
3203 void i915_gem_reset(struct drm_device *dev)
3204 {
3205 struct drm_i915_private *dev_priv = dev->dev_private;
3206 struct intel_ring_buffer *ring;
3207 int i;
3208
3209 /*
3210 * Before we free the objects from the requests, we need to inspect
3211 * them for finding the guilty party. As the requests only borrow
3212 * their reference to the objects, the inspection must be done first.
3213 */
3214 for_each_ring(ring, dev_priv, i)
3215 i915_gem_reset_ring_status(dev_priv, ring);
3216
3217 for_each_ring(ring, dev_priv, i)
3218 i915_gem_reset_ring_cleanup(dev_priv, ring);
3219
3220 i915_gem_cleanup_ringbuffer(dev);
3221
3222 i915_gem_context_reset(dev);
3223
3224 i915_gem_restore_fences(dev);
3225 }
3226
3227 /**
3228 * This function clears the request list as sequence numbers are passed.
3229 */
3230 static void
3231 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
3232 {
3233 uint32_t seqno;
3234
3235 if (list_empty(&ring->request_list))
3236 return;
3237
3238 WARN_ON(i915_verify_lists(ring->dev));
3239
3240 seqno = ring->get_seqno(ring, true);
3241
3242 /* Move any buffers on the active list that are no longer referenced
3243 * by the ringbuffer to the flushing/inactive lists as appropriate,
3244 * before we free the context associated with the requests.
3245 */
3246 while (!list_empty(&ring->active_list)) {
3247 struct drm_i915_gem_object *obj;
3248
3249 obj = list_first_entry(&ring->active_list,
3250 struct drm_i915_gem_object,
3251 ring_list);
3252
3253 if (!i915_seqno_passed(seqno, obj->last_read_seqno))
3254 break;
3255
3256 i915_gem_object_move_to_inactive(obj);
3257 }
3258
3259
3260 while (!list_empty(&ring->request_list)) {
3261 struct drm_i915_gem_request *request;
3262
3263 request = list_first_entry(&ring->request_list,
3264 struct drm_i915_gem_request,
3265 list);
3266
3267 if (!i915_seqno_passed(seqno, request->seqno))
3268 break;
3269
3270 trace_i915_gem_request_retire(ring, request->seqno);
3271 /* We know the GPU must have read the request to have
3272 * sent us the seqno + interrupt, so use the position
3273 * of the tail of the request to update the last known position
3274 * of the GPU head.
3275 */
3276 ring->last_retired_head = request->tail;
3277
3278 i915_gem_free_request(request);
3279 }
3280
3281 if (unlikely(ring->trace_irq_seqno &&
3282 i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
3283 ring->irq_put(ring);
3284 ring->trace_irq_seqno = 0;
3285 }
3286
3287 WARN_ON(i915_verify_lists(ring->dev));
3288 }
3289
3290 bool
3291 i915_gem_retire_requests(struct drm_device *dev)
3292 {
3293 struct drm_i915_private *dev_priv = dev->dev_private;
3294 struct intel_ring_buffer *ring;
3295 bool idle = true;
3296 int i;
3297
3298 for_each_ring(ring, dev_priv, i) {
3299 i915_gem_retire_requests_ring(ring);
3300 idle &= list_empty(&ring->request_list);
3301 }
3302
3303 if (idle)
3304 mod_delayed_work(dev_priv->wq,
3305 &dev_priv->mm.idle_work,
3306 msecs_to_jiffies(100));
3307
3308 return idle;
3309 }
3310
3311 static void
3312 i915_gem_retire_work_handler(struct work_struct *work)
3313 {
3314 struct drm_i915_private *dev_priv =
3315 container_of(work, typeof(*dev_priv), mm.retire_work.work);
3316 struct drm_device *dev = dev_priv->dev;
3317 bool idle;
3318
3319 /* Come back later if the device is busy... */
3320 idle = false;
3321 if (mutex_trylock(&dev->struct_mutex)) {
3322 idle = i915_gem_retire_requests(dev);
3323 mutex_unlock(&dev->struct_mutex);
3324 }
3325 if (!idle)
3326 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
3327 round_jiffies_up_relative(HZ));
3328 }
3329
3330 static void
3331 i915_gem_idle_work_handler(struct work_struct *work)
3332 {
3333 struct drm_i915_private *dev_priv =
3334 container_of(work, typeof(*dev_priv), mm.idle_work.work);
3335
3336 intel_mark_idle(dev_priv->dev);
3337 }
3338
3339 /**
3340 * Ensures that an object will eventually get non-busy by flushing any required
3341 * write domains, emitting any outstanding lazy request and retiring any
3342 * completed requests.
3343 */
3344 static int
3345 i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
3346 {
3347 int ret;
3348
3349 if (obj->active) {
3350 ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
3351 if (ret)
3352 return ret;
3353
3354 i915_gem_retire_requests_ring(obj->ring);
3355 }
3356
3357 return 0;
3358 }
3359
3360 /**
3361 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
3362 * @DRM_IOCTL_ARGS: standard ioctl arguments
3363 *
3364 * Returns 0 if successful, else an error is returned with the remaining time in
3365 * the timeout parameter.
3366 * -ETIME: object is still busy after timeout
3367 * -ERESTARTSYS: signal interrupted the wait
3368 * -ENOENT: object doesn't exist
3369 * Also possible, but rare:
3370 * -EAGAIN: GPU wedged
3371 * -ENOMEM: damn
3372 * -ENODEV: Internal IRQ fail
3373 * -E?: The add request failed
3374 *
3375 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
3376 * non-zero timeout parameter the wait ioctl will wait for the given number of
3377 * nanoseconds on an object becoming unbusy. Since the wait itself does so
3378 * without holding struct_mutex the object may become re-busied before this
3379 * function completes. A similar but shorter race condition exists in the busy
3380 * ioctl.
3381 */
3382 int
3383 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3384 {
3385 struct drm_i915_private *dev_priv = dev->dev_private;
3386 struct drm_i915_gem_wait *args = data;
3387 struct drm_gem_object *gobj;
3388 struct drm_i915_gem_object *obj;
3389 struct intel_ring_buffer *ring = NULL;
3390 struct timespec timeout_stack, *timeout = NULL;
3391 unsigned reset_counter;
3392 u32 seqno = 0;
3393 int ret = 0;
3394
3395 if (args->timeout_ns >= 0) {
3396 timeout_stack = ns_to_timespec(args->timeout_ns);
3397 timeout = &timeout_stack;
3398 }
3399
3400 ret = i915_mutex_lock_interruptible(dev);
3401 if (ret)
3402 return ret;
3403
3404 gobj = drm_gem_object_lookup(dev, file, args->bo_handle);
3405 if (gobj == NULL) {
3406 mutex_unlock(&dev->struct_mutex);
3407 return -ENOENT;
3408 }
3409 obj = to_intel_bo(gobj);
3410
3411 /* Need to make sure the object gets inactive eventually. */
3412 ret = i915_gem_object_flush_active(obj);
3413 if (ret)
3414 goto out;
3415
3416 if (obj->active) {
3417 seqno = obj->last_read_seqno;
3418 ring = obj->ring;
3419 }
3420
3421 if (seqno == 0)
3422 goto out;
3423
3424 /* Do this after OLR check to make sure we make forward progress polling
3425 * on this IOCTL with a 0 timeout (like busy ioctl)
3426 */
3427 if (!args->timeout_ns) {
3428 ret = -ETIME;
3429 goto out;
3430 }
3431
3432 drm_gem_object_unreference(&obj->base);
3433 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
3434 mutex_unlock(&dev->struct_mutex);
3435
3436 ret = __wait_seqno(ring, seqno, reset_counter, true, timeout, file->driver_priv);
3437 if (timeout)
3438 args->timeout_ns = timespec_to_ns(timeout);
3439 return ret;
3440
3441 out:
3442 drm_gem_object_unreference(&obj->base);
3443 mutex_unlock(&dev->struct_mutex);
3444 return ret;
3445 }
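
/*
 * Editor's sketch of the userspace view of the wait ioctl documented
 * above: a timeout of 0 behaves like the busy ioctl, while a positive
 * timeout blocks and is rewritten with the time remaining.  Illustrative
 * only; it assumes a libdrm-style drmIoctl() wrapper and an existing
 * GEM handle.
 */
#if 0
struct drm_i915_gem_wait wait = {
	.bo_handle = handle,
	.timeout_ns = 1000000000,	/* wait up to one second */
};

if (drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait) == 0)
	printf("idle, %lld ns of the budget left\n",
	    (long long)wait.timeout_ns);
else if (errno == ETIME)
	printf("still busy after the timeout\n");
#endif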
3446
3447 /**
3448 * i915_gem_object_sync - sync an object to a ring.
3449 *
3450 * @obj: object which may be in use on another ring.
3451 * @to: ring we wish to use the object on. May be NULL.
3452 *
3453 * This code is meant to abstract object synchronization with the GPU.
3454 * Calling with NULL implies synchronizing the object with the CPU
3455 * rather than a particular GPU ring.
3456 *
3457 * Returns 0 if successful, else propagates up the lower layer error.
3458 */
3459 int
3460 i915_gem_object_sync(struct drm_i915_gem_object *obj,
3461 struct intel_ring_buffer *to)
3462 {
3463 struct intel_ring_buffer *from = obj->ring;
3464 u32 seqno;
3465 int ret, idx;
3466
3467 if (from == NULL || to == from)
3468 return 0;
3469
3470 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
3471 return i915_gem_object_wait_rendering(obj, false);
3472
3473 idx = intel_ring_sync_index(from, to);
3474
3475 seqno = obj->last_read_seqno;
3476 if (seqno <= from->sync_seqno[idx])
3477 return 0;
3478
3479 ret = i915_gem_check_olr(obj->ring, seqno);
3480 if (ret)
3481 return ret;
3482
3483 trace_i915_gem_ring_sync_to(from, to, seqno);
3484 ret = to->sync_to(to, from, seqno);
3485 if (!ret)
3486 /* We use last_read_seqno because sync_to()
3487 * might have just caused seqno wrap under
3488 * the radar.
3489 */
3490 from->sync_seqno[idx] = obj->last_read_seqno;
3491
3492 return ret;
3493 }
3494
3495 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
3496 {
3497 u32 old_write_domain, old_read_domains;
3498
3499 /* Force a pagefault for domain tracking on next user access */
3500 i915_gem_release_mmap(obj);
3501
3502 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3503 return;
3504
3505 /* Wait for any direct GTT access to complete */
3506 mb();
3507
3508 old_read_domains = obj->base.read_domains;
3509 old_write_domain = obj->base.write_domain;
3510
3511 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
3512 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
3513
3514 trace_i915_gem_object_change_domain(obj,
3515 old_read_domains,
3516 old_write_domain);
3517 }
3518
3519 int i915_vma_unbind(struct i915_vma *vma)
3520 {
3521 struct drm_i915_gem_object *obj = vma->obj;
3522 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3523 int ret;
3524
3525 if (list_empty(&vma->vma_link))
3526 return 0;
3527
3528 if (!drm_mm_node_allocated(&vma->node)) {
3529 i915_gem_vma_destroy(vma);
3530 return 0;
3531 }
3532
3533 if (vma->pin_count)
3534 return -EBUSY;
3535
3536 BUG_ON(obj->pages == NULL);
3537
3538 ret = i915_gem_object_finish_gpu(obj);
3539 if (ret)
3540 return ret;
3541 /* Continue on if we fail due to EIO, the GPU is hung so we
3542 * should be safe and we need to cleanup or else we might
3543 * cause memory corruption through use-after-free.
3544 */
3545
3546 i915_gem_object_finish_gtt(obj);
3547
3548 /* release the fence reg _after_ flushing */
3549 ret = i915_gem_object_put_fence(obj);
3550 if (ret)
3551 return ret;
3552
3553 trace_i915_vma_unbind(vma);
3554
3555 vma->unbind_vma(vma);
3556
3557 i915_gem_gtt_finish_object(obj);
3558
3559 list_del_init(&vma->mm_list);
3560 /* Avoid an unnecessary call to unbind on rebind. */
3561 if (i915_is_ggtt(vma->vm))
3562 obj->map_and_fenceable = true;
3563
3564 drm_mm_remove_node(&vma->node);
3565 i915_gem_vma_destroy(vma);
3566
3567 /* Since the unbound list is global, only move to that list if
3568 * no more VMAs exist. */
3569 if (list_empty(&obj->vma_list))
3570 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
3571
3572 /* And finally now the object is completely decoupled from this vma,
3573 * we can drop its hold on the backing storage and allow it to be
3574 * reaped by the shrinker.
3575 */
3576 i915_gem_object_unpin_pages(obj);
3577
3578 return 0;
3579 }
3580
3581 int i915_gpu_idle(struct drm_device *dev)
3582 {
3583 struct drm_i915_private *dev_priv = dev->dev_private;
3584 struct intel_ring_buffer *ring;
3585 int ret, i;
3586
3587 /* Flush everything onto the inactive list. */
3588 for_each_ring(ring, dev_priv, i) {
3589 ret = i915_switch_context(ring, ring->default_context);
3590 if (ret)
3591 return ret;
3592
3593 ret = intel_ring_idle(ring);
3594 if (ret)
3595 return ret;
3596 }
3597
3598 return 0;
3599 }
3600
3601 static void i965_write_fence_reg(struct drm_device *dev, int reg,
3602 struct drm_i915_gem_object *obj)
3603 {
3604 struct drm_i915_private *dev_priv = dev->dev_private;
3605 int fence_reg;
3606 int fence_pitch_shift;
3607
3608 if (INTEL_INFO(dev)->gen >= 6) {
3609 fence_reg = FENCE_REG_SANDYBRIDGE_0;
3610 fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT;
3611 } else {
3612 fence_reg = FENCE_REG_965_0;
3613 fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
3614 }
3615
3616 fence_reg += reg * 8;
3617
3618 /* To w/a incoherency with non-atomic 64-bit register updates,
3619 * we split the 64-bit update into two 32-bit writes. In order
3620 * for a partial fence not to be evaluated between writes, we
3621 * precede the update with write to turn off the fence register,
3622 * and only enable the fence as the last step.
3623 *
3624 * For extra levels of paranoia, we make sure each step lands
3625 * before applying the next step.
3626 */
3627 I915_WRITE(fence_reg, 0);
3628 POSTING_READ(fence_reg);
3629
3630 if (obj) {
3631 u32 size = i915_gem_obj_ggtt_size(obj);
3632 uint64_t val;
3633
3634 val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
3635 0xfffff000) << 32;
3636 val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
3637 val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
3638 if (obj->tiling_mode == I915_TILING_Y)
3639 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
3640 val |= I965_FENCE_REG_VALID;
3641
3642 I915_WRITE(fence_reg + 4, val >> 32);
3643 POSTING_READ(fence_reg + 4);
3644
3645 I915_WRITE(fence_reg + 0, val);
3646 POSTING_READ(fence_reg);
3647 } else {
3648 I915_WRITE(fence_reg + 4, 0);
3649 POSTING_READ(fence_reg + 4);
3650 }
3651 }
3652
3653 static void i915_write_fence_reg(struct drm_device *dev, int reg,
3654 struct drm_i915_gem_object *obj)
3655 {
3656 struct drm_i915_private *dev_priv = dev->dev_private;
3657 u32 val;
3658
3659 if (obj) {
3660 u32 size = i915_gem_obj_ggtt_size(obj);
3661 int pitch_val;
3662 int tile_width;
3663
3664 WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) ||
3665 (size & -size) != size ||
3666 (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
3667 "object 0x%08lx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
3668 i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);
3669
3670 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
3671 tile_width = 128;
3672 else
3673 tile_width = 512;
3674
3675 /* Note: pitch better be a power of two tile widths */
3676 pitch_val = obj->stride / tile_width;
3677 pitch_val = ffs(pitch_val) - 1;
3678
3679 val = i915_gem_obj_ggtt_offset(obj);
3680 if (obj->tiling_mode == I915_TILING_Y)
3681 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
3682 val |= I915_FENCE_SIZE_BITS(size);
3683 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
3684 val |= I830_FENCE_REG_VALID;
3685 } else
3686 val = 0;
3687
3688 if (reg < 8)
3689 reg = FENCE_REG_830_0 + reg * 4;
3690 else
3691 reg = FENCE_REG_945_8 + (reg - 8) * 4;
3692
3693 I915_WRITE(reg, val);
3694 POSTING_READ(reg);
3695 }
3696
3697 static void i830_write_fence_reg(struct drm_device *dev, int reg,
3698 struct drm_i915_gem_object *obj)
3699 {
3700 struct drm_i915_private *dev_priv = dev->dev_private;
3701 uint32_t val;
3702
3703 if (obj) {
3704 u32 size = i915_gem_obj_ggtt_size(obj);
3705 uint32_t pitch_val;
3706
3707 WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) ||
3708 (size & -size) != size ||
3709 (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
3710 "object 0x%08lx not 512K or pot-size 0x%08x aligned\n",
3711 i915_gem_obj_ggtt_offset(obj), size);
3712
3713 pitch_val = obj->stride / 128;
3714 pitch_val = ffs(pitch_val) - 1;
3715
3716 val = i915_gem_obj_ggtt_offset(obj);
3717 if (obj->tiling_mode == I915_TILING_Y)
3718 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
3719 val |= I830_FENCE_SIZE_BITS(size);
3720 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
3721 val |= I830_FENCE_REG_VALID;
3722 } else
3723 val = 0;
3724
3725 I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
3726 POSTING_READ(FENCE_REG_830_0 + reg * 4);
3727 }
3728
3729 inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
3730 {
3731 return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
3732 }
3733
3734 static void i915_gem_write_fence(struct drm_device *dev, int reg,
3735 struct drm_i915_gem_object *obj)
3736 {
3737 struct drm_i915_private *dev_priv = dev->dev_private;
3738
3739 /* Ensure that all CPU reads are completed before installing a fence
3740 * and all writes before removing the fence.
3741 */
3742 if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
3743 mb();
3744
3745 WARN(obj && (!obj->stride || !obj->tiling_mode),
3746 "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
3747 obj->stride, obj->tiling_mode);
3748
3749 switch (INTEL_INFO(dev)->gen) {
3750 case 8:
3751 case 7:
3752 case 6:
3753 case 5:
3754 case 4: i965_write_fence_reg(dev, reg, obj); break;
3755 case 3: i915_write_fence_reg(dev, reg, obj); break;
3756 case 2: i830_write_fence_reg(dev, reg, obj); break;
3757 default: BUG();
3758 }
3759
3760 /* And similarly be paranoid that no direct access to this region
3761 * is reordered to before the fence is installed.
3762 */
3763 if (i915_gem_object_needs_mb(obj))
3764 mb();
3765 }
3766
3767 static inline int fence_number(struct drm_i915_private *dev_priv,
3768 struct drm_i915_fence_reg *fence)
3769 {
3770 return fence - dev_priv->fence_regs;
3771 }
3772
3773 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
3774 struct drm_i915_fence_reg *fence,
3775 bool enable)
3776 {
3777 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3778 int reg = fence_number(dev_priv, fence);
3779
3780 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
3781
3782 if (enable) {
3783 obj->fence_reg = reg;
3784 fence->obj = obj;
3785 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
3786 } else {
3787 obj->fence_reg = I915_FENCE_REG_NONE;
3788 fence->obj = NULL;
3789 list_del_init(&fence->lru_list);
3790 }
3791 obj->fence_dirty = false;
3792 }
3793
3794 static int
3795 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
3796 {
3797 if (obj->last_fenced_seqno) {
3798 int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
3799 if (ret)
3800 return ret;
3801
3802 obj->last_fenced_seqno = 0;
3803 }
3804
3805 obj->fenced_gpu_access = false;
3806 return 0;
3807 }
3808
3809 int
3810 i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
3811 {
3812 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3813 struct drm_i915_fence_reg *fence;
3814 int ret;
3815
3816 ret = i915_gem_object_wait_fence(obj);
3817 if (ret)
3818 return ret;
3819
3820 if (obj->fence_reg == I915_FENCE_REG_NONE)
3821 return 0;
3822
3823 fence = &dev_priv->fence_regs[obj->fence_reg];
3824
3825 i915_gem_object_fence_lost(obj);
3826 i915_gem_object_update_fence(obj, fence, false);
3827
3828 return 0;
3829 }
3830
3831 static struct drm_i915_fence_reg *
3832 i915_find_fence_reg(struct drm_device *dev)
3833 {
3834 struct drm_i915_private *dev_priv = dev->dev_private;
3835 struct drm_i915_fence_reg *reg, *avail;
3836 int i;
3837
3838 /* First try to find a free reg */
3839 avail = NULL;
3840 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
3841 reg = &dev_priv->fence_regs[i];
3842 if (!reg->obj)
3843 return reg;
3844
3845 if (!reg->pin_count)
3846 avail = reg;
3847 }
3848
3849 if (avail == NULL)
3850 goto deadlock;
3851
3852 /* None available, try to steal one or wait for a user to finish */
3853 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
3854 if (reg->pin_count)
3855 continue;
3856
3857 return reg;
3858 }
3859
3860 deadlock:
3861 /* Wait for completion of pending flips which consume fences */
3862 if (intel_has_pending_fb_unpin(dev))
3863 return ERR_PTR(-EAGAIN);
3864
3865 return ERR_PTR(-EDEADLK);
3866 }
3867
3868 /**
3869 * i915_gem_object_get_fence - set up fencing for an object
3870 * @obj: object to map through a fence reg
3871 *
3872 * When mapping objects through the GTT, userspace wants to be able to write
3873 * to them without having to worry about swizzling if the object is tiled.
3874 * This function walks the fence regs looking for a free one for @obj,
3875 * stealing one if it can't find any.
3876 *
3877 * It then sets up the reg based on the object's properties: address, pitch
3878 * and tiling format.
3879 *
3880 * For an untiled surface, this removes any existing fence.
3881 */
3882 int
3883 i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
3884 {
3885 struct drm_device *dev = obj->base.dev;
3886 struct drm_i915_private *dev_priv = dev->dev_private;
3887 bool enable = obj->tiling_mode != I915_TILING_NONE;
3888 struct drm_i915_fence_reg *reg;
3889 int ret;
3890
3891 /* Have we updated the tiling parameters upon the object and so
3892 * will need to serialise the write to the associated fence register?
3893 */
3894 if (obj->fence_dirty) {
3895 ret = i915_gem_object_wait_fence(obj);
3896 if (ret)
3897 return ret;
3898 }
3899
3900 /* Just update our place in the LRU if our fence is getting reused. */
3901 if (obj->fence_reg != I915_FENCE_REG_NONE) {
3902 reg = &dev_priv->fence_regs[obj->fence_reg];
3903 if (!obj->fence_dirty) {
3904 			list_move_tail(&reg->lru_list,
3905 &dev_priv->mm.fence_list);
3906 return 0;
3907 }
3908 } else if (enable) {
3909 reg = i915_find_fence_reg(dev);
3910 if (IS_ERR(reg))
3911 return PTR_ERR(reg);
3912
3913 if (reg->obj) {
3914 struct drm_i915_gem_object *old = reg->obj;
3915
3916 ret = i915_gem_object_wait_fence(old);
3917 if (ret)
3918 return ret;
3919
3920 i915_gem_object_fence_lost(old);
3921 }
3922 } else
3923 return 0;
3924
3925 i915_gem_object_update_fence(obj, reg, enable);
3926
3927 return 0;
3928 }
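
/*
 * Illustrative sketch (not part of this file): a typical caller that wants
 * fenced CPU access through the GTT pins the object into the mappable
 * aperture and then requests a fence, all under struct_mutex:
 *
 *	ret = i915_gem_obj_ggtt_pin(obj, alignment, PIN_MAPPABLE);
 *	if (ret == 0) {
 *		ret = i915_gem_object_get_fence(obj);
 *		if (ret)
 *			i915_gem_object_ggtt_unpin(obj);
 *	}
 *
 * "alignment" here stands in for whatever the surface requires; untiled
 * objects simply have any stale fence removed, as described above.
 */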
3929
3930 static bool i915_gem_valid_gtt_space(struct drm_device *dev,
3931 struct drm_mm_node *gtt_space,
3932 unsigned long cache_level)
3933 {
3934 struct drm_mm_node *other;
3935
3936 /* On non-LLC machines we have to be careful when putting differing
3937 * types of snoopable memory together to avoid the prefetcher
3938 * crossing memory domains and dying.
3939 */
3940 if (HAS_LLC(dev))
3941 return true;
3942
3943 if (!drm_mm_node_allocated(gtt_space))
3944 return true;
3945
3946 	if (list_empty(&gtt_space->node_list))
3947 return true;
3948
3949 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
3950 if (other->allocated && !other->hole_follows && other->color != cache_level)
3951 return false;
3952
3953 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
3954 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
3955 return false;
3956
3957 return true;
3958 }
3959
3960 static void i915_gem_verify_gtt(struct drm_device *dev)
3961 {
3962 #if WATCH_GTT
3963 struct drm_i915_private *dev_priv = dev->dev_private;
3964 struct drm_i915_gem_object *obj;
3965 int err = 0;
3966
3967 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
3968 if (obj->gtt_space == NULL) {
3969 printk(KERN_ERR "object found on GTT list with no space reserved\n");
3970 err++;
3971 continue;
3972 }
3973
3974 if (obj->cache_level != obj->gtt_space->color) {
3975 printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n",
3976 i915_gem_obj_ggtt_offset(obj),
3977 i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj),
3978 obj->cache_level,
3979 obj->gtt_space->color);
3980 err++;
3981 continue;
3982 }
3983
3984 if (!i915_gem_valid_gtt_space(dev,
3985 obj->gtt_space,
3986 obj->cache_level)) {
3987 printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n",
3988 i915_gem_obj_ggtt_offset(obj),
3989 i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj),
3990 obj->cache_level);
3991 err++;
3992 continue;
3993 }
3994 }
3995
3996 WARN_ON(err);
3997 #endif
3998 }
3999
4000 /**
4001 * Finds free space in the GTT aperture and binds the object there.
4002 */
4003 static struct i915_vma *
4004 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
4005 struct i915_address_space *vm,
4006 unsigned alignment,
4007 uint64_t flags)
4008 {
4009 struct drm_device *dev = obj->base.dev;
4010 struct drm_i915_private *dev_priv = dev->dev_private;
4011 u32 size, fence_size, fence_alignment, unfenced_alignment;
4012 unsigned long start =
4013 flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
4014 unsigned long end =
4015 flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total;
4016 struct i915_vma *vma;
4017 int ret;
4018
4019 fence_size = i915_gem_get_gtt_size(dev,
4020 obj->base.size,
4021 obj->tiling_mode);
4022 fence_alignment = i915_gem_get_gtt_alignment(dev,
4023 obj->base.size,
4024 obj->tiling_mode, true);
4025 unfenced_alignment =
4026 i915_gem_get_gtt_alignment(dev,
4027 obj->base.size,
4028 obj->tiling_mode, false);
4029
4030 if (alignment == 0)
4031 alignment = flags & PIN_MAPPABLE ? fence_alignment :
4032 unfenced_alignment;
4033 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
4034 DRM_DEBUG("Invalid object alignment requested %u\n", alignment);
4035 return ERR_PTR(-EINVAL);
4036 }
4037
4038 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
4039
4040 /* If the object is bigger than the entire aperture, reject it early
4041 * before evicting everything in a vain attempt to find space.
4042 */
4043 if (obj->base.size > end) {
4044 DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%lu\n",
4045 obj->base.size,
4046 flags & PIN_MAPPABLE ? "mappable" : "total",
4047 end);
4048 return ERR_PTR(-E2BIG);
4049 }
4050
4051 ret = i915_gem_object_get_pages(obj);
4052 if (ret)
4053 return ERR_PTR(ret);
4054
4055 i915_gem_object_pin_pages(obj);
4056
4057 vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
4058 if (IS_ERR(vma))
4059 goto err_unpin;
4060
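	/*
	 * If inserting the node fails, evict something suitable from this
	 * address space and retry the search; only give up once eviction
	 * itself fails.
	 */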
4061 search_free:
4062 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
4063 size, alignment,
4064 obj->cache_level,
4065 start, end,
4066 DRM_MM_SEARCH_DEFAULT,
4067 DRM_MM_CREATE_DEFAULT);
4068 if (ret) {
4069 ret = i915_gem_evict_something(dev, vm, size, alignment,
4070 obj->cache_level,
4071 start, end,
4072 flags);
4073 if (ret == 0)
4074 goto search_free;
4075
4076 goto err_free_vma;
4077 }
4078 if (WARN_ON(!i915_gem_valid_gtt_space(dev, &vma->node,
4079 obj->cache_level))) {
4080 ret = -EINVAL;
4081 goto err_remove_node;
4082 }
4083
4084 ret = i915_gem_gtt_prepare_object(obj);
4085 if (ret)
4086 goto err_remove_node;
4087
4088 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
4089 list_add_tail(&vma->mm_list, &vm->inactive_list);
4090
4091 if (i915_is_ggtt(vm)) {
4092 bool mappable, fenceable;
4093
4094 fenceable = (vma->node.size == fence_size &&
4095 (vma->node.start & (fence_alignment - 1)) == 0);
4096
4097 mappable = (vma->node.start + obj->base.size <=
4098 dev_priv->gtt.mappable_end);
4099
4100 obj->map_and_fenceable = mappable && fenceable;
4101 }
4102
4103 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
4104
4105 trace_i915_vma_bind(vma, flags);
4106 vma->bind_vma(vma, obj->cache_level,
4107 flags & (PIN_MAPPABLE | PIN_GLOBAL) ? GLOBAL_BIND : 0);
4108
4109 i915_gem_verify_gtt(dev);
4110 return vma;
4111
4112 err_remove_node:
4113 drm_mm_remove_node(&vma->node);
4114 err_free_vma:
4115 i915_gem_vma_destroy(vma);
4116 vma = ERR_PTR(ret);
4117 err_unpin:
4118 i915_gem_object_unpin_pages(obj);
4119 return vma;
4120 }
4121
4122 bool
4123 i915_gem_clflush_object(struct drm_i915_gem_object *obj,
4124 bool force)
4125 {
4126 /* If we don't have a page list set up, then we're not pinned
4127 * to GPU, and we can ignore the cache flush because it'll happen
4128 * again at bind time.
4129 */
4130 if (obj->pages == NULL)
4131 return false;
4132
4133 /*
4134 * Stolen memory is always coherent with the GPU as it is explicitly
4135 * marked as wc by the system, or the system is cache-coherent.
4136 */
4137 if (obj->stolen)
4138 return false;
4139
4140 /* If the GPU is snooping the contents of the CPU cache,
4141 * we do not need to manually clear the CPU cache lines. However,
4142 * the caches are only snooped when the render cache is
4143 * flushed/invalidated. As we always have to emit invalidations
4144 * and flushes when moving into and out of the RENDER domain, correct
4145 * snooping behaviour occurs naturally as the result of our domain
4146 * tracking.
4147 */
4148 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
4149 return false;
4150
4151 trace_i915_gem_object_clflush(obj);
4152 #ifdef __NetBSD__
4153 drm_clflush_pglist(&obj->igo_pageq);
4154 #else
4155 drm_clflush_sg(obj->pages);
4156 #endif
4157
4158 return true;
4159 }
4160
4161 /** Flushes the GTT write domain for the object if it's dirty. */
4162 static void
4163 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
4164 {
4165 uint32_t old_write_domain;
4166
4167 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
4168 return;
4169
4170 /* No actual flushing is required for the GTT write domain. Writes
4171 * to it immediately go to main memory as far as we know, so there's
4172 * no chipset flush. It also doesn't land in render cache.
4173 *
4174 * However, we do have to enforce the order so that all writes through
4175 * the GTT land before any writes to the device, such as updates to
4176 * the GATT itself.
4177 */
4178 wmb();
4179
4180 old_write_domain = obj->base.write_domain;
4181 obj->base.write_domain = 0;
4182
4183 trace_i915_gem_object_change_domain(obj,
4184 obj->base.read_domains,
4185 old_write_domain);
4186 }
4187
4188 /** Flushes the CPU write domain for the object if it's dirty. */
4189 static void
4190 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
4191 bool force)
4192 {
4193 uint32_t old_write_domain;
4194
4195 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
4196 return;
4197
4198 if (i915_gem_clflush_object(obj, force))
4199 i915_gem_chipset_flush(obj->base.dev);
4200
4201 old_write_domain = obj->base.write_domain;
4202 obj->base.write_domain = 0;
4203
4204 trace_i915_gem_object_change_domain(obj,
4205 obj->base.read_domains,
4206 old_write_domain);
4207 }
4208
4209 /**
4210 * Moves a single object to the GTT read, and possibly write domain.
4211 *
4212 * This function returns when the move is complete, including waiting on
4213 * flushes to occur.
4214 */
4215 int
4216 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
4217 {
4218 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
4219 uint32_t old_write_domain, old_read_domains;
4220 int ret;
4221
4222 /* Not valid to be called on unbound objects. */
4223 if (!i915_gem_obj_bound_any(obj))
4224 return -EINVAL;
4225
4226 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
4227 return 0;
4228
4229 ret = i915_gem_object_wait_rendering(obj, !write);
4230 if (ret)
4231 return ret;
4232
4233 i915_gem_object_flush_cpu_write_domain(obj, false);
4234
4235 /* Serialise direct access to this object with the barriers for
4236 * coherent writes from the GPU, by effectively invalidating the
4237 * GTT domain upon first access.
4238 */
4239 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
4240 mb();
4241
4242 old_write_domain = obj->base.write_domain;
4243 old_read_domains = obj->base.read_domains;
4244
4245 /* It should now be out of any other write domains, and we can update
4246 * the domain values for our changes.
4247 */
4248 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
4249 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
4250 if (write) {
4251 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
4252 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
4253 obj->dirty = 1;
4254 }
4255
4256 trace_i915_gem_object_change_domain(obj,
4257 old_read_domains,
4258 old_write_domain);
4259
4260 /* And bump the LRU for this access */
4261 if (i915_gem_object_is_inactive(obj)) {
4262 struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
4263 if (vma)
4264 list_move_tail(&vma->mm_list,
4265 &dev_priv->gtt.base.inactive_list);
4266
4267 }
4268
4269 return 0;
4270 }
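
/*
 * Illustrative sketch (not part of this file): a caller holding struct_mutex
 * that is about to write to an object through a GTT mapping moves it into
 * the GTT write domain first, so the flushes and barriers above take effect:
 *
 *	ret = i915_gem_object_set_to_gtt_domain(obj, true);
 *	if (ret == 0)
 *		... write through the GTT mapping ...
 */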
4271
4272 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
4273 enum i915_cache_level cache_level)
4274 {
4275 struct drm_device *dev = obj->base.dev;
4276 struct i915_vma *vma, *next;
4277 int ret;
4278
4279 if (obj->cache_level == cache_level)
4280 return 0;
4281
4282 if (i915_gem_obj_is_pinned(obj)) {
4283 		DRM_DEBUG("cannot change the cache level of pinned objects\n");
4284 return -EBUSY;
4285 }
4286
4287 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
4288 if (!i915_gem_valid_gtt_space(dev, &vma->node, cache_level)) {
4289 ret = i915_vma_unbind(vma);
4290 if (ret)
4291 return ret;
4292 }
4293 }
4294
4295 if (i915_gem_obj_bound_any(obj)) {
4296 ret = i915_gem_object_finish_gpu(obj);
4297 if (ret)
4298 return ret;
4299
4300 i915_gem_object_finish_gtt(obj);
4301
4302 /* Before SandyBridge, you could not use tiling or fence
4303 * registers with snooped memory, so relinquish any fences
4304 * currently pointing to our region in the aperture.
4305 */
4306 if (INTEL_INFO(dev)->gen < 6) {
4307 ret = i915_gem_object_put_fence(obj);
4308 if (ret)
4309 return ret;
4310 }
4311
4312 list_for_each_entry(vma, &obj->vma_list, vma_link)
4313 if (drm_mm_node_allocated(&vma->node))
4314 vma->bind_vma(vma, cache_level,
4315 obj->has_global_gtt_mapping ? GLOBAL_BIND : 0);
4316 }
4317
4318 list_for_each_entry(vma, &obj->vma_list, vma_link)
4319 vma->node.color = cache_level;
4320 obj->cache_level = cache_level;
4321
4322 if (cpu_write_needs_clflush(obj)) {
4323 u32 old_read_domains, old_write_domain;
4324
4325 /* If we're coming from LLC cached, then we haven't
4326 * actually been tracking whether the data is in the
4327 * CPU cache or not, since we only allow one bit set
4328 * in obj->write_domain and have been skipping the clflushes.
4329 * Just set it to the CPU cache for now.
4330 */
4331 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
4332
4333 old_read_domains = obj->base.read_domains;
4334 old_write_domain = obj->base.write_domain;
4335
4336 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4337 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4338
4339 trace_i915_gem_object_change_domain(obj,
4340 old_read_domains,
4341 old_write_domain);
4342 }
4343
4344 i915_gem_verify_gtt(dev);
4345 return 0;
4346 }
4347
4348 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
4349 struct drm_file *file)
4350 {
4351 struct drm_i915_gem_caching *args = data;
4352 struct drm_gem_object *gobj;
4353 struct drm_i915_gem_object *obj;
4354 int ret;
4355
4356 ret = i915_mutex_lock_interruptible(dev);
4357 if (ret)
4358 return ret;
4359
4360 gobj = drm_gem_object_lookup(dev, file, args->handle);
4361 if (gobj == NULL) {
4362 ret = -ENOENT;
4363 goto unlock;
4364 }
4365 obj = to_intel_bo(gobj);
4366
4367 switch (obj->cache_level) {
4368 case I915_CACHE_LLC:
4369 case I915_CACHE_L3_LLC:
4370 args->caching = I915_CACHING_CACHED;
4371 break;
4372
4373 case I915_CACHE_WT:
4374 args->caching = I915_CACHING_DISPLAY;
4375 break;
4376
4377 default:
4378 args->caching = I915_CACHING_NONE;
4379 break;
4380 }
4381
4382 drm_gem_object_unreference(&obj->base);
4383 unlock:
4384 mutex_unlock(&dev->struct_mutex);
4385 return ret;
4386 }
4387
4388 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
4389 struct drm_file *file)
4390 {
4391 struct drm_i915_gem_caching *args = data;
4392 struct drm_gem_object *gobj;
4393 struct drm_i915_gem_object *obj;
4394 enum i915_cache_level level;
4395 int ret;
4396
4397 switch (args->caching) {
4398 case I915_CACHING_NONE:
4399 level = I915_CACHE_NONE;
4400 break;
4401 case I915_CACHING_CACHED:
4402 level = I915_CACHE_LLC;
4403 break;
4404 case I915_CACHING_DISPLAY:
4405 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
4406 break;
4407 default:
4408 return -EINVAL;
4409 }
4410
4411 ret = i915_mutex_lock_interruptible(dev);
4412 if (ret)
4413 return ret;
4414
4415 gobj = drm_gem_object_lookup(dev, file, args->handle);
4416 if (gobj == NULL) {
4417 ret = -ENOENT;
4418 goto unlock;
4419 }
4420 obj = to_intel_bo(gobj);
4421
4422 ret = i915_gem_object_set_cache_level(obj, level);
4423
4424 drm_gem_object_unreference(&obj->base);
4425 unlock:
4426 mutex_unlock(&dev->struct_mutex);
4427 return ret;
4428 }
4429
4430 static bool is_pin_display(struct drm_i915_gem_object *obj)
4431 {
4432 /* There are 3 sources that pin objects:
4433 * 1. The display engine (scanouts, sprites, cursors);
4434 * 2. Reservations for execbuffer;
4435 * 3. The user.
4436 *
4437 * We can ignore reservations as we hold the struct_mutex and
4438 * are only called outside of the reservation path. The user
4439 * can only increment pin_count once, and so if after
4440 * subtracting the potential reference by the user, any pin_count
4441 * remains, it must be due to another use by the display engine.
4442 */
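	/*
	 * Worked example (illustrative): pin_count == 2 with
	 * user_pin_count == 1 leaves 1 after subtracting the user's possible
	 * pin, so the object is still considered pinned for display;
	 * pin_count == 1 with user_pin_count == 1 leaves 0, and it is not.
	 */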
4443 return i915_gem_obj_to_ggtt(obj)->pin_count - !!obj->user_pin_count;
4444 }
4445
4446 /*
4447 * Prepare buffer for display plane (scanout, cursors, etc).
4448 * Can be called from an uninterruptible phase (modesetting) and allows
4449 * any flushes to be pipelined (for pageflips).
4450 */
4451 int
4452 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
4453 u32 alignment,
4454 struct intel_ring_buffer *pipelined)
4455 {
4456 u32 old_read_domains, old_write_domain;
4457 int ret;
4458
4459 if (pipelined != obj->ring) {
4460 ret = i915_gem_object_sync(obj, pipelined);
4461 if (ret)
4462 return ret;
4463 }
4464
4465 /* Mark the pin_display early so that we account for the
4466 * display coherency whilst setting up the cache domains.
4467 */
4468 obj->pin_display = true;
4469
4470 /* The display engine is not coherent with the LLC cache on gen6. As
4471 * a result, we make sure that the pinning that is about to occur is
4472 * done with uncached PTEs. This is lowest common denominator for all
4473 * chipsets.
4474 *
4475 * However for gen6+, we could do better by using the GFDT bit instead
4476 * of uncaching, which would allow us to flush all the LLC-cached data
4477 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
4478 */
4479 ret = i915_gem_object_set_cache_level(obj,
4480 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
4481 if (ret)
4482 goto err_unpin_display;
4483
4484 /* As the user may map the buffer once pinned in the display plane
4485 * (e.g. libkms for the bootup splash), we have to ensure that we
4486 * always use map_and_fenceable for all scanout buffers.
4487 */
4488 ret = i915_gem_obj_ggtt_pin(obj, alignment, PIN_MAPPABLE);
4489 if (ret)
4490 goto err_unpin_display;
4491
4492 i915_gem_object_flush_cpu_write_domain(obj, true);
4493
4494 old_write_domain = obj->base.write_domain;
4495 old_read_domains = obj->base.read_domains;
4496
4497 /* It should now be out of any other write domains, and we can update
4498 * the domain values for our changes.
4499 */
4500 obj->base.write_domain = 0;
4501 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
4502
4503 trace_i915_gem_object_change_domain(obj,
4504 old_read_domains,
4505 old_write_domain);
4506
4507 return 0;
4508
4509 err_unpin_display:
4510 obj->pin_display = is_pin_display(obj);
4511 return ret;
4512 }
4513
4514 void
4515 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj)
4516 {
4517 i915_gem_object_ggtt_unpin(obj);
4518 obj->pin_display = is_pin_display(obj);
4519 }
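
/*
 * Illustrative sketch (not part of this file): display code brackets the
 * lifetime of a scanout buffer with these two calls, e.g. around a flip:
 *
 *	ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined);
 *	if (ret)
 *		return ret;
 *	... program the plane with i915_gem_obj_ggtt_offset(obj) ...
 *	i915_gem_object_unpin_from_display_plane(obj);
 *
 * "alignment" and "pipelined" stand in for whatever the caller's plane and
 * ring require.
 */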
4520
4521 int
4522 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
4523 {
4524 int ret;
4525
4526 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
4527 return 0;
4528
4529 ret = i915_gem_object_wait_rendering(obj, false);
4530 if (ret)
4531 return ret;
4532
4533 /* Ensure that we invalidate the GPU's caches and TLBs. */
4534 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
4535 return 0;
4536 }
4537
4538 /**
4539 * Moves a single object to the CPU read, and possibly write domain.
4540 *
4541 * This function returns when the move is complete, including waiting on
4542 * flushes to occur.
4543 */
4544 int
4545 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
4546 {
4547 uint32_t old_write_domain, old_read_domains;
4548 int ret;
4549
4550 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
4551 return 0;
4552
4553 ret = i915_gem_object_wait_rendering(obj, !write);
4554 if (ret)
4555 return ret;
4556
4557 i915_gem_object_flush_gtt_write_domain(obj);
4558
4559 old_write_domain = obj->base.write_domain;
4560 old_read_domains = obj->base.read_domains;
4561
4562 /* Flush the CPU cache if it's still invalid. */
4563 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
4564 i915_gem_clflush_object(obj, false);
4565
4566 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
4567 }
4568
4569 /* It should now be out of any other write domains, and we can update
4570 * the domain values for our changes.
4571 */
4572 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
4573
4574 /* If we're writing through the CPU, then the GPU read domains will
4575 * need to be invalidated at next use.
4576 */
4577 if (write) {
4578 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4579 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4580 }
4581
4582 trace_i915_gem_object_change_domain(obj,
4583 old_read_domains,
4584 old_write_domain);
4585
4586 return 0;
4587 }
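
/*
 * Illustrative sketch (not part of this file): reading an object back with
 * the CPU, under struct_mutex, first moves it to the CPU read domain so any
 * stale cachelines are flushed as above:
 *
 *	ret = i915_gem_object_set_to_cpu_domain(obj, false);
 *	if (ret == 0)
 *		... read the backing pages with the CPU ...
 */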
4588
4589 /* Throttle our rendering by waiting until the ring has completed our requests
4590 * emitted over 20 msec ago.
4591 *
4592 * Note that if we were to use the current jiffies each time around the loop,
4593 * we wouldn't escape the function with any frames outstanding if the time to
4594 * render a frame was over 20ms.
4595 *
4596 * This should get us reasonable parallelism between CPU and GPU but also
4597 * relatively low latency when blocking on a particular request to finish.
4598 */
4599 static int
4600 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
4601 {
4602 struct drm_i915_private *dev_priv = dev->dev_private;
4603 struct drm_i915_file_private *file_priv = file->driver_priv;
4604 unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
4605 struct drm_i915_gem_request *request;
4606 struct intel_ring_buffer *ring = NULL;
4607 unsigned reset_counter;
4608 u32 seqno = 0;
4609 int ret;
4610
4611 ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
4612 if (ret)
4613 return ret;
4614
4615 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false);
4616 if (ret)
4617 return ret;
4618
4619 spin_lock(&file_priv->mm.lock);
4620 list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
4621 if (time_after_eq(request->emitted_jiffies, recent_enough))
4622 break;
4623
4624 ring = request->ring;
4625 seqno = request->seqno;
4626 }
4627 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
4628 spin_unlock(&file_priv->mm.lock);
4629
4630 if (seqno == 0)
4631 return 0;
4632
4633 ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, NULL);
4634 if (ret == 0)
4635 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
4636
4637 return ret;
4638 }
4639
4640 static bool
4641 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
4642 {
4643 struct drm_i915_gem_object *obj = vma->obj;
4644
4645 if (alignment &&
4646 vma->node.start & (alignment - 1))
4647 return true;
4648
4649 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
4650 return true;
4651
4652 if (flags & PIN_OFFSET_BIAS &&
4653 vma->node.start < (flags & PIN_OFFSET_MASK))
4654 return true;
4655
4656 return false;
4657 }
4658
4659 int
4660 i915_gem_object_pin(struct drm_i915_gem_object *obj,
4661 struct i915_address_space *vm,
4662 uint32_t alignment,
4663 uint64_t flags)
4664 {
4665 struct i915_vma *vma;
4666 int ret;
4667
4668 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
4669 return -EINVAL;
4670
4671 vma = i915_gem_obj_to_vma(obj, vm);
4672 if (vma) {
4673 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
4674 return -EBUSY;
4675
4676 if (i915_vma_misplaced(vma, alignment, flags)) {
4677 WARN(vma->pin_count,
4678 "bo is already pinned with incorrect alignment:"
4679 " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d,"
4680 " obj->map_and_fenceable=%d\n",
4681 i915_gem_obj_offset(obj, vm), alignment,
4682 !!(flags & PIN_MAPPABLE),
4683 obj->map_and_fenceable);
4684 ret = i915_vma_unbind(vma);
4685 if (ret)
4686 return ret;
4687
4688 vma = NULL;
4689 }
4690 }
4691
4692 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
4693 vma = i915_gem_object_bind_to_vm(obj, vm, alignment, flags);
4694 if (IS_ERR(vma))
4695 return PTR_ERR(vma);
4696 }
4697
4698 if (flags & PIN_GLOBAL && !obj->has_global_gtt_mapping)
4699 vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
4700
4701 vma->pin_count++;
4702 if (flags & PIN_MAPPABLE)
4703 obj->pin_mappable |= true;
4704
4705 return 0;
4706 }
4707
4708 void
4709 i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj)
4710 {
4711 struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
4712
4713 BUG_ON(!vma);
4714 BUG_ON(vma->pin_count == 0);
4715 BUG_ON(!i915_gem_obj_ggtt_bound(obj));
4716
4717 if (--vma->pin_count == 0)
4718 obj->pin_mappable = false;
4719 }
4720
4721 int
4722 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
4723 struct drm_file *file)
4724 {
4725 struct drm_i915_gem_pin *args = data;
4726 struct drm_gem_object *gobj;
4727 struct drm_i915_gem_object *obj;
4728 int ret;
4729
4730 if (INTEL_INFO(dev)->gen >= 6)
4731 return -ENODEV;
4732
4733 ret = i915_mutex_lock_interruptible(dev);
4734 if (ret)
4735 return ret;
4736
4737 gobj = drm_gem_object_lookup(dev, file, args->handle);
4738 if (gobj == NULL) {
4739 ret = -ENOENT;
4740 goto unlock;
4741 }
4742 obj = to_intel_bo(gobj);
4743
4744 if (obj->madv != I915_MADV_WILLNEED) {
4745 DRM_DEBUG("Attempting to pin a purgeable buffer\n");
4746 ret = -EFAULT;
4747 goto out;
4748 }
4749
4750 if (obj->pin_filp != NULL && obj->pin_filp != file) {
4751 DRM_DEBUG("Already pinned in i915_gem_pin_ioctl(): %d\n",
4752 args->handle);
4753 ret = -EINVAL;
4754 goto out;
4755 }
4756
4757 if (obj->user_pin_count == ULONG_MAX) {
4758 ret = -EBUSY;
4759 goto out;
4760 }
4761
4762 if (obj->user_pin_count == 0) {
4763 ret = i915_gem_obj_ggtt_pin(obj, args->alignment, PIN_MAPPABLE);
4764 if (ret)
4765 goto out;
4766 }
4767
4768 obj->user_pin_count++;
4769 obj->pin_filp = file;
4770
4771 args->offset = i915_gem_obj_ggtt_offset(obj);
4772 out:
4773 drm_gem_object_unreference(&obj->base);
4774 unlock:
4775 mutex_unlock(&dev->struct_mutex);
4776 return ret;
4777 }
4778
4779 int
4780 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
4781 struct drm_file *file)
4782 {
4783 struct drm_i915_gem_pin *args = data;
4784 struct drm_gem_object *gobj;
4785 struct drm_i915_gem_object *obj;
4786 int ret;
4787
4788 ret = i915_mutex_lock_interruptible(dev);
4789 if (ret)
4790 return ret;
4791
4792 gobj = drm_gem_object_lookup(dev, file, args->handle);
4793 if (gobj == NULL) {
4794 ret = -ENOENT;
4795 goto unlock;
4796 }
4797 obj = to_intel_bo(gobj);
4798
4799 if (obj->pin_filp != file) {
4800 DRM_DEBUG("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
4801 args->handle);
4802 ret = -EINVAL;
4803 goto out;
4804 }
4805 obj->user_pin_count--;
4806 if (obj->user_pin_count == 0) {
4807 obj->pin_filp = NULL;
4808 i915_gem_object_ggtt_unpin(obj);
4809 }
4810
4811 out:
4812 drm_gem_object_unreference(&obj->base);
4813 unlock:
4814 mutex_unlock(&dev->struct_mutex);
4815 return ret;
4816 }
4817
4818 int
4819 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4820 struct drm_file *file)
4821 {
4822 struct drm_i915_gem_busy *args = data;
4823 struct drm_gem_object *gobj;
4824 struct drm_i915_gem_object *obj;
4825 int ret;
4826
4827 ret = i915_mutex_lock_interruptible(dev);
4828 if (ret)
4829 return ret;
4830
4831 gobj = drm_gem_object_lookup(dev, file, args->handle);
4832 if (gobj == NULL) {
4833 ret = -ENOENT;
4834 goto unlock;
4835 }
4836 obj = to_intel_bo(gobj);
4837
4838 /* Count all active objects as busy, even if they are currently not used
4839 * by the gpu. Users of this interface expect objects to eventually
4840 * become non-busy without any further actions, therefore emit any
4841 * necessary flushes here.
4842 */
4843 ret = i915_gem_object_flush_active(obj);
4844
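	/*
	 * The result encodes "active at all" in the low bits and, when the
	 * object is busy on a ring, that ring's flag in bits 16 and up
	 * (the BUILD_BUG_ON below keeps the flag within 16 bits).
	 */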
4845 args->busy = obj->active;
4846 if (obj->ring) {
4847 BUILD_BUG_ON(I915_NUM_RINGS > 16);
4848 args->busy |= intel_ring_flag(obj->ring) << 16;
4849 }
4850
4851 drm_gem_object_unreference(&obj->base);
4852 unlock:
4853 mutex_unlock(&dev->struct_mutex);
4854 return ret;
4855 }
4856
4857 int
4858 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4859 struct drm_file *file_priv)
4860 {
4861 return i915_gem_ring_throttle(dev, file_priv);
4862 }
4863
4864 int
4865 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4866 struct drm_file *file_priv)
4867 {
4868 struct drm_i915_gem_madvise *args = data;
4869 struct drm_gem_object *gobj;
4870 struct drm_i915_gem_object *obj;
4871 int ret;
4872
4873 switch (args->madv) {
4874 case I915_MADV_DONTNEED:
4875 case I915_MADV_WILLNEED:
4876 break;
4877 default:
4878 return -EINVAL;
4879 }
4880
4881 ret = i915_mutex_lock_interruptible(dev);
4882 if (ret)
4883 return ret;
4884
4885 gobj = drm_gem_object_lookup(dev, file_priv, args->handle);
4886 if (gobj == NULL) {
4887 ret = -ENOENT;
4888 goto unlock;
4889 }
4890 obj = to_intel_bo(gobj);
4891
4892 if (i915_gem_obj_is_pinned(obj)) {
4893 ret = -EINVAL;
4894 goto out;
4895 }
4896
4897 if (obj->madv != __I915_MADV_PURGED)
4898 obj->madv = args->madv;
4899
4900 /* if the object is no longer attached, discard its backing storage */
4901 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL)
4902 i915_gem_object_truncate(obj);
4903
4904 args->retained = obj->madv != __I915_MADV_PURGED;
4905
4906 out:
4907 drm_gem_object_unreference(&obj->base);
4908 unlock:
4909 mutex_unlock(&dev->struct_mutex);
4910 return ret;
4911 }
4912
4913 void i915_gem_object_init(struct drm_i915_gem_object *obj,
4914 const struct drm_i915_gem_object_ops *ops)
4915 {
4916 INIT_LIST_HEAD(&obj->global_list);
4917 INIT_LIST_HEAD(&obj->ring_list);
4918 INIT_LIST_HEAD(&obj->obj_exec_link);
4919 INIT_LIST_HEAD(&obj->vma_list);
4920
4921 obj->ops = ops;
4922
4923 obj->fence_reg = I915_FENCE_REG_NONE;
4924 obj->madv = I915_MADV_WILLNEED;
4925 /* Avoid an unnecessary call to unbind on the first bind. */
4926 obj->map_and_fenceable = true;
4927
4928 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
4929 }
4930
4931 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
4932 .get_pages = i915_gem_object_get_pages_gtt,
4933 .put_pages = i915_gem_object_put_pages_gtt,
4934 };
4935
4936 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
4937 size_t size)
4938 {
4939 #ifdef __NetBSD__
4940 struct drm_i915_private *const dev_priv = dev->dev_private;
4941 #endif
4942 struct drm_i915_gem_object *obj;
4943 #ifndef __NetBSD__
4944 struct address_space *mapping;
4945 gfp_t mask;
4946 #endif
4947
4948 obj = i915_gem_object_alloc(dev);
4949 if (obj == NULL)
4950 return NULL;
4951
4952 if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4953 i915_gem_object_free(obj);
4954 return NULL;
4955 }
4956
4957 #ifdef __NetBSD__
4958 uao_set_pgfl(obj->base.gemo_shm_uao, dev_priv->gtt.pgfl);
4959 #else
4960 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
4961 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
4962 /* 965gm cannot relocate objects above 4GiB. */
4963 mask &= ~__GFP_HIGHMEM;
4964 mask |= __GFP_DMA32;
4965 }
4966
4967 mapping = file_inode(obj->base.filp)->i_mapping;
4968 mapping_set_gfp_mask(mapping, mask);
4969 #endif
4970
4971 i915_gem_object_init(obj, &i915_gem_object_ops);
4972
4973 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4974 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4975
4976 if (HAS_LLC(dev)) {
4977 /* On some devices, we can have the GPU use the LLC (the CPU
4978 * cache) for about a 10% performance improvement
4979 * compared to uncached. Graphics requests other than
4980 * display scanout are coherent with the CPU in
4981 * accessing this cache. This means in this mode we
4982 * don't need to clflush on the CPU side, and on the
4983 * GPU side we only need to flush internal caches to
4984 * get data visible to the CPU.
4985 *
4986 * However, we maintain the display planes as UC, and so
4987 * need to rebind when first used as such.
4988 */
4989 obj->cache_level = I915_CACHE_LLC;
4990 } else
4991 obj->cache_level = I915_CACHE_NONE;
4992
4993 trace_i915_gem_object_create(obj);
4994
4995 return obj;
4996 }
4997
4998 void i915_gem_free_object(struct drm_gem_object *gem_obj)
4999 {
5000 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
5001 struct drm_device *dev = obj->base.dev;
5002 struct drm_i915_private *dev_priv = dev->dev_private;
5003 struct i915_vma *vma, *next;
5004
5005 intel_runtime_pm_get(dev_priv);
5006
5007 trace_i915_gem_object_destroy(obj);
5008
5009 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
5010 int ret;
5011
5012 vma->pin_count = 0;
5013 ret = i915_vma_unbind(vma);
5014 if (WARN_ON(ret == -ERESTARTSYS)) {
5015 bool was_interruptible;
5016
5017 was_interruptible = dev_priv->mm.interruptible;
5018 dev_priv->mm.interruptible = false;
5019
5020 WARN_ON(i915_vma_unbind(vma));
5021
5022 dev_priv->mm.interruptible = was_interruptible;
5023 }
5024 }
5025
5026 i915_gem_object_detach_phys(obj);
5027
5028 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up
5029 * before progressing. */
5030 if (obj->stolen)
5031 i915_gem_object_unpin_pages(obj);
5032
5033 if (WARN_ON(obj->pages_pin_count))
5034 obj->pages_pin_count = 0;
5035 i915_gem_object_put_pages(obj);
5036 i915_gem_object_free_mmap_offset(obj);
5037 i915_gem_object_release_stolen(obj);
5038
5039 BUG_ON(obj->pages);
5040
5041 #ifndef __NetBSD__ /* XXX drm prime */
5042 if (obj->base.import_attach)
5043 drm_prime_gem_destroy(&obj->base, NULL);
5044 #endif
5045
5046 drm_gem_object_release(&obj->base);
5047 i915_gem_info_remove_obj(dev_priv, obj->base.size);
5048
5049 kfree(obj->bit_17);
5050 i915_gem_object_free(obj);
5051
5052 intel_runtime_pm_put(dev_priv);
5053 }
5054
5055 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
5056 struct i915_address_space *vm)
5057 {
5058 struct i915_vma *vma;
5059 list_for_each_entry(vma, &obj->vma_list, vma_link)
5060 if (vma->vm == vm)
5061 return vma;
5062
5063 return NULL;
5064 }
5065
5066 void i915_gem_vma_destroy(struct i915_vma *vma)
5067 {
5068 WARN_ON(vma->node.allocated);
5069
5070 /* Keep the vma as a placeholder in the execbuffer reservation lists */
5071 if (!list_empty(&vma->exec_list))
5072 return;
5073
5074 list_del(&vma->vma_link);
5075
5076 kfree(vma);
5077 }
5078
5079 int
5080 i915_gem_suspend(struct drm_device *dev)
5081 {
5082 struct drm_i915_private *dev_priv = dev->dev_private;
5083 int ret = 0;
5084
5085 mutex_lock(&dev->struct_mutex);
5086 if (dev_priv->ums.mm_suspended)
5087 goto err;
5088
5089 ret = i915_gpu_idle(dev);
5090 if (ret)
5091 goto err;
5092
5093 i915_gem_retire_requests(dev);
5094
5095 /* Under UMS, be paranoid and evict. */
5096 if (!drm_core_check_feature(dev, DRIVER_MODESET))
5097 i915_gem_evict_everything(dev);
5098
5099 i915_kernel_lost_context(dev);
5100 i915_gem_cleanup_ringbuffer(dev);
5101
5102 /* Hack! Don't let anybody do execbuf while we don't control the chip.
5103 * We need to replace this with a semaphore, or something.
5104 * And not confound ums.mm_suspended!
5105 */
5106 dev_priv->ums.mm_suspended = !drm_core_check_feature(dev,
5107 DRIVER_MODESET);
5108 mutex_unlock(&dev->struct_mutex);
5109
5110 del_timer_sync(&dev_priv->gpu_error.hangcheck_timer);
5111 cancel_delayed_work_sync(&dev_priv->mm.retire_work);
5112 cancel_delayed_work_sync(&dev_priv->mm.idle_work);
5113
5114 return 0;
5115
5116 err:
5117 mutex_unlock(&dev->struct_mutex);
5118 return ret;
5119 }
5120
5121 int i915_gem_l3_remap(struct intel_ring_buffer *ring, int slice)
5122 {
5123 struct drm_device *dev = ring->dev;
5124 struct drm_i915_private *dev_priv = dev->dev_private;
5125 u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
5126 u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
5127 int i, ret;
5128
5129 if (!HAS_L3_DPF(dev) || !remap_info)
5130 return 0;
5131
5132 ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3);
5133 if (ret)
5134 return ret;
5135
5136 /*
5137 * Note: We do not worry about the concurrent register cacheline hang
5138 * here because no other code should access these registers other than
5139 * at initialization time.
5140 */
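	/*
	 * Each remap entry below costs three dwords (the MI_LOAD_REGISTER_IMM
	 * header, the register offset and the value), and the loop walks
	 * GEN7_L3LOG_SIZE/4 entries, which is why intel_ring_begin() above
	 * reserved GEN7_L3LOG_SIZE / 4 * 3 dwords.
	 */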
5141 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
5142 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
5143 intel_ring_emit(ring, reg_base + i);
5144 intel_ring_emit(ring, remap_info[i/4]);
5145 }
5146
5147 intel_ring_advance(ring);
5148
5149 return ret;
5150 }
5151
5152 void i915_gem_init_swizzling(struct drm_device *dev)
5153 {
5154 struct drm_i915_private *dev_priv = dev->dev_private;
5155
5156 if (INTEL_INFO(dev)->gen < 5 ||
5157 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
5158 return;
5159
5160 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
5161 DISP_TILE_SURFACE_SWIZZLING);
5162
5163 if (IS_GEN5(dev))
5164 return;
5165
5166 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
5167 if (IS_GEN6(dev))
5168 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
5169 else if (IS_GEN7(dev))
5170 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
5171 else if (IS_GEN8(dev))
5172 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
5173 else
5174 BUG();
5175 }
5176
5177 static bool
5178 intel_enable_blt(struct drm_device *dev)
5179 {
5180 if (!HAS_BLT(dev))
5181 return false;
5182
5183 /* The blitter was dysfunctional on early prototypes */
5184 if (IS_GEN6(dev) && dev->pdev->revision < 8) {
5185 DRM_INFO("BLT not supported on this pre-production hardware;"
5186 " graphics performance will be degraded.\n");
5187 return false;
5188 }
5189
5190 return true;
5191 }
5192
5193 static int i915_gem_init_rings(struct drm_device *dev)
5194 {
5195 struct drm_i915_private *dev_priv = dev->dev_private;
5196 int ret;
5197
5198 ret = intel_init_render_ring_buffer(dev);
5199 if (ret)
5200 return ret;
5201
5202 if (HAS_BSD(dev)) {
5203 ret = intel_init_bsd_ring_buffer(dev);
5204 if (ret)
5205 goto cleanup_render_ring;
5206 }
5207
5208 if (intel_enable_blt(dev)) {
5209 ret = intel_init_blt_ring_buffer(dev);
5210 if (ret)
5211 goto cleanup_bsd_ring;
5212 }
5213
5214 if (HAS_VEBOX(dev)) {
5215 ret = intel_init_vebox_ring_buffer(dev);
5216 if (ret)
5217 goto cleanup_blt_ring;
5218 }
5219
5220
5221 ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
5222 if (ret)
5223 goto cleanup_vebox_ring;
5224
5225 return 0;
5226
5227 cleanup_vebox_ring:
5228 intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
5229 cleanup_blt_ring:
5230 intel_cleanup_ring_buffer(&dev_priv->ring[BCS]);
5231 cleanup_bsd_ring:
5232 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
5233 cleanup_render_ring:
5234 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
5235
5236 return ret;
5237 }
5238
5239 int
5240 i915_gem_init_hw(struct drm_device *dev)
5241 {
5242 struct drm_i915_private *dev_priv = dev->dev_private;
5243 int ret, i;
5244
5245 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
5246 return -EIO;
5247
5248 if (dev_priv->ellc_size)
5249 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
5250
5251 if (IS_HASWELL(dev))
5252 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
5253 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
5254
5255 if (HAS_PCH_NOP(dev)) {
5256 if (IS_IVYBRIDGE(dev)) {
5257 u32 temp = I915_READ(GEN7_MSG_CTL);
5258 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
5259 I915_WRITE(GEN7_MSG_CTL, temp);
5260 } else if (INTEL_INFO(dev)->gen >= 7) {
5261 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
5262 temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
5263 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
5264 }
5265 }
5266
5267 i915_gem_init_swizzling(dev);
5268
5269 ret = i915_gem_init_rings(dev);
5270 if (ret)
5271 return ret;
5272
5273 for (i = 0; i < NUM_L3_SLICES(dev); i++)
5274 i915_gem_l3_remap(&dev_priv->ring[RCS], i);
5275
5276 /*
5277 * XXX: Contexts should only be initialized once. Doing a switch to the
5278 * default context switch however is something we'd like to do after
5279 * reset or thaw (the latter may not actually be necessary for HW, but
5280 * goes with our code better). Context switching requires rings (for
5281 * the do_switch), but before enabling PPGTT. So don't move this.
5282 */
5283 ret = i915_gem_context_enable(dev_priv);
5284 if (ret) {
5285 DRM_ERROR("Context enable failed %d\n", ret);
5286 goto err_out;
5287 }
5288
5289 return 0;
5290
5291 err_out:
5292 i915_gem_cleanup_ringbuffer(dev);
5293 return ret;
5294 }
5295
5296 int i915_gem_init(struct drm_device *dev)
5297 {
5298 struct drm_i915_private *dev_priv = dev->dev_private;
5299 int ret;
5300
5301 mutex_lock(&dev->struct_mutex);
5302
5303 if (IS_VALLEYVIEW(dev)) {
5304 /* VLVA0 (potential hack), BIOS isn't actually waking us */
5305 I915_WRITE(VLV_GTLC_WAKE_CTRL, 1);
5306 if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 1) == 1, 10))
5307 DRM_DEBUG_DRIVER("allow wake ack timed out\n");
5308 }
5309 i915_gem_init_global_gtt(dev);
5310
5311 ret = i915_gem_context_init(dev);
5312 if (ret) {
5313 mutex_unlock(&dev->struct_mutex);
5314 return ret;
5315 }
5316
5317 ret = i915_gem_init_hw(dev);
5318 mutex_unlock(&dev->struct_mutex);
5319 if (ret) {
5320 WARN_ON(dev_priv->mm.aliasing_ppgtt);
5321 i915_gem_context_fini(dev);
5322 drm_mm_takedown(&dev_priv->gtt.base.mm);
5323 return ret;
5324 }
5325
5326 /* Allow hardware batchbuffers unless told otherwise, but not for KMS. */
5327 if (!drm_core_check_feature(dev, DRIVER_MODESET))
5328 dev_priv->dri1.allow_batchbuffer = 1;
5329 return 0;
5330 }
5331
5332 void
5333 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
5334 {
5335 struct drm_i915_private *dev_priv = dev->dev_private;
5336 struct intel_ring_buffer *ring;
5337 int i;
5338
5339 for_each_ring(ring, dev_priv, i)
5340 intel_cleanup_ring_buffer(ring);
5341 }
5342
5343 int
5344 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
5345 struct drm_file *file_priv)
5346 {
5347 struct drm_i915_private *dev_priv = dev->dev_private;
5348 int ret;
5349
5350 if (drm_core_check_feature(dev, DRIVER_MODESET))
5351 return 0;
5352
5353 if (i915_reset_in_progress(&dev_priv->gpu_error)) {
5354 DRM_ERROR("Reenabling wedged hardware, good luck\n");
5355 atomic_set(&dev_priv->gpu_error.reset_counter, 0);
5356 }
5357
5358 mutex_lock(&dev->struct_mutex);
5359 dev_priv->ums.mm_suspended = 0;
5360
5361 ret = i915_gem_init_hw(dev);
5362 if (ret != 0) {
5363 mutex_unlock(&dev->struct_mutex);
5364 return ret;
5365 }
5366
5367 BUG_ON(!list_empty(&dev_priv->gtt.base.active_list));
5368 mutex_unlock(&dev->struct_mutex);
5369
5370 ret = drm_irq_install(dev);
5371 if (ret)
5372 goto cleanup_ringbuffer;
5373
5374 return 0;
5375
5376 cleanup_ringbuffer:
5377 mutex_lock(&dev->struct_mutex);
5378 i915_gem_cleanup_ringbuffer(dev);
5379 dev_priv->ums.mm_suspended = 1;
5380 mutex_unlock(&dev->struct_mutex);
5381
5382 return ret;
5383 }
5384
5385 int
5386 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
5387 struct drm_file *file_priv)
5388 {
5389 if (drm_core_check_feature(dev, DRIVER_MODESET))
5390 return 0;
5391
5392 drm_irq_uninstall(dev);
5393
5394 return i915_gem_suspend(dev);
5395 }
5396
5397 void
5398 i915_gem_lastclose(struct drm_device *dev)
5399 {
5400 int ret;
5401
5402 if (drm_core_check_feature(dev, DRIVER_MODESET))
5403 return;
5404
5405 ret = i915_gem_suspend(dev);
5406 if (ret)
5407 DRM_ERROR("failed to idle hardware: %d\n", ret);
5408 }
5409
5410 static void
5411 init_ring_lists(struct intel_ring_buffer *ring)
5412 {
5413 INIT_LIST_HEAD(&ring->active_list);
5414 INIT_LIST_HEAD(&ring->request_list);
5415 }
5416
5417 void i915_init_vm(struct drm_i915_private *dev_priv,
5418 struct i915_address_space *vm)
5419 {
5420 if (!i915_is_ggtt(vm))
5421 drm_mm_init(&vm->mm, vm->start, vm->total);
5422 vm->dev = dev_priv->dev;
5423 INIT_LIST_HEAD(&vm->active_list);
5424 INIT_LIST_HEAD(&vm->inactive_list);
5425 INIT_LIST_HEAD(&vm->global_link);
5426 list_add_tail(&vm->global_link, &dev_priv->vm_list);
5427 }
5428
5429 void
5430 i915_gem_load(struct drm_device *dev)
5431 {
5432 struct drm_i915_private *dev_priv = dev->dev_private;
5433 int i;
5434
5435 dev_priv->slab =
5436 kmem_cache_create("i915_gem_object",
5437 sizeof(struct drm_i915_gem_object), 0,
5438 SLAB_HWCACHE_ALIGN,
5439 NULL);
5440
5441 INIT_LIST_HEAD(&dev_priv->vm_list);
5442 i915_init_vm(dev_priv, &dev_priv->gtt.base);
5443
5444 INIT_LIST_HEAD(&dev_priv->context_list);
5445 INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
5446 INIT_LIST_HEAD(&dev_priv->mm.bound_list);
5447 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
5448 for (i = 0; i < I915_NUM_RINGS; i++)
5449 init_ring_lists(&dev_priv->ring[i]);
5450 for (i = 0; i < I915_MAX_NUM_FENCES; i++)
5451 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
5452 INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
5453 i915_gem_retire_work_handler);
5454 INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
5455 i915_gem_idle_work_handler);
5456 #ifdef __NetBSD__
5457 spin_lock_init(&dev_priv->gpu_error.reset_lock);
5458 DRM_INIT_WAITQUEUE(&dev_priv->gpu_error.reset_queue, "i915errst");
5459 #else
5460 init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
5461 #endif
5462
5463 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
5464 if (IS_GEN3(dev)) {
5465 I915_WRITE(MI_ARB_STATE,
5466 _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
5467 }
5468
5469 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
5470
5471 /* Old X drivers will take 0-2 for front, back, depth buffers */
5472 if (!drm_core_check_feature(dev, DRIVER_MODESET))
5473 dev_priv->fence_reg_start = 3;
5474
5475 if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev))
5476 dev_priv->num_fence_regs = 32;
5477 else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
5478 dev_priv->num_fence_regs = 16;
5479 else
5480 dev_priv->num_fence_regs = 8;
5481
5482 /* Initialize fence registers to zero */
5483 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
5484 i915_gem_restore_fences(dev);
5485
5486 i915_gem_detect_bit_6_swizzle(dev);
5487 #ifdef __NetBSD__
5488 DRM_INIT_WAITQUEUE(&dev_priv->pending_flip_queue, "i915flip");
5489 spin_lock_init(&dev_priv->pending_flip_lock);
5490 #else
5491 init_waitqueue_head(&dev_priv->pending_flip_queue);
5492 #endif
5493
5494 dev_priv->mm.interruptible = true;
5495
5496 dev_priv->mm.inactive_shrinker.scan_objects = i915_gem_inactive_scan;
5497 dev_priv->mm.inactive_shrinker.count_objects = i915_gem_inactive_count;
5498 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
5499 register_shrinker(&dev_priv->mm.inactive_shrinker);
5500 }
5501
5502 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
5503 {
5504 struct drm_i915_file_private *file_priv = file->driver_priv;
5505
5506 cancel_delayed_work_sync(&file_priv->mm.idle_work);
5507
5508 /* Clean up our request list when the client is going away, so that
5509 * later retire_requests won't dereference our soon-to-be-gone
5510 * file_priv.
5511 */
5512 spin_lock(&file_priv->mm.lock);
5513 while (!list_empty(&file_priv->mm.request_list)) {
5514 struct drm_i915_gem_request *request;
5515
5516 request = list_first_entry(&file_priv->mm.request_list,
5517 struct drm_i915_gem_request,
5518 client_list);
5519 list_del(&request->client_list);
5520 request->file_priv = NULL;
5521 }
5522 spin_unlock(&file_priv->mm.lock);
5523 }
5524
5525 static void
5526 i915_gem_file_idle_work_handler(struct work_struct *work)
5527 {
5528 struct drm_i915_file_private *file_priv =
5529 container_of(work, typeof(*file_priv), mm.idle_work.work);
5530
5531 atomic_set(&file_priv->rps_wait_boost, false);
5532 }
5533
5534 int i915_gem_open(struct drm_device *dev, struct drm_file *file)
5535 {
5536 struct drm_i915_file_private *file_priv;
5537 int ret;
5538
5539 DRM_DEBUG_DRIVER("\n");
5540
5541 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
5542 if (!file_priv)
5543 return -ENOMEM;
5544
5545 file->driver_priv = file_priv;
5546 file_priv->dev_priv = dev->dev_private;
5547 file_priv->file = file;
5548
5549 spin_lock_init(&file_priv->mm.lock);
5550 INIT_LIST_HEAD(&file_priv->mm.request_list);
5551 INIT_DELAYED_WORK(&file_priv->mm.idle_work,
5552 i915_gem_file_idle_work_handler);
5553
5554 ret = i915_gem_context_open(dev, file);
5555 if (ret)
5556 kfree(file_priv);
5557
5558 return ret;
5559 }
5560
5561 #ifndef __NetBSD__
5562 static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
5563 {
5564 if (!mutex_is_locked(mutex))
5565 return false;
5566
5567 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)
5568 return mutex->owner == task;
5569 #else
5570 /* Since UP may be pre-empted, we cannot assume that we own the lock */
5571 return false;
5572 #endif
5573 }
5574 #endif
5575
5576 static unsigned long
5577 i915_gem_inactive_count(struct shrinker *shrinker, struct shrink_control *sc)
5578 {
5579 #ifdef __NetBSD__ /* XXX shrinkers */
5580 return 0;
5581 #else
5582 struct drm_i915_private *dev_priv =
5583 container_of(shrinker,
5584 struct drm_i915_private,
5585 mm.inactive_shrinker);
5586 struct drm_device *dev = dev_priv->dev;
5587 struct drm_i915_gem_object *obj;
5588 bool unlock = true;
5589 unsigned long count;
5590
5591 if (!mutex_trylock(&dev->struct_mutex)) {
5592 if (!mutex_is_locked_by(&dev->struct_mutex, current))
5593 return 0;
5594
5595 if (dev_priv->mm.shrinker_no_lock_stealing)
5596 return 0;
5597
5598 unlock = false;
5599 }
5600
5601 count = 0;
5602 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list)
5603 if (obj->pages_pin_count == 0)
5604 count += obj->base.size >> PAGE_SHIFT;
5605
5606 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
5607 if (obj->active)
5608 continue;
5609
5610 if (!i915_gem_obj_is_pinned(obj) && obj->pages_pin_count == 0)
5611 count += obj->base.size >> PAGE_SHIFT;
5612 }
5613
5614 if (unlock)
5615 mutex_unlock(&dev->struct_mutex);
5616
5617 return count;
5618 #endif
5619 }
5620
5621 /* All the new VM stuff */
5622 unsigned long i915_gem_obj_offset(struct drm_i915_gem_object *o,
5623 struct i915_address_space *vm)
5624 {
5625 struct drm_i915_private *dev_priv = o->base.dev->dev_private;
5626 struct i915_vma *vma;
5627
5628 if (!dev_priv->mm.aliasing_ppgtt ||
5629 vm == &dev_priv->mm.aliasing_ppgtt->base)
5630 vm = &dev_priv->gtt.base;
5631
5632 BUG_ON(list_empty(&o->vma_list));
5633 list_for_each_entry(vma, &o->vma_list, vma_link) {
5634 if (vma->vm == vm)
5635 return vma->node.start;
5636
5637 }
5638 return -1;
5639 }
5640
5641 bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
5642 struct i915_address_space *vm)
5643 {
5644 struct i915_vma *vma;
5645
5646 list_for_each_entry(vma, &o->vma_list, vma_link)
5647 if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
5648 return true;
5649
5650 return false;
5651 }
5652
5653 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
5654 {
5655 struct i915_vma *vma;
5656
5657 list_for_each_entry(vma, &o->vma_list, vma_link)
5658 if (drm_mm_node_allocated(&vma->node))
5659 return true;
5660
5661 return false;
5662 }
5663
5664 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
5665 struct i915_address_space *vm)
5666 {
5667 struct drm_i915_private *dev_priv = o->base.dev->dev_private;
5668 struct i915_vma *vma;
5669
5670 if (!dev_priv->mm.aliasing_ppgtt ||
5671 vm == &dev_priv->mm.aliasing_ppgtt->base)
5672 vm = &dev_priv->gtt.base;
5673
5674 BUG_ON(list_empty(&o->vma_list));
5675
5676 list_for_each_entry(vma, &o->vma_list, vma_link)
5677 if (vma->vm == vm)
5678 return vma->node.size;
5679
5680 return 0;
5681 }
5682
5683 static unsigned long
5684 i915_gem_inactive_scan(struct shrinker *shrinker, struct shrink_control *sc)
5685 {
5686 #ifdef __NetBSD__ /* XXX shrinkers */
5687 return 0;
5688 #else
5689 struct drm_i915_private *dev_priv =
5690 container_of(shrinker,
5691 struct drm_i915_private,
5692 mm.inactive_shrinker);
5693 struct drm_device *dev = dev_priv->dev;
5694 unsigned long freed;
5695 bool unlock = true;
5696
5697 if (!mutex_trylock(&dev->struct_mutex)) {
5698 if (!mutex_is_locked_by(&dev->struct_mutex, current))
5699 return SHRINK_STOP;
5700
5701 if (dev_priv->mm.shrinker_no_lock_stealing)
5702 return SHRINK_STOP;
5703
5704 unlock = false;
5705 }
5706
5707 freed = i915_gem_purge(dev_priv, sc->nr_to_scan);
5708 if (freed < sc->nr_to_scan)
5709 freed += __i915_gem_shrink(dev_priv,
5710 sc->nr_to_scan - freed,
5711 false);
5712 if (freed < sc->nr_to_scan)
5713 freed += i915_gem_shrink_all(dev_priv);
5714
5715 if (unlock)
5716 mutex_unlock(&dev->struct_mutex);
5717
5718 return freed;
5719 #endif
5720 }
5721
5722 struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
5723 {
5724 struct i915_vma *vma;
5725
5726 if (WARN_ON(list_empty(&obj->vma_list)))
5727 return NULL;
5728
5729 vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link);
5730 if (vma->vm != obj_to_ggtt(obj))
5731 return NULL;
5732
5733 return vma;
5734 }
5735