/*
2 * Copyright 2008 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric (at) anholt.net>
25 *
26 */
27
28 #ifdef __NetBSD__
29 #if 0 /* XXX uvmhist option? */
30 #include "opt_uvmhist.h"
31 #endif
32
33 #include <sys/types.h>
34 #include <sys/param.h>
35
36 #include <uvm/uvm.h>
37 #include <uvm/uvm_extern.h>
38 #include <uvm/uvm_fault.h>
39 #include <uvm/uvm_page.h>
40 #include <uvm/uvm_pmap.h>
41 #include <uvm/uvm_prot.h>
42
43 #include <drm/bus_dma_hacks.h>
44 #endif
45
46 #include <drm/drmP.h>
47 #include <drm/drm_vma_manager.h>
48 #include <drm/i915_drm.h>
49 #include "i915_drv.h"
50 #include "i915_trace.h"
51 #include "intel_drv.h"
52 #include <linux/shmem_fs.h>
53 #include <linux/slab.h>
54 #include <linux/swap.h>
55 #include <linux/pci.h>
56 #include <linux/dma-buf.h>
57 #include <linux/errno.h>
58 #include <linux/time.h>
59 #include <linux/err.h>
60 #include <linux/bitops.h>
61 #include <linux/printk.h>
62 #include <asm/param.h>
63 #include <asm/page.h>
64
65 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
66 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
67 bool force);
68 static __must_check int
69 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
70 bool readonly);
71
72 static void i915_gem_write_fence(struct drm_device *dev, int reg,
73 struct drm_i915_gem_object *obj);
74 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
75 struct drm_i915_fence_reg *fence,
76 bool enable);
77
78 static unsigned long i915_gem_inactive_count(struct shrinker *shrinker,
79 struct shrink_control *sc);
80 static unsigned long i915_gem_inactive_scan(struct shrinker *shrinker,
81 struct shrink_control *sc);
82 static unsigned long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
83 static unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv);
84 static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
85 static void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring);
86
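/*
 * CPU access to an object is cache-coherent when the device shares a
 * last-level cache with the CPU (HAS_LLC) or the object uses any
 * caching mode other than I915_CACHE_NONE.
 */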
87 static bool cpu_cache_is_coherent(struct drm_device *dev,
88 enum i915_cache_level level)
89 {
90 return HAS_LLC(dev) || level != I915_CACHE_NONE;
91 }
92
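/*
 * CPU writes must be flushed with clflush when the object is not
 * cache-coherent, or when it is pinned for display -- presumably
 * because scanout does not snoop the CPU cache even on LLC parts.
 */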
93 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
94 {
95 if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
96 return true;
97
98 return obj->pin_display;
99 }
100
101 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
102 {
103 if (obj->tiling_mode)
104 i915_gem_release_mmap(obj);
105
106 /* As we do not have an associated fence register, we will force
107 * a tiling change if we ever need to acquire one.
108 */
109 obj->fence_dirty = false;
110 obj->fence_reg = I915_FENCE_REG_NONE;
111 }
112
113 /* some bookkeeping */
114 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
115 size_t size)
116 {
117 spin_lock(&dev_priv->mm.object_stat_lock);
118 dev_priv->mm.object_count++;
119 dev_priv->mm.object_memory += size;
120 spin_unlock(&dev_priv->mm.object_stat_lock);
121 }
122
123 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
124 size_t size)
125 {
126 spin_lock(&dev_priv->mm.object_stat_lock);
127 dev_priv->mm.object_count--;
128 dev_priv->mm.object_memory -= size;
129 spin_unlock(&dev_priv->mm.object_stat_lock);
130 }
131
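/*
 * Wait for a pending GPU reset to complete (or for the GPU to be
 * declared terminally wedged) before touching GEM state.  Gives up
 * after 10 seconds and returns -EIO rather than hanging userspace.
 */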
132 static int
133 i915_gem_wait_for_error(struct i915_gpu_error *error)
134 {
135 int ret;
136
137 #define EXIT_COND (!i915_reset_in_progress(error) || \
138 i915_terminally_wedged(error))
139 if (EXIT_COND)
140 return 0;
141
142 /*
143 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long, something really bad is going on and
145 * we should simply try to bail out and fail as gracefully as possible.
146 */
147 #ifdef __NetBSD__
148 spin_lock(&error->reset_lock);
149 DRM_SPIN_TIMED_WAIT_UNTIL(ret, &error->reset_queue, &error->reset_lock,
150 10*HZ, EXIT_COND);
151 spin_unlock(&error->reset_lock);
152 #else
153 ret = wait_event_interruptible_timeout(error->reset_queue,
154 EXIT_COND,
155 10*HZ);
156 #endif
157 if (ret == 0) {
158 DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
159 return -EIO;
160 } else if (ret < 0) {
161 return ret;
162 }
163 #undef EXIT_COND
164
165 return 0;
166 }
167
168 int i915_mutex_lock_interruptible(struct drm_device *dev)
169 {
170 struct drm_i915_private *dev_priv = dev->dev_private;
171 int ret;
172
173 ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
174 if (ret)
175 return ret;
176
177 ret = mutex_lock_interruptible(&dev->struct_mutex);
178 if (ret)
179 return ret;
180
181 WARN_ON(i915_verify_lists(dev));
182 return 0;
183 }
184
185 static inline bool
186 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
187 {
188 return i915_gem_obj_bound_any(obj) && !obj->active;
189 }
190
191 int
192 i915_gem_init_ioctl(struct drm_device *dev, void *data,
193 struct drm_file *file)
194 {
195 struct drm_i915_private *dev_priv = dev->dev_private;
196 struct drm_i915_gem_init *args = data;
197
198 if (drm_core_check_feature(dev, DRIVER_MODESET))
199 return -ENODEV;
200
201 if (args->gtt_start >= args->gtt_end ||
202 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
203 return -EINVAL;
204
205 /* GEM with user mode setting was never supported on ilk and later. */
206 if (INTEL_INFO(dev)->gen >= 5)
207 return -ENODEV;
208
209 mutex_lock(&dev->struct_mutex);
210 i915_gem_setup_global_gtt(dev, args->gtt_start, args->gtt_end,
211 args->gtt_end);
212 dev_priv->gtt.mappable_end = args->gtt_end;
213 mutex_unlock(&dev->struct_mutex);
214
215 return 0;
216 }
217
218 int
219 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
220 struct drm_file *file)
221 {
222 struct drm_i915_private *dev_priv = dev->dev_private;
223 struct drm_i915_gem_get_aperture *args = data;
224 struct drm_i915_gem_object *obj;
225 size_t pinned;
226
227 pinned = 0;
228 mutex_lock(&dev->struct_mutex);
229 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
230 if (i915_gem_obj_is_pinned(obj))
231 pinned += i915_gem_obj_ggtt_size(obj);
232 mutex_unlock(&dev->struct_mutex);
233
234 args->aper_size = dev_priv->gtt.base.total;
235 args->aper_available_size = args->aper_size - pinned;
236
237 return 0;
238 }
239
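/*
 * Detach a physically contiguous DMA buffer from the object.  If the
 * object is still marked WILLNEED, its contents are copied back into
 * the shmem/uvm aobj pages (and clflushed) before the DMA buffer is
 * freed.
 */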
240 static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
241 {
242 drm_dma_handle_t *phys = obj->phys_handle;
243
244 if (!phys)
245 return;
246
247 if (obj->madv == I915_MADV_WILLNEED) {
248 #ifdef __NetBSD__
249 const char *vaddr = phys->vaddr;
250 unsigned i;
251
252 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
253 struct pglist pages;
254 int error;
255
256 TAILQ_INIT(&pages);
257 error = uvm_obj_wirepages(obj->base.gemo_shm_uao,
258 i*PAGE_SIZE, (i+1)*PAGE_SIZE, &pages);
259 if (error)
260 continue;
261
262 struct vm_page *const vm_page = TAILQ_FIRST(&pages);
263 struct page *const page = container_of(vm_page,
264 struct page, p_vmp);
265 char *const dst = kmap_atomic(page);
266 (void)memcpy(dst, vaddr + (i*PAGE_SIZE), PAGE_SIZE);
267 drm_clflush_virt_range(dst, PAGE_SIZE);
268 kunmap_atomic(dst);
269
270 vm_page->flags &= ~PG_CLEAN;
271 /* XXX mark page accessed */
272 uvm_obj_unwirepages(obj->base.gemo_shm_uao,
273 i*PAGE_SIZE, (i+1)*PAGE_SIZE);
274 }
275 #else
276 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
277 char *vaddr = phys->vaddr;
278 int i;
279
280 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
281 struct page *page = shmem_read_mapping_page(mapping, i);
282 if (!IS_ERR(page)) {
283 char *dst = kmap_atomic(page);
284 memcpy(dst, vaddr, PAGE_SIZE);
285 drm_clflush_virt_range(dst, PAGE_SIZE);
286 kunmap_atomic(dst);
287
288 set_page_dirty(page);
289 mark_page_accessed(page);
290 page_cache_release(page);
291 }
292 vaddr += PAGE_SIZE;
293 }
294 #endif
295 i915_gem_chipset_flush(obj->base.dev);
296 }
297
298 #ifndef __NetBSD__
299 #ifdef CONFIG_X86
300 set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
301 #endif
302 #endif
303 drm_pci_free(obj->base.dev, phys);
304 obj->phys_handle = NULL;
305 }
306
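/*
 * Back the object with a physically contiguous DMA buffer of the
 * requested alignment and copy the current page contents into it.
 * Fails with -EBUSY if a handle with a different alignment is
 * already attached.
 */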
307 int
308 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
309 int align)
310 {
311 drm_dma_handle_t *phys;
312 #ifndef __NetBSD__
313 struct address_space *mapping;
314 #endif
315 char *vaddr;
316 int i;
317
318 if (obj->phys_handle) {
		if ((unsigned long)obj->phys_handle->vaddr & (align - 1))
320 return -EBUSY;
321
322 return 0;
323 }
324
325 if (obj->madv != I915_MADV_WILLNEED)
326 return -EFAULT;
327
328 #ifdef __NetBSD__
329 if (obj->base.gemo_shm_uao == NULL)
330 return -EINVAL;
331 #else
332 if (obj->base.filp == NULL)
333 return -EINVAL;
334 #endif
335
336 /* create a new object */
337 phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
338 if (!phys)
339 return -ENOMEM;
340
341 vaddr = phys->vaddr;
342 #ifndef __NetBSD__
343 #ifdef CONFIG_X86
344 set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
345 #endif
346 mapping = file_inode(obj->base.filp)->i_mapping;
347 #endif
348 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
349 struct page *page;
350 char *src;
351
352 #ifdef __NetBSD__
353 struct pglist pages;
354 int ret;
355
356 TAILQ_INIT(&pages);
357
358 /* XXX errno NetBSD->Linux */
359 ret = -uvm_obj_wirepages(obj->base.gemo_shm_uao, i*PAGE_SIZE,
360 (i+1)*PAGE_SIZE, &pages);
361 if (ret) {
362 drm_pci_free(obj->base.dev, phys);
363 return ret;
364 }
365 KASSERT(!TAILQ_EMPTY(&pages));
366 page = container_of(TAILQ_FIRST(&pages), struct page, p_vmp);
367 #else
368 page = shmem_read_mapping_page(mapping, i);
369 if (IS_ERR(page)) {
370 #ifdef CONFIG_X86
371 set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
372 #endif
373 drm_pci_free(obj->base.dev, phys);
374 return PTR_ERR(page);
375 }
376 #endif /* defined(__NetBSD__) */
377
378 src = kmap_atomic(page);
379 memcpy(vaddr, src, PAGE_SIZE);
380 kunmap_atomic(src);
381
382 #ifndef __NetBSD__
383 mark_page_accessed(page);
384 page_cache_release(page);
385 #endif
386
387 vaddr += PAGE_SIZE;
388 }
389
390 obj->phys_handle = phys;
391 return 0;
392 }
393
394 static int
395 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
396 struct drm_i915_gem_pwrite *args,
397 struct drm_file *file_priv)
398 {
399 struct drm_device *dev = obj->base.dev;
400 void *vaddr = (char *)obj->phys_handle->vaddr + args->offset;
401 char __user *user_data = to_user_ptr(args->data_ptr);
402
403 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
404 unsigned long unwritten;
405
406 /* The physical object once assigned is fixed for the lifetime
407 * of the obj, so we can safely drop the lock and continue
408 * to access vaddr.
409 */
410 mutex_unlock(&dev->struct_mutex);
411 unwritten = copy_from_user(vaddr, user_data, args->size);
412 mutex_lock(&dev->struct_mutex);
413 if (unwritten)
414 return -EFAULT;
415 }
416
417 i915_gem_chipset_flush(dev);
418 return 0;
419 }
420
421 void *i915_gem_object_alloc(struct drm_device *dev)
422 {
423 struct drm_i915_private *dev_priv = dev->dev_private;
424 return kmem_cache_zalloc(dev_priv->slab, GFP_KERNEL);
425 }
426
427 void i915_gem_object_free(struct drm_i915_gem_object *obj)
428 {
429 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
430 kmem_cache_free(dev_priv->slab, obj);
431 }
432
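/*
 * Common body of the create and dumb-create ioctls: allocate a GEM
 * object of the (page-rounded) size and return a new handle to it.
 */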
433 static int
434 i915_gem_create(struct drm_file *file,
435 struct drm_device *dev,
436 uint64_t size,
437 uint32_t *handle_p)
438 {
439 struct drm_i915_gem_object *obj;
440 int ret;
441 u32 handle;
442
443 size = roundup(size, PAGE_SIZE);
444 if (size == 0)
445 return -EINVAL;
446
447 /* Allocate the new object */
448 obj = i915_gem_alloc_object(dev, size);
449 if (obj == NULL)
450 return -ENOMEM;
451
452 ret = drm_gem_handle_create(file, &obj->base, &handle);
453 /* drop reference from allocate - handle holds it now */
454 drm_gem_object_unreference_unlocked(&obj->base);
455 if (ret)
456 return ret;
457
458 *handle_p = handle;
459 return 0;
460 }
461
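/*
 * Compute the pitch and size for a dumb (unaccelerated) buffer and
 * create a backing object for it.  For example, a 1920x1080 buffer
 * at 32 bpp gets pitch = round_up(1920 * 4, 64) = 7680 bytes and
 * size = 7680 * 1080 = 8294400 bytes; i915_gem_create then rounds
 * the size up to whole pages.
 */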
462 int
463 i915_gem_dumb_create(struct drm_file *file,
464 struct drm_device *dev,
465 struct drm_mode_create_dumb *args)
466 {
467 /* have to work out size/pitch and return them */
468 #ifdef __NetBSD__ /* ALIGN means something else. */
469 args->pitch = round_up(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
470 #else
471 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
472 #endif
473 args->size = args->pitch * args->height;
474 return i915_gem_create(file, dev,
475 args->size, &args->handle);
476 }
477
478 /**
479 * Creates a new mm object and returns a handle to it.
480 */
481 int
482 i915_gem_create_ioctl(struct drm_device *dev, void *data,
483 struct drm_file *file)
484 {
485 struct drm_i915_gem_create *args = data;
486
487 return i915_gem_create(file, dev,
488 args->size, &args->handle);
489 }
490
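/*
 * Copy out of a bit-17-swizzled object.  The copy is cut at 64-byte
 * cacheline boundaries and each cacheline is read from gpu_offset ^ 64,
 * i.e. the two 64-byte halves of every 128-byte block are swapped.
 * For example, a 256-byte read starting at gpu_offset 0 copies from
 * GPU offsets 64, 0, 192 and 128, in that order.
 * (__copy_from_user_swizzled below is the mirror image for writes.)
 */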
491 static inline int
492 __copy_to_user_swizzled(char __user *cpu_vaddr,
493 const char *gpu_vaddr, int gpu_offset,
494 int length)
495 {
496 int ret, cpu_offset = 0;
497
498 while (length > 0) {
499 #ifdef __NetBSD__ /* XXX ALIGN means something else. */
500 int cacheline_end = round_up(gpu_offset + 1, 64);
501 #else
502 int cacheline_end = ALIGN(gpu_offset + 1, 64);
503 #endif
504 int this_length = min(cacheline_end - gpu_offset, length);
505 int swizzled_gpu_offset = gpu_offset ^ 64;
506
507 ret = __copy_to_user(cpu_vaddr + cpu_offset,
508 gpu_vaddr + swizzled_gpu_offset,
509 this_length);
510 if (ret)
511 return ret + length;
512
513 cpu_offset += this_length;
514 gpu_offset += this_length;
515 length -= this_length;
516 }
517
518 return 0;
519 }
520
521 static inline int
522 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
523 const char __user *cpu_vaddr,
524 int length)
525 {
526 int ret, cpu_offset = 0;
527
528 while (length > 0) {
529 #ifdef __NetBSD__ /* XXX ALIGN means something else. */
530 int cacheline_end = round_up(gpu_offset + 1, 64);
531 #else
532 int cacheline_end = ALIGN(gpu_offset + 1, 64);
533 #endif
534 int this_length = min(cacheline_end - gpu_offset, length);
535 int swizzled_gpu_offset = gpu_offset ^ 64;
536
537 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
538 cpu_vaddr + cpu_offset,
539 this_length);
540 if (ret)
541 return ret + length;
542
543 cpu_offset += this_length;
544 gpu_offset += this_length;
545 length -= this_length;
546 }
547
548 return 0;
549 }
550
551 /*
552 * Pins the specified object's pages and synchronizes the object with
553 * GPU accesses. Sets needs_clflush to non-zero if the caller should
554 * flush the object from the CPU cache.
555 */
556 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
557 int *needs_clflush)
558 {
559 int ret;
560
561 *needs_clflush = 0;
562
563 #ifdef __NetBSD__
564 if (obj->base.gemo_shm_uao == NULL)
565 return -EINVAL;
566 #else
567 if (!obj->base.filp)
568 return -EINVAL;
569 #endif
570
571 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
		/* If we're not in the cpu read domain, set ourselves into the gtt
573 * read domain and manually flush cachelines (if required). This
574 * optimizes for the case when the gpu will dirty the data
575 * anyway again before the next pread happens. */
576 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
577 obj->cache_level);
578 ret = i915_gem_object_wait_rendering(obj, true);
579 if (ret)
580 return ret;
581 }
582
583 ret = i915_gem_object_get_pages(obj);
584 if (ret)
585 return ret;
586
587 i915_gem_object_pin_pages(obj);
588
589 return ret;
590 }
591
592 /* Per-page copy function for the shmem pread fastpath.
593 * Flushes invalid cachelines before reading the target if
594 * needs_clflush is set. */
595 static int
596 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
597 char __user *user_data,
598 bool page_do_bit17_swizzling, bool needs_clflush)
599 {
600 #ifdef __NetBSD__ /* XXX atomic shmem fast path */
601 return -EFAULT;
602 #else
603 char *vaddr;
604 int ret;
605
606 if (unlikely(page_do_bit17_swizzling))
607 return -EINVAL;
608
609 vaddr = kmap_atomic(page);
610 if (needs_clflush)
611 drm_clflush_virt_range(vaddr + shmem_page_offset,
612 page_length);
613 ret = __copy_to_user_inatomic(user_data,
614 vaddr + shmem_page_offset,
615 page_length);
616 kunmap_atomic(vaddr);
617
618 return ret ? -EFAULT : 0;
619 #endif
620 }
621
622 static void
623 shmem_clflush_swizzled_range(char *addr, unsigned long length,
624 bool swizzled)
625 {
626 if (unlikely(swizzled)) {
627 unsigned long start = (unsigned long) addr;
628 unsigned long end = (unsigned long) addr + length;
629
630 /* For swizzling simply ensure that we always flush both
631 * channels. Lame, but simple and it works. Swizzled
632 * pwrite/pread is far from a hotpath - current userspace
633 * doesn't use it at all. */
634 start = round_down(start, 128);
635 end = round_up(end, 128);
636
637 drm_clflush_virt_range((void *)start, end - start);
638 } else {
639 drm_clflush_virt_range(addr, length);
640 }
641
642 }
643
644 /* Only difference to the fast-path function is that this can handle bit17
645 * and uses non-atomic copy and kmap functions. */
646 static int
647 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
648 char __user *user_data,
649 bool page_do_bit17_swizzling, bool needs_clflush)
650 {
651 char *vaddr;
652 int ret;
653
654 vaddr = kmap(page);
655 if (needs_clflush)
656 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
657 page_length,
658 page_do_bit17_swizzling);
659
660 if (page_do_bit17_swizzling)
661 ret = __copy_to_user_swizzled(user_data,
662 vaddr, shmem_page_offset,
663 page_length);
664 else
665 ret = __copy_to_user(user_data,
666 vaddr + shmem_page_offset,
667 page_length);
668 kunmap(page);
669
	return ret ? -EFAULT : 0;
671 }
672
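/*
 * Copy object contents out to userspace through the CPU, page by
 * page, trying the atomic kmap fastpath first and falling back to
 * the sleepable slowpath (dropping struct_mutex) when it faults or
 * when bit-17 swizzling is needed.  On NetBSD the fastpath is
 * currently a stub that always falls through to the slowpath.
 */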
673 static int
674 i915_gem_shmem_pread(struct drm_device *dev,
675 struct drm_i915_gem_object *obj,
676 struct drm_i915_gem_pread *args,
677 struct drm_file *file)
678 {
679 char __user *user_data;
680 ssize_t remain;
681 loff_t offset;
682 int shmem_page_offset, page_length, ret = 0;
683 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
684 #ifndef __NetBSD__ /* XXX */
685 int prefaulted = 0;
686 #endif
687 int needs_clflush = 0;
688 #ifndef __NetBSD__
689 struct sg_page_iter sg_iter;
690 #endif
691
692 user_data = to_user_ptr(args->data_ptr);
693 remain = args->size;
694
695 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
696
697 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
698 if (ret)
699 return ret;
700
701 offset = args->offset;
702
703 #ifdef __NetBSD__
704 /*
705 * XXX This is a big #ifdef with a lot of duplicated code, but
706 * factoring out the loop head -- which is all that
707 * substantially differs -- is probably more trouble than it's
708 * worth at the moment.
709 */
710 while (0 < remain) {
711 /* Get the next page. */
712 shmem_page_offset = offset_in_page(offset);
713 KASSERT(shmem_page_offset < PAGE_SIZE);
714 page_length = MIN(remain, (PAGE_SIZE - shmem_page_offset));
715 struct page *const page = i915_gem_object_get_page(obj,
716 atop(offset));
717
718 /* Decide whether to swizzle bit 17. */
719 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
720 (page_to_phys(page) & (1 << 17)) != 0;
721
722 /* Try the fast path. */
723 ret = shmem_pread_fast(page, shmem_page_offset, page_length,
724 user_data, page_do_bit17_swizzling, needs_clflush);
725 if (ret == 0)
726 goto next_page;
727
728 /* Fast path failed. Try the slow path. */
729 mutex_unlock(&dev->struct_mutex);
730 /* XXX prefault */
731 ret = shmem_pread_slow(page, shmem_page_offset, page_length,
732 user_data, page_do_bit17_swizzling, needs_clflush);
733 mutex_lock(&dev->struct_mutex);
734 if (ret)
735 goto out;
736
737 next_page: KASSERT(page_length <= remain);
738 remain -= page_length;
739 user_data += page_length;
740 offset += page_length;
741 }
742 #else
743 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
744 offset >> PAGE_SHIFT) {
745 struct page *page = sg_page_iter_page(&sg_iter);
746
747 if (remain <= 0)
748 break;
749
750 /* Operation in this page
751 *
752 * shmem_page_offset = offset within page in shmem file
753 * page_length = bytes to copy for this page
754 */
755 shmem_page_offset = offset_in_page(offset);
756 page_length = remain;
757 if ((shmem_page_offset + page_length) > PAGE_SIZE)
758 page_length = PAGE_SIZE - shmem_page_offset;
759
760 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
761 (page_to_phys(page) & (1 << 17)) != 0;
762
763 ret = shmem_pread_fast(page, shmem_page_offset, page_length,
764 user_data, page_do_bit17_swizzling,
765 needs_clflush);
766 if (ret == 0)
767 goto next_page;
768
769 mutex_unlock(&dev->struct_mutex);
770
771 if (likely(!i915.prefault_disable) && !prefaulted) {
772 ret = fault_in_multipages_writeable(user_data, remain);
773 /* Userspace is tricking us, but we've already clobbered
774 * its pages with the prefault and promised to write the
775 * data up to the first fault. Hence ignore any errors
776 * and just continue. */
777 (void)ret;
778 prefaulted = 1;
779 }
780
781 ret = shmem_pread_slow(page, shmem_page_offset, page_length,
782 user_data, page_do_bit17_swizzling,
783 needs_clflush);
784
785 mutex_lock(&dev->struct_mutex);
786
787 if (ret)
788 goto out;
789
790 next_page:
791 remain -= page_length;
792 user_data += page_length;
793 offset += page_length;
794 }
795 #endif
796
797 out:
798 i915_gem_object_unpin_pages(obj);
799
800 return ret;
801 }
802
803 /**
804 * Reads data from the object referenced by handle.
805 *
806 * On error, the contents of *data are undefined.
807 */
808 int
809 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
810 struct drm_file *file)
811 {
812 struct drm_i915_gem_pread *args = data;
813 struct drm_gem_object *gobj;
814 struct drm_i915_gem_object *obj;
815 int ret = 0;
816
817 if (args->size == 0)
818 return 0;
819
820 if (!access_ok(VERIFY_WRITE,
821 to_user_ptr(args->data_ptr),
822 args->size))
823 return -EFAULT;
824
825 ret = i915_mutex_lock_interruptible(dev);
826 if (ret)
827 return ret;
828
829 gobj = drm_gem_object_lookup(dev, file, args->handle);
830 if (gobj == NULL) {
831 ret = -ENOENT;
832 goto unlock;
833 }
834 obj = to_intel_bo(gobj);
835
836 /* Bounds check source. */
837 if (args->offset > obj->base.size ||
838 args->size > obj->base.size - args->offset) {
839 ret = -EINVAL;
840 goto out;
841 }
842
843 /* prime objects have no backing filp to GEM pread/pwrite
844 * pages from.
845 */
846 #ifdef __NetBSD__
847 /* Also stolen objects. */
848 if (obj->base.gemo_shm_uao == NULL) {
849 ret = -EINVAL;
850 goto out;
851 }
852 #else
853 if (!obj->base.filp) {
854 ret = -EINVAL;
855 goto out;
856 }
857 #endif
858
859 trace_i915_gem_object_pread(obj, args->offset, args->size);
860
861 ret = i915_gem_shmem_pread(dev, obj, args, file);
862
863 out:
864 drm_gem_object_unreference(&obj->base);
865 unlock:
866 mutex_unlock(&dev->struct_mutex);
867 return ret;
868 }
869
870 /* This is the fast write path which cannot handle
871 * page faults in the source data
872 */
873
874 static inline int
875 fast_user_write(struct io_mapping *mapping,
876 loff_t page_base, int page_offset,
877 char __user *user_data,
878 int length)
879 {
#ifdef __NetBSD__		/* XXX atomic GTT fast path */
881 return -EFAULT;
882 #else
883 void __iomem *vaddr_atomic;
884 void *vaddr;
885 unsigned long unwritten;
886
887 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
888 /* We can use the cpu mem copy function because this is X86. */
889 vaddr = (void __force*)vaddr_atomic + page_offset;
890 unwritten = __copy_from_user_inatomic_nocache(vaddr,
891 user_data, length);
892 io_mapping_unmap_atomic(vaddr_atomic);
893 return unwritten;
894 #endif
895 }
896
897 /**
898 * This is the fast pwrite path, where we copy the data directly from the
899 * user into the GTT, uncached.
900 */
901 static int
902 i915_gem_gtt_pwrite_fast(struct drm_device *dev,
903 struct drm_i915_gem_object *obj,
904 struct drm_i915_gem_pwrite *args,
905 struct drm_file *file)
906 {
907 struct drm_i915_private *dev_priv = dev->dev_private;
908 ssize_t remain;
909 loff_t offset, page_base;
910 char __user *user_data;
911 int page_offset, page_length, ret;
912
913 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
914 if (ret)
915 goto out;
916
917 ret = i915_gem_object_set_to_gtt_domain(obj, true);
918 if (ret)
919 goto out_unpin;
920
921 ret = i915_gem_object_put_fence(obj);
922 if (ret)
923 goto out_unpin;
924
925 user_data = to_user_ptr(args->data_ptr);
926 remain = args->size;
927
928 offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
929
930 while (remain > 0) {
931 /* Operation in this page
932 *
933 * page_base = page offset within aperture
934 * page_offset = offset within page
935 * page_length = bytes to copy for this page
936 */
937 page_base = offset & PAGE_MASK;
938 page_offset = offset_in_page(offset);
939 page_length = remain;
940 if ((page_offset + remain) > PAGE_SIZE)
941 page_length = PAGE_SIZE - page_offset;
942
943 /* If we get a fault while copying data, then (presumably) our
944 * source page isn't available. Return the error and we'll
945 * retry in the slow path.
946 */
947 if (fast_user_write(dev_priv->gtt.mappable, page_base,
948 page_offset, user_data, page_length)) {
949 ret = -EFAULT;
950 goto out_unpin;
951 }
952
953 remain -= page_length;
954 user_data += page_length;
955 offset += page_length;
956 }
957
958 out_unpin:
959 i915_gem_object_ggtt_unpin(obj);
960 out:
961 return ret;
962 }
963
964 /* Per-page copy function for the shmem pwrite fastpath.
965 * Flushes invalid cachelines before writing to the target if
966 * needs_clflush_before is set and flushes out any written cachelines after
967 * writing if needs_clflush is set. */
968 static int
969 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
970 char __user *user_data,
971 bool page_do_bit17_swizzling,
972 bool needs_clflush_before,
973 bool needs_clflush_after)
974 {
975 #ifdef __NetBSD__
976 return -EFAULT;
977 #else
978 char *vaddr;
979 int ret;
980
981 if (unlikely(page_do_bit17_swizzling))
982 return -EINVAL;
983
984 vaddr = kmap_atomic(page);
985 if (needs_clflush_before)
986 drm_clflush_virt_range(vaddr + shmem_page_offset,
987 page_length);
988 ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
989 user_data, page_length);
990 if (needs_clflush_after)
991 drm_clflush_virt_range(vaddr + shmem_page_offset,
992 page_length);
993 kunmap_atomic(vaddr);
994
995 return ret ? -EFAULT : 0;
996 #endif
997 }
998
999 /* Only difference to the fast-path function is that this can handle bit17
1000 * and uses non-atomic copy and kmap functions. */
1001 static int
1002 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
1003 char __user *user_data,
1004 bool page_do_bit17_swizzling,
1005 bool needs_clflush_before,
1006 bool needs_clflush_after)
1007 {
1008 char *vaddr;
1009 int ret;
1010
1011 vaddr = kmap(page);
1012 if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
1013 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
1014 page_length,
1015 page_do_bit17_swizzling);
1016 if (page_do_bit17_swizzling)
1017 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
1018 user_data,
1019 page_length);
1020 else
1021 ret = __copy_from_user(vaddr + shmem_page_offset,
1022 user_data,
1023 page_length);
1024 if (needs_clflush_after)
1025 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
1026 page_length,
1027 page_do_bit17_swizzling);
1028 kunmap(page);
1029
1030 return ret ? -EFAULT : 0;
1031 }
1032
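/*
 * Write userspace data into the object through the CPU, page by
 * page.  As with pread, an atomic fastpath is tried first and the
 * slowpath drops struct_mutex; cachelines are clflushed before
 * and/or after the copy as the object's domains require.
 */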
1033 static int
1034 i915_gem_shmem_pwrite(struct drm_device *dev,
1035 struct drm_i915_gem_object *obj,
1036 struct drm_i915_gem_pwrite *args,
1037 struct drm_file *file)
1038 {
1039 ssize_t remain;
1040 loff_t offset;
1041 char __user *user_data;
1042 int shmem_page_offset, page_length, ret = 0;
1043 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
1044 int hit_slowpath = 0;
1045 int needs_clflush_after = 0;
1046 int needs_clflush_before = 0;
1047 #ifndef __NetBSD__
1048 struct sg_page_iter sg_iter;
1049 #endif
1050
1051 user_data = to_user_ptr(args->data_ptr);
1052 remain = args->size;
1053
1054 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
1055
1056 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		/* If we're not in the cpu write domain, set ourselves into the gtt
1058 * write domain and manually flush cachelines (if required). This
1059 * optimizes for the case when the gpu will use the data
1060 * right away and we therefore have to clflush anyway. */
1061 needs_clflush_after = cpu_write_needs_clflush(obj);
1062 ret = i915_gem_object_wait_rendering(obj, false);
1063 if (ret)
1064 return ret;
1065 }
1066 /* Same trick applies to invalidate partially written cachelines read
1067 * before writing. */
1068 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
1069 needs_clflush_before =
1070 !cpu_cache_is_coherent(dev, obj->cache_level);
1071
1072 ret = i915_gem_object_get_pages(obj);
1073 if (ret)
1074 return ret;
1075
1076 i915_gem_object_pin_pages(obj);
1077
1078 offset = args->offset;
1079 obj->dirty = 1;
1080
1081 #ifdef __NetBSD__
1082 while (0 < remain) {
1083 /* Get the next page. */
1084 shmem_page_offset = offset_in_page(offset);
1085 KASSERT(shmem_page_offset < PAGE_SIZE);
1086 page_length = MIN(remain, (PAGE_SIZE - shmem_page_offset));
1087 struct page *const page = i915_gem_object_get_page(obj,
1088 atop(offset));
1089
1090 /* Decide whether to flush the cache or swizzle bit 17. */
1091 const bool partial_cacheline_write = needs_clflush_before &&
1092 ((shmem_page_offset | page_length)
1093 & (cpu_info_primary.ci_cflush_lsize - 1));
1094 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
1095 (page_to_phys(page) & (1 << 17)) != 0;
1096
1097 /* Try the fast path. */
1098 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
1099 user_data, page_do_bit17_swizzling,
1100 partial_cacheline_write, needs_clflush_after);
1101 if (ret == 0)
1102 goto next_page;
1103
1104 /* Fast path failed. Try the slow path. */
1105 hit_slowpath = 1;
1106 mutex_unlock(&dev->struct_mutex);
1107 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
1108 user_data, page_do_bit17_swizzling,
1109 partial_cacheline_write, needs_clflush_after);
1110 mutex_lock(&dev->struct_mutex);
1111 if (ret)
1112 goto out;
1113
1114 next_page: KASSERT(page_length <= remain);
1115 remain -= page_length;
1116 user_data += page_length;
1117 offset += page_length;
1118 }
1119 #else
1120 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
1121 offset >> PAGE_SHIFT) {
1122 struct page *page = sg_page_iter_page(&sg_iter);
1123 int partial_cacheline_write;
1124
1125 if (remain <= 0)
1126 break;
1127
1128 /* Operation in this page
1129 *
1130 * shmem_page_offset = offset within page in shmem file
1131 * page_length = bytes to copy for this page
1132 */
1133 shmem_page_offset = offset_in_page(offset);
1134
1135 page_length = remain;
1136 if ((shmem_page_offset + page_length) > PAGE_SIZE)
1137 page_length = PAGE_SIZE - shmem_page_offset;
1138
1139 /* If we don't overwrite a cacheline completely we need to be
1140 * careful to have up-to-date data by first clflushing. Don't
1141 * overcomplicate things and flush the entire patch. */
1142 partial_cacheline_write = needs_clflush_before &&
1143 ((shmem_page_offset | page_length)
1144 & (boot_cpu_data.x86_clflush_size - 1));
1145
1146 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
1147 (page_to_phys(page) & (1 << 17)) != 0;
1148
1149 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
1150 user_data, page_do_bit17_swizzling,
1151 partial_cacheline_write,
1152 needs_clflush_after);
1153 if (ret == 0)
1154 goto next_page;
1155
1156 hit_slowpath = 1;
1157 mutex_unlock(&dev->struct_mutex);
1158 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
1159 user_data, page_do_bit17_swizzling,
1160 partial_cacheline_write,
1161 needs_clflush_after);
1162
1163 mutex_lock(&dev->struct_mutex);
1164
1165 if (ret)
1166 goto out;
1167
1168 next_page:
1169 remain -= page_length;
1170 user_data += page_length;
1171 offset += page_length;
1172 }
1173 #endif
1174
1175 out:
1176 i915_gem_object_unpin_pages(obj);
1177
1178 if (hit_slowpath) {
1179 /*
1180 * Fixup: Flush cpu caches in case we didn't flush the dirty
1181 * cachelines in-line while writing and the object moved
1182 * out of the cpu write domain while we've dropped the lock.
1183 */
1184 if (!needs_clflush_after &&
1185 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1186 if (i915_gem_clflush_object(obj, obj->pin_display))
1187 i915_gem_chipset_flush(dev);
1188 }
1189 }
1190
1191 if (needs_clflush_after)
1192 i915_gem_chipset_flush(dev);
1193
1194 return ret;
1195 }
1196
1197 /**
1198 * Writes data to the object referenced by handle.
1199 *
1200 * On error, the contents of the buffer that were to be modified are undefined.
1201 */
1202 int
1203 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1204 struct drm_file *file)
1205 {
1206 struct drm_i915_gem_pwrite *args = data;
1207 struct drm_gem_object *gobj;
1208 struct drm_i915_gem_object *obj;
1209 int ret;
1210
1211 if (args->size == 0)
1212 return 0;
1213
1214 if (!access_ok(VERIFY_READ,
1215 to_user_ptr(args->data_ptr),
1216 args->size))
1217 return -EFAULT;
1218
1219 #ifndef __NetBSD__ /* XXX prefault */
1220 if (likely(!i915.prefault_disable)) {
1221 ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
1222 args->size);
1223 if (ret)
1224 return -EFAULT;
1225 }
1226 #endif
1227
1228 ret = i915_mutex_lock_interruptible(dev);
1229 if (ret)
1230 return ret;
1231
1232 gobj = drm_gem_object_lookup(dev, file, args->handle);
1233 if (gobj == NULL) {
1234 ret = -ENOENT;
1235 goto unlock;
1236 }
1237 obj = to_intel_bo(gobj);
1238
1239 /* Bounds check destination. */
1240 if (args->offset > obj->base.size ||
1241 args->size > obj->base.size - args->offset) {
1242 ret = -EINVAL;
1243 goto out;
1244 }
1245
1246 /* prime objects have no backing filp to GEM pread/pwrite
1247 * pages from.
1248 */
1249 #ifdef __NetBSD__
1250 /* Also stolen objects. */
1251 if (obj->base.gemo_shm_uao == NULL) {
1252 ret = -EINVAL;
1253 goto out;
1254 }
1255 #else
1256 if (!obj->base.filp) {
1257 ret = -EINVAL;
1258 goto out;
1259 }
1260 #endif
1261
1262 trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1263
1264 ret = -EFAULT;
1265 /* We can only do the GTT pwrite on untiled buffers, as otherwise
1266 * it would end up going through the fenced access, and we'll get
1267 * different detiling behavior between reading and writing.
1268 * pread/pwrite currently are reading and writing from the CPU
1269 * perspective, requiring manual detiling by the client.
1270 */
1271 if (obj->phys_handle) {
1272 ret = i915_gem_phys_pwrite(obj, args, file);
1273 goto out;
1274 }
1275
1276 if (obj->tiling_mode == I915_TILING_NONE &&
1277 obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
1278 cpu_write_needs_clflush(obj)) {
1279 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
1280 /* Note that the gtt paths might fail with non-page-backed user
1281 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fall back to the shmem path in that case. */
1283 }
1284
1285 if (ret == -EFAULT || ret == -ENOSPC)
1286 ret = i915_gem_shmem_pwrite(dev, obj, args, file);
1287
1288 out:
1289 drm_gem_object_unreference(&obj->base);
1290 unlock:
1291 mutex_unlock(&dev->struct_mutex);
1292 return ret;
1293 }
1294
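/*
 * Report the state of a GPU reset in errno form: -EAGAIN while a
 * reset is in progress (for interruptible callers), -EIO if the
 * caller cannot handle -EAGAIN or the GPU is terminally wedged,
 * and 0 otherwise.
 */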
1295 int
1296 i915_gem_check_wedge(struct i915_gpu_error *error,
1297 bool interruptible)
1298 {
1299 if (i915_reset_in_progress(error)) {
1300 /* Non-interruptible callers can't handle -EAGAIN, hence return
1301 * -EIO unconditionally for these. */
1302 if (!interruptible)
1303 return -EIO;
1304
1305 /* Recovery complete, but the reset failed ... */
1306 if (i915_terminally_wedged(error))
1307 return -EIO;
1308
1309 return -EAGAIN;
1310 }
1311
1312 return 0;
1313 }
1314
1315 /*
1316 * Compare seqno against outstanding lazy request. Emit a request if they are
1317 * equal.
1318 */
1319 static int
1320 i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
1321 {
1322 int ret;
1323
1324 BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
1325
1326 ret = 0;
1327 if (seqno == ring->outstanding_lazy_seqno)
1328 ret = i915_add_request(ring, NULL);
1329
1330 return ret;
1331 }
1332
1333 #ifndef __NetBSD__
1334 static void fake_irq(unsigned long data)
1335 {
1336 wake_up_process((struct task_struct *)data);
1337 }
1338 #endif
1339
1340 static bool missed_irq(struct drm_i915_private *dev_priv,
1341 struct intel_ring_buffer *ring)
1342 {
1343 return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
1344 }
1345
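/*
 * A file gets at most one outstanding RPS wait-boost: returns false
 * if this file's boost is already claimed.  A NULL file (a
 * kernel-internal wait) may always boost.
 */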
1346 static bool can_wait_boost(struct drm_i915_file_private *file_priv)
1347 {
1348 if (file_priv == NULL)
1349 return true;
1350
1351 return !atomic_xchg(&file_priv->rps_wait_boost, true);
1352 }
1353
1354 /**
1355 * __wait_seqno - wait until execution of seqno has finished
1356 * @ring: the ring expected to report seqno
1357 * @seqno: duh!
1358 * @reset_counter: reset sequence associated with the given seqno
1359 * @interruptible: do an interruptible wait (normally yes)
1360 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
1361 *
1362 * Note: It is of utmost importance that the passed in seqno and reset_counter
1363 * values have been read by the caller in an smp safe manner. Where read-side
1364 * locks are involved, it is sufficient to read the reset_counter before
1365 * unlocking the lock that protects the seqno. For lockless tricks, the
1366 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
1367 * inserted.
1368 *
 * Returns 0 if the seqno was found within the allotted time. Else returns the
1370 * errno with remaining time filled in timeout argument.
1371 */
1372 #ifdef __NetBSD__
1373 static int
1374 __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, unsigned reset_counter,
1375 bool interruptible, struct timespec *timeout,
1376 struct drm_i915_file_private *file_priv)
1377 {
1378 struct drm_device *dev = ring->dev;
1379 struct drm_i915_private *dev_priv = dev->dev_private;
1380 bool irq_test_in_progress;
1381 struct timespec before, after;
1382 int ticks;
1383 bool wedged;
1384 int ret;
1385
1386 irq_test_in_progress = (dev_priv->gpu_error.test_irq_rings &
1387 intel_ring_flag(ring));
1388 __insn_barrier();
1389
1390 if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
1391 return 0;
1392
1393 if (timeout)
1394 ticks = mstohz(timespec_to_ns(timeout) / 1000000);
1395 else
1396 ticks = 1;
1397
1398 if (INTEL_INFO(dev)->gen >= 6 && can_wait_boost(file_priv)) {
1399 gen6_rps_boost(dev_priv);
1400 if (file_priv)
1401 mod_delayed_work(dev_priv->wq,
1402 &file_priv->mm.idle_work,
1403 msecs_to_jiffies(100));
1404 }
1405
1406 if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring)))
1407 return -ENODEV;
1408
1409 nanotime(&before);
1410 spin_lock(&dev_priv->irq_lock);
1411 #define EXIT_COND \
1412 (((reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) \
1413 ? wedged = true : false) || \
1414 i915_seqno_passed(ring->get_seqno(ring, false), \
1415 seqno))
1416
1417 if (timeout) {
1418 /*
1419 * XXX This missed_irq business smells like unlocked
1420 * Linux waitqueue nonsense.
1421 */
1422 if (missed_irq(dev_priv, ring))
1423 ticks = 1;
1424 if (interruptible)
1425 DRM_SPIN_TIMED_WAIT_UNTIL(ret, &ring->irq_queue,
1426 &dev_priv->irq_lock, ticks, EXIT_COND);
1427 else
1428 DRM_SPIN_TIMED_WAIT_NOINTR_UNTIL(ret, &ring->irq_queue,
1429 &dev_priv->irq_lock, ticks, EXIT_COND);
1430 } else {
1431 if (interruptible)
1432 DRM_SPIN_WAIT_UNTIL(ret, &ring->irq_queue,
1433 &dev_priv->irq_lock, EXIT_COND);
1434 else
1435 DRM_SPIN_WAIT_NOINTR_UNTIL(ret, &ring->irq_queue,
1436 &dev_priv->irq_lock, EXIT_COND);
1437 }
1438 #undef EXIT_COND
1439 spin_unlock(&dev_priv->irq_lock);
1440 nanotime(&after);
1441
1442 if (!irq_test_in_progress)
1443 ring->irq_put(ring);
1444 if (timeout) {
1445 struct timespec slept;
1446
1447 /* Compute slept = after - before. */
1448 timespecsub(&after, &before, &slept);
1449
1450 /*
1451 * Return the time remaining, timeout - slept, if we
1452 * slept for less time than the timeout; or zero if we
1453 * timed out.
1454 */
1455 if (timespeccmp(&slept, timeout, <))
1456 timespecsub(timeout, &slept, timeout);
1457 else
1458 timespecclear(timeout);
1459 }
1460 if (wedged) { /* GPU reset while we were waiting. */
1461 ret = i915_gem_check_wedge(&dev_priv->gpu_error,
1462 interruptible);
1463 if (ret == 0)
1464 ret = -EAGAIN;
1465 }
1466 if (ret < 0) /* Error. */
1467 return ret;
1468 if (ret == 0) /* Seqno didn't pass. */
1469 return -ETIME;
1470 return 0; /* Seqno passed, maybe time to spare. */
1471 }
1472 #else
1473 static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
1474 unsigned reset_counter,
1475 bool interruptible,
1476 struct timespec *timeout,
1477 struct drm_i915_file_private *file_priv)
1478 {
1479 struct drm_device *dev = ring->dev;
1480 struct drm_i915_private *dev_priv = dev->dev_private;
1481 const bool irq_test_in_progress =
1482 ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
1483 struct timespec before, now;
1484 DEFINE_WAIT(wait);
1485 unsigned long timeout_expire;
1486 int ret;
1487
1488 WARN(dev_priv->pm.irqs_disabled, "IRQs disabled\n");
1489
1490 if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
1491 return 0;
1492
1493 timeout_expire = timeout ? jiffies + timespec_to_jiffies_timeout(timeout) : 0;
1494
1495 if (INTEL_INFO(dev)->gen >= 6 && can_wait_boost(file_priv)) {
1496 gen6_rps_boost(dev_priv);
1497 if (file_priv)
1498 mod_delayed_work(dev_priv->wq,
1499 &file_priv->mm.idle_work,
1500 msecs_to_jiffies(100));
1501 }
1502
1503 if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring)))
1504 return -ENODEV;
1505
1506 /* Record current time in case interrupted by signal, or wedged */
1507 trace_i915_gem_request_wait_begin(ring, seqno);
1508 getrawmonotonic(&before);
1509 for (;;) {
1510 struct timer_list timer;
1511
1512 prepare_to_wait(&ring->irq_queue, &wait,
1513 interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
1514
1515 /* We need to check whether any gpu reset happened in between
1516 * the caller grabbing the seqno and now ... */
1517 if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
1518 /* ... but upgrade the -EAGAIN to an -EIO if the gpu
			 * is truly gone. */
1520 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1521 if (ret == 0)
1522 ret = -EAGAIN;
1523 break;
1524 }
1525
1526 if (i915_seqno_passed(ring->get_seqno(ring, false), seqno)) {
1527 ret = 0;
1528 break;
1529 }
1530
1531 if (interruptible && signal_pending(current)) {
1532 ret = -ERESTARTSYS;
1533 break;
1534 }
1535
1536 if (timeout && time_after_eq(jiffies, timeout_expire)) {
1537 ret = -ETIME;
1538 break;
1539 }
1540
1541 timer.function = NULL;
1542 if (timeout || missed_irq(dev_priv, ring)) {
1543 unsigned long expire;
1544
1545 setup_timer_on_stack(&timer, fake_irq, (unsigned long)current);
1546 expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire;
1547 mod_timer(&timer, expire);
1548 }
1549
1550 io_schedule();
1551
1552 if (timer.function) {
1553 del_singleshot_timer_sync(&timer);
1554 destroy_timer_on_stack(&timer);
1555 }
1556 }
1557 getrawmonotonic(&now);
1558 trace_i915_gem_request_wait_end(ring, seqno);
1559
1560 if (!irq_test_in_progress)
1561 ring->irq_put(ring);
1562
1563 finish_wait(&ring->irq_queue, &wait);
1564
1565 if (timeout) {
1566 struct timespec sleep_time = timespec_sub(now, before);
1567 *timeout = timespec_sub(*timeout, sleep_time);
1568 if (!timespec_valid(timeout)) /* i.e. negative time remains */
1569 set_normalized_timespec(timeout, 0, 0);
1570 }
1571
1572 return ret;
1573 }
1574 #endif
1575
1576 /**
1577 * Waits for a sequence number to be signaled, and cleans up the
1578 * request and object lists appropriately for that event.
1579 */
1580 int
1581 i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
1582 {
1583 struct drm_device *dev = ring->dev;
1584 struct drm_i915_private *dev_priv = dev->dev_private;
1585 bool interruptible = dev_priv->mm.interruptible;
1586 int ret;
1587
1588 BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1589 BUG_ON(seqno == 0);
1590
1591 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1592 if (ret)
1593 return ret;
1594
1595 ret = i915_gem_check_olr(ring, seqno);
1596 if (ret)
1597 return ret;
1598
1599 return __wait_seqno(ring, seqno,
1600 atomic_read(&dev_priv->gpu_error.reset_counter),
1601 interruptible, NULL, NULL);
1602 }
1603
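/*
 * Common tail of the blocking and nonblocking wait-for-rendering
 * paths: retire completed requests on the ring and clear the
 * object's write state now that the last write is known to have
 * finished.
 */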
1604 static int
1605 i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
1606 struct intel_ring_buffer *ring)
1607 {
1608 i915_gem_retire_requests_ring(ring);
1609
1610 /* Manually manage the write flush as we may have not yet
1611 * retired the buffer.
1612 *
1613 * Note that the last_write_seqno is always the earlier of
	 * the two (read/write) seqno, so if we have successfully waited,
1615 * we know we have passed the last write.
1616 */
1617 obj->last_write_seqno = 0;
1618 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
1619
1620 return 0;
1621 }
1622
1623 /**
1624 * Ensures that all rendering to the object has completed and the object is
1625 * safe to unbind from the GTT or access from the CPU.
1626 */
1627 static __must_check int
1628 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
1629 bool readonly)
1630 {
1631 struct intel_ring_buffer *ring = obj->ring;
1632 u32 seqno;
1633 int ret;
1634
1635 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
1636 if (seqno == 0)
1637 return 0;
1638
1639 ret = i915_wait_seqno(ring, seqno);
1640 if (ret)
1641 return ret;
1642
1643 return i915_gem_object_wait_rendering__tail(obj, ring);
1644 }
1645
1646 /* A nonblocking variant of the above wait. This is a highly dangerous routine
1647 * as the object state may change during this call.
1648 */
1649 static __must_check int
1650 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
1651 struct drm_i915_file_private *file_priv,
1652 bool readonly)
1653 {
1654 struct drm_device *dev = obj->base.dev;
1655 struct drm_i915_private *dev_priv = dev->dev_private;
1656 struct intel_ring_buffer *ring = obj->ring;
1657 unsigned reset_counter;
1658 u32 seqno;
1659 int ret;
1660
1661 BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1662 BUG_ON(!dev_priv->mm.interruptible);
1663
1664 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
1665 if (seqno == 0)
1666 return 0;
1667
1668 ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
1669 if (ret)
1670 return ret;
1671
1672 ret = i915_gem_check_olr(ring, seqno);
1673 if (ret)
1674 return ret;
1675
1676 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
1677 mutex_unlock(&dev->struct_mutex);
1678 ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file_priv);
1679 mutex_lock(&dev->struct_mutex);
1680 if (ret)
1681 return ret;
1682
1683 return i915_gem_object_wait_rendering__tail(obj, ring);
1684 }
1685
1686 /**
1687 * Called when user space prepares to use an object with the CPU, either
1688 * through the mmap ioctl's mapping or a GTT mapping.
1689 */
1690 int
1691 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1692 struct drm_file *file)
1693 {
1694 struct drm_i915_gem_set_domain *args = data;
1695 struct drm_gem_object *gobj;
1696 struct drm_i915_gem_object *obj;
1697 uint32_t read_domains = args->read_domains;
1698 uint32_t write_domain = args->write_domain;
1699 int ret;
1700
1701 /* Only handle setting domains to types used by the CPU. */
1702 if (write_domain & I915_GEM_GPU_DOMAINS)
1703 return -EINVAL;
1704
1705 if (read_domains & I915_GEM_GPU_DOMAINS)
1706 return -EINVAL;
1707
1708 /* Having something in the write domain implies it's in the read
1709 * domain, and only that read domain. Enforce that in the request.
1710 */
1711 if (write_domain != 0 && read_domains != write_domain)
1712 return -EINVAL;
1713
1714 ret = i915_mutex_lock_interruptible(dev);
1715 if (ret)
1716 return ret;
1717
1718 gobj = drm_gem_object_lookup(dev, file, args->handle);
1719 if (gobj == NULL) {
1720 ret = -ENOENT;
1721 goto unlock;
1722 }
1723 obj = to_intel_bo(gobj);
1724
1725 /* Try to flush the object off the GPU without holding the lock.
1726 * We will repeat the flush holding the lock in the normal manner
1727 * to catch cases where we are gazumped.
1728 */
1729 ret = i915_gem_object_wait_rendering__nonblocking(obj,
1730 file->driver_priv,
1731 !write_domain);
1732 if (ret)
1733 goto unref;
1734
1735 if (read_domains & I915_GEM_DOMAIN_GTT) {
1736 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1737
1738 /* Silently promote "you're not bound, there was nothing to do"
1739 * to success, since the client was just asking us to
1740 * make sure everything was done.
1741 */
1742 if (ret == -EINVAL)
1743 ret = 0;
1744 } else {
1745 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1746 }
1747
1748 unref:
1749 drm_gem_object_unreference(&obj->base);
1750 unlock:
1751 mutex_unlock(&dev->struct_mutex);
1752 return ret;
1753 }
1754
1755 /**
1756 * Called when user space has done writes to this buffer
1757 */
1758 int
1759 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1760 struct drm_file *file)
1761 {
1762 struct drm_i915_gem_sw_finish *args = data;
1763 struct drm_gem_object *gobj;
1764 struct drm_i915_gem_object *obj;
1765 int ret = 0;
1766
1767 ret = i915_mutex_lock_interruptible(dev);
1768 if (ret)
1769 return ret;
1770
1771 gobj = drm_gem_object_lookup(dev, file, args->handle);
1772 if (gobj == NULL) {
1773 ret = -ENOENT;
1774 goto unlock;
1775 }
1776 obj = to_intel_bo(gobj);
1777
1778 /* Pinned buffers may be scanout, so flush the cache */
1779 if (obj->pin_display)
1780 i915_gem_object_flush_cpu_write_domain(obj, true);
1781
1782 drm_gem_object_unreference(&obj->base);
1783 unlock:
1784 mutex_unlock(&dev->struct_mutex);
1785 return ret;
1786 }
1787
1788 /**
1789 * Maps the contents of an object, returning the address it is mapped
1790 * into.
1791 *
1792 * While the mapping holds a reference on the contents of the object, it doesn't
1793 * imply a ref on the object itself.
1794 */
1795 int
1796 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1797 struct drm_file *file)
1798 {
1799 struct drm_i915_gem_mmap *args = data;
1800 struct drm_gem_object *obj;
1801 unsigned long addr;
1802 #ifdef __NetBSD__
1803 int ret;
1804 #endif
1805
1806 obj = drm_gem_object_lookup(dev, file, args->handle);
1807 if (obj == NULL)
1808 return -ENOENT;
1809
1810 /* prime objects have no backing filp to GEM mmap
1811 * pages from.
1812 */
1813 #ifdef __NetBSD__
1814 /* Also stolen objects (XXX can we get them here?) */
1815 if (obj->gemo_shm_uao == NULL) {
1816 drm_gem_object_unreference_unlocked(obj);
1817 return -EINVAL;
1818 }
1819 #else
1820 if (!obj->filp) {
1821 drm_gem_object_unreference_unlocked(obj);
1822 return -EINVAL;
1823 }
1824 #endif
1825
1826 #ifdef __NetBSD__
1827 addr = (*curproc->p_emul->e_vm_default_addr)(curproc,
1828 (vaddr_t)curproc->p_vmspace->vm_daddr, args->size);
1829 /* XXX errno NetBSD->Linux */
1830 ret = -uvm_map(&curproc->p_vmspace->vm_map, &addr, args->size,
1831 obj->gemo_shm_uao, args->offset, 0,
1832 UVM_MAPFLAG((VM_PROT_READ | VM_PROT_WRITE),
1833 (VM_PROT_READ | VM_PROT_WRITE), UVM_INH_COPY, UVM_ADV_NORMAL,
1834 0));
1835 if (ret) {
1836 drm_gem_object_unreference_unlocked(obj);
1837 return ret;
1838 }
1839 uao_reference(obj->gemo_shm_uao);
1840 drm_gem_object_unreference_unlocked(obj);
1841 #else
1842 addr = vm_mmap(obj->filp, 0, args->size,
1843 PROT_READ | PROT_WRITE, MAP_SHARED,
1844 args->offset);
1845 drm_gem_object_unreference_unlocked(obj);
1846 if (IS_ERR((void *)addr))
1847 return addr;
1848 #endif
1849
1850 args->addr_ptr = (uint64_t) addr;
1851
1852 return 0;
1853 }
1854
1855 #ifdef __NetBSD__ /* XXX gem gtt fault */
1856 static int i915_udv_fault(struct uvm_faultinfo *, vaddr_t,
1857 struct vm_page **, int, int, vm_prot_t, int, paddr_t);
1858
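/*
 * NetBSD analogue of the Linux GTT fault handler below: bind the
 * object into the mappable GTT, move it to the GTT domain, grab a
 * fence register if needed, and enter aperture mappings for the
 * faulting range via i915_udv_fault.
 */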
1859 int
1860 i915_gem_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps,
1861 int npages, int centeridx, vm_prot_t access_type, int flags)
1862 {
1863 struct uvm_object *uobj = ufi->entry->object.uvm_obj;
1864 struct drm_gem_object *gem_obj =
1865 container_of(uobj, struct drm_gem_object, gemo_uvmobj);
1866 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
1867 struct drm_device *dev = obj->base.dev;
1868 struct drm_i915_private *dev_priv = dev->dev_private;
1869 voff_t byte_offset;
1870 pgoff_t page_offset;
1871 int ret = 0;
1872 bool write = ISSET(access_type, VM_PROT_WRITE)? 1 : 0;
1873
1874 byte_offset = (ufi->entry->offset + (vaddr - ufi->entry->start));
1875 KASSERT(byte_offset <= obj->base.size);
1876 page_offset = (byte_offset >> PAGE_SHIFT);
1877
1878 intel_runtime_pm_get(dev_priv);
1879
1880 /* Thanks, uvm, but we don't need this lock. */
1881 mutex_exit(uobj->vmobjlock);
1882
1883 ret = i915_mutex_lock_interruptible(dev);
1884 if (ret)
1885 goto out;
1886
1887 trace_i915_gem_object_fault(obj, page_offset, true, write);
1888
1889 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
1890 if (ret)
1891 goto unlock;
1892
1893 if ((obj->cache_level != I915_CACHE_NONE) && !HAS_LLC(dev)) {
1894 ret = -EINVAL;
1895 goto unlock;
1896 }
1897
1898 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
1899 if (ret)
1900 goto unlock;
1901
1902 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1903 if (ret)
1904 goto unpin;
1905
1906 ret = i915_gem_object_get_fence(obj);
1907 if (ret)
1908 goto unpin;
1909
1910 obj->fault_mappable = true;
1911
1912 /* XXX errno NetBSD->Linux */
1913 ret = -i915_udv_fault(ufi, vaddr, pps, npages, centeridx, access_type,
1914 flags,
1915 (dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj)));
1916 unpin:
1917 i915_gem_object_ggtt_unpin(obj);
1918 unlock:
1919 mutex_unlock(&dev->struct_mutex);
1920 out:
1921 mutex_enter(uobj->vmobjlock);
1922 uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj);
1923 if (ret == -ERESTART)
1924 uvm_wait("i915flt");
1925 /* XXX Deal with GPU hangs here... */
1926 intel_runtime_pm_put(dev_priv);
1927 /* XXX errno Linux->NetBSD */
1928 return -ret;
1929 }
1930
1931 /*
1932 * XXX i915_udv_fault is copypasta of udv_fault from uvm_device.c.
1933 *
1934 * XXX pmap_enter_default instead of pmap_enter because of a problem
1935 * with using weak aliases in kernel modules or something.
1936 */
1937 int pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, unsigned);
1938
1939 static int
1940 i915_udv_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps,
1941 int npages, int centeridx, vm_prot_t access_type, int flags,
1942 paddr_t gtt_paddr)
1943 {
1944 struct vm_map_entry *entry = ufi->entry;
1945 vaddr_t curr_va;
1946 off_t curr_offset;
1947 paddr_t paddr;
1948 u_int mmapflags;
1949 int lcv, retval;
1950 vm_prot_t mapprot;
1951 UVMHIST_FUNC("i915_udv_fault"); UVMHIST_CALLED(maphist);
1952 UVMHIST_LOG(maphist," flags=%d", flags,0,0,0);
1953
1954 /*
1955 * we do not allow device mappings to be mapped copy-on-write
1956 * so we kill any attempt to do so here.
1957 */
1958
1959 if (UVM_ET_ISCOPYONWRITE(entry)) {
1960 UVMHIST_LOG(maphist, "<- failed -- COW entry (etype=0x%x)",
1961 entry->etype, 0,0,0);
1962 return(EIO);
1963 }
1964
1965 /*
1966 * now we must determine the offset in udv to use and the VA to
1967 * use for pmap_enter. note that we always use orig_map's pmap
1968 * for pmap_enter (even if we have a submap). since virtual
1969 * addresses in a submap must match the main map, this is ok.
1970 */
1971
1972 /* udv offset = (offset from start of entry) + entry's offset */
1973 curr_offset = entry->offset + (vaddr - entry->start);
1974 /* pmap va = vaddr (virtual address of pps[0]) */
1975 curr_va = vaddr;
1976
1977 /*
1978 * loop over the page range entering in as needed
1979 */
1980
1981 retval = 0;
1982 for (lcv = 0 ; lcv < npages ; lcv++, curr_offset += PAGE_SIZE,
1983 curr_va += PAGE_SIZE) {
1984 if ((flags & PGO_ALLPAGES) == 0 && lcv != centeridx)
1985 continue;
1986
1987 if (pps[lcv] == PGO_DONTCARE)
1988 continue;
1989
1990 paddr = (gtt_paddr + curr_offset);
1991 mmapflags = 0;
1992 mapprot = ufi->entry->protection;
1993 UVMHIST_LOG(maphist,
1994 " MAPPING: device: pm=0x%x, va=0x%x, pa=0x%lx, at=%d",
1995 ufi->orig_map->pmap, curr_va, paddr, mapprot);
1996 if (pmap_enter_default(ufi->orig_map->pmap, curr_va, paddr, mapprot,
1997 PMAP_CANFAIL | mapprot | mmapflags) != 0) {
1998 /*
1999 * pmap_enter() didn't have the resource to
2000 * enter this mapping. Unlock everything,
2001 * wait for the pagedaemon to free up some
2002 * pages, and then tell uvm_fault() to start
2003 * the fault again.
2004 *
2005 * XXX Needs some rethinking for the PGO_ALLPAGES
2006 * XXX case.
2007 */
2008 pmap_update(ufi->orig_map->pmap); /* sync what we have so far */
2009 return (ERESTART);
2010 }
2011 }
2012
2013 pmap_update(ufi->orig_map->pmap);
2014 return (retval);
2015 }
2016 #else
2017 /**
2018 * i915_gem_fault - fault a page into the GTT
2019 * @vma: VMA in question
2020 * @vmf: fault info
2021 *
2022 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
2023 * from userspace. The fault handler takes care of binding the object to
2024 * the GTT (if needed), allocating and programming a fence register (again,
2025 * only if needed based on whether the old reg is still valid or the object
2026 * is tiled) and inserting a new PTE into the faulting process.
2027 *
2028 * Note that the faulting process may involve evicting existing objects
2029 * from the GTT and/or fence registers to make room. So performance may
2030 * suffer if the GTT working set is large or there are few fence registers
2031 * left.
2032 */
2033 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
2034 {
2035 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
2036 struct drm_device *dev = obj->base.dev;
2037 struct drm_i915_private *dev_priv = dev->dev_private;
2038 pgoff_t page_offset;
2039 unsigned long pfn;
2040 int ret = 0;
2041 bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
2042
2043 intel_runtime_pm_get(dev_priv);
2044
2045 /* We don't use vmf->pgoff since that has the fake offset */
2046 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
2047 PAGE_SHIFT;
2048
2049 ret = i915_mutex_lock_interruptible(dev);
2050 if (ret)
2051 goto out;
2052
2053 trace_i915_gem_object_fault(obj, page_offset, true, write);
2054
2055 /* Try to flush the object off the GPU first without holding the lock.
2056 * Upon reacquiring the lock, we will perform our sanity checks and then
2057 * repeat the flush holding the lock in the normal manner to catch cases
2058 * where we are gazumped.
2059 */
2060 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
2061 if (ret)
2062 goto unlock;
2063
2064 /* Access to snoopable pages through the GTT is incoherent. */
2065 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
2066 ret = -EINVAL;
2067 goto unlock;
2068 }
2069
2070 /* Now bind it into the GTT if needed */
2071 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
2072 if (ret)
2073 goto unlock;
2074
2075 ret = i915_gem_object_set_to_gtt_domain(obj, write);
2076 if (ret)
2077 goto unpin;
2078
2079 ret = i915_gem_object_get_fence(obj);
2080 if (ret)
2081 goto unpin;
2082
2083 obj->fault_mappable = true;
2084
2085 pfn = dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj);
2086 pfn >>= PAGE_SHIFT;
2087 pfn += page_offset;
2088
2089 /* Finally, remap it using the new GTT offset */
2090 ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
2091 unpin:
2092 i915_gem_object_ggtt_unpin(obj);
2093 unlock:
2094 mutex_unlock(&dev->struct_mutex);
2095 out:
2096 switch (ret) {
2097 case -EIO:
2098 /* If this -EIO is due to a gpu hang, give the reset code a
2099 * chance to clean up the mess. Otherwise return the proper
2100 * SIGBUS. */
2101 if (i915_terminally_wedged(&dev_priv->gpu_error)) {
2102 ret = VM_FAULT_SIGBUS;
2103 break;
2104 }
2105 case -EAGAIN:
2106 /*
2107 * EAGAIN means the gpu is hung and we'll wait for the error
2108 * handler to reset everything when re-faulting in
2109 * i915_mutex_lock_interruptible.
2110 */
2111 case 0:
2112 case -ERESTARTSYS:
2113 case -EINTR:
2114 case -EBUSY:
2115 /*
2116 * EBUSY is ok: this just means that another thread
2117 * already did the job.
2118 */
2119 ret = VM_FAULT_NOPAGE;
2120 break;
2121 case -ENOMEM:
2122 ret = VM_FAULT_OOM;
2123 break;
2124 case -ENOSPC:
2125 case -EFAULT:
2126 ret = VM_FAULT_SIGBUS;
2127 break;
2128 default:
2129 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
2130 ret = VM_FAULT_SIGBUS;
2131 break;
2132 }
2133
2134 intel_runtime_pm_put(dev_priv);
2135 return ret;
2136 }
2137
2138 void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
2139 {
2140 struct i915_vma *vma;
2141
2142 /*
2143 * Only the global gtt is relevant for gtt memory mappings, so restrict
2144 * list traversal to objects bound into the global address space. Note
2145 * that the active list should be empty, but better safe than sorry.
2146 */
2147 WARN_ON(!list_empty(&dev_priv->gtt.base.active_list));
2148 list_for_each_entry(vma, &dev_priv->gtt.base.active_list, mm_list)
2149 i915_gem_release_mmap(vma->obj);
2150 list_for_each_entry(vma, &dev_priv->gtt.base.inactive_list, mm_list)
2151 i915_gem_release_mmap(vma->obj);
2152 }
2153 #endif
2154
2155 /**
2156 * i915_gem_release_mmap - remove physical page mappings
2157 * @obj: obj in question
2158 *
2159 * Preserve the reservation of the mmapping with the DRM core code, but
2160 * relinquish ownership of the pages back to the system.
2161 *
2162 * It is vital that we remove the page mapping if we have mapped a tiled
2163 * object through the GTT and then lose the fence register due to
2164 * resource pressure. Similarly if the object has been moved out of the
2165 * aperture, then pages mapped into userspace must be revoked. Removing the
2166 * mapping will then trigger a page fault on the next user access, allowing
2167 * fixup by i915_gem_fault().
2168 */
2169 void
2170 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
2171 {
2172 if (!obj->fault_mappable)
2173 return;
2174
2175 #ifdef __NetBSD__ /* XXX gem gtt fault */
2176 {
2177 struct vm_page *page;
2178
2179 mutex_enter(obj->base.gemo_shm_uao->vmobjlock);
2180 KASSERT(obj->pages != NULL);
2181 /* Force a fresh fault for each page. */
2182 /*
2183 * XXX OOPS! This doesn't actually do what we want.
2184 * This causes a fresh fault for access to the backing
2185 * pages -- but nothing accesses the backing pages
2186 * directly! What is actually entered into CPU page
2187 * table entries is aperture addresses which have been
2188 * programmed by the GTT to refer to those backing
2189 * pages.
2190 *
2191 * We need to clear those page table entries, but
2192 * there's no good way to do that at the moment: nobody
2193 * records for us a map from either uvm objects or
2194 * physical device addresses to a list of all virtual
2195 * pages where they have been mapped. pmap(9) records
2196 * a map only from physical RAM addresses to virtual
2197 * pages; it does nothing for physical device
2198 * addresses.
2199 */
2200 TAILQ_FOREACH(page, &obj->igo_pageq, pageq.queue)
2201 pmap_page_protect(page, VM_PROT_NONE);
2202 mutex_exit(obj->base.gemo_shm_uao->vmobjlock);
2203 }
2204 #else
2205 drm_vma_node_unmap(&obj->base.vma_node,
2206 obj->base.dev->anon_inode->i_mapping);
2207 #endif
2208 obj->fault_mappable = false;
2209 }
2210
2211 uint32_t
2212 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
2213 {
2214 uint32_t gtt_size;
2215
2216 if (INTEL_INFO(dev)->gen >= 4 ||
2217 tiling_mode == I915_TILING_NONE)
2218 return size;
2219
2220 /* Previous chips need a power-of-two fence region when tiling */
2221 if (INTEL_INFO(dev)->gen == 3)
2222 gtt_size = 1024*1024;
2223 else
2224 gtt_size = 512*1024;
2225
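/* Round up to the smallest power-of-two region that fits the object. */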
2226 while (gtt_size < size)
2227 gtt_size <<= 1;
2228
2229 return gtt_size;
2230 }
2231
2232 /**
2233 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
2234 * @obj: object to check
2235 *
2236 * Return the required GTT alignment for an object, taking into account
2237 * potential fence register mapping.
2238 */
2239 uint32_t
2240 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
2241 int tiling_mode, bool fenced)
2242 {
2243 /*
2244 * Minimum alignment is 4k (GTT page size), but might be greater
2245 * if a fence register is needed for the object.
2246 */
2247 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
2248 tiling_mode == I915_TILING_NONE)
2249 return 4096;
2250
2251 /*
2252 * Previous chips need to be aligned to the size of the smallest
2253 * fence register that can contain the object.
2254 */
2255 return i915_gem_get_gtt_size(dev, size, tiling_mode);
2256 }
2257
2258 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2259 {
2260 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2261 int ret;
2262
2263 if (drm_vma_node_has_offset(&obj->base.vma_node))
2264 return 0;
2265
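/*
 * Disable the shrinker's lock stealing while we carve out an mmap
 * offset for this object.
 */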
2266 dev_priv->mm.shrinker_no_lock_stealing = true;
2267
2268 ret = drm_gem_create_mmap_offset(&obj->base);
2269 if (ret != -ENOSPC)
2270 goto out;
2271
2272 /* Badly fragmented mmap space? The only way we can recover
2273 * space is by destroying unwanted objects. We can't randomly release
2274 * mmap_offsets as userspace expects them to be persistent for the
2275 * lifetime of the objects. The closest we can do is to release the
2276 * offsets on purgeable objects by truncating them and marking them purged,
2277 * which prevents userspace from ever using that object again.
2278 */
2279 i915_gem_purge(dev_priv, obj->base.size >> PAGE_SHIFT);
2280 ret = drm_gem_create_mmap_offset(&obj->base);
2281 if (ret != -ENOSPC)
2282 goto out;
2283
2284 i915_gem_shrink_all(dev_priv);
2285 ret = drm_gem_create_mmap_offset(&obj->base);
2286 out:
2287 dev_priv->mm.shrinker_no_lock_stealing = false;
2288
2289 return ret;
2290 }
2291
2292 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2293 {
2294 drm_gem_free_mmap_offset(&obj->base);
2295 }
2296
2297 int
2298 i915_gem_mmap_gtt(struct drm_file *file,
2299 struct drm_device *dev,
2300 uint32_t handle,
2301 uint64_t *offset)
2302 {
2303 struct drm_i915_private *dev_priv = dev->dev_private;
2304 struct drm_gem_object *gobj;
2305 struct drm_i915_gem_object *obj;
2306 int ret;
2307
2308 ret = i915_mutex_lock_interruptible(dev);
2309 if (ret)
2310 return ret;
2311
2312 gobj = drm_gem_object_lookup(dev, file, handle);
2313 if (gobj == NULL) {
2314 ret = -ENOENT;
2315 goto unlock;
2316 }
2317 obj = to_intel_bo(gobj);
2318
2319 if (obj->base.size > dev_priv->gtt.mappable_end) {
2320 ret = -E2BIG;
2321 goto out;
2322 }
2323
2324 if (obj->madv != I915_MADV_WILLNEED) {
2325 DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
2326 ret = -EFAULT;
2327 goto out;
2328 }
2329
2330 ret = i915_gem_object_create_mmap_offset(obj);
2331 if (ret)
2332 goto out;
2333
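/* Hand the fake offset back to userspace for use with mmap(). */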
2334 *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
2335
2336 out:
2337 drm_gem_object_unreference(&obj->base);
2338 unlock:
2339 mutex_unlock(&dev->struct_mutex);
2340 return ret;
2341 }
2342
2343 /**
2344 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2345 * @dev: DRM device
2346 * @data: GTT mapping ioctl data
2347 * @file: GEM object info
2348 *
2349 * Simply returns the fake offset to userspace so it can mmap it.
2350 * The mmap call will end up in drm_gem_mmap(), which will set things
2351 * up so we can get faults in the handler above.
2352 *
2353 * The fault handler will take care of binding the object into the GTT
2354 * (since it may have been evicted to make room for something), allocating
2355 * a fence register, and mapping the appropriate aperture address into
2356 * userspace.
2357 */
2358 int
2359 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2360 struct drm_file *file)
2361 {
2362 struct drm_i915_gem_mmap_gtt *args = data;
2363
2364 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
2365 }
2366
2367 /* Immediately discard the backing storage */
2368 static void
2369 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2370 {
2371 #ifndef __NetBSD__
2372 struct inode *inode;
2373 #endif
2374
2375 i915_gem_object_free_mmap_offset(obj);
2376
2377 #ifdef __NetBSD__
2378 if (obj->base.gemo_shm_uao == NULL)
2379 return;
2380
2381 {
2382 struct uvm_object *const uobj = obj->base.gemo_shm_uao;
2383
2384 if (uobj != NULL) {
2385 /* XXX Calling pgo_put like this is bogus. */
2386 mutex_enter(uobj->vmobjlock);
2387 (*uobj->pgops->pgo_put)(uobj, 0, obj->base.size,
2388 (PGO_ALLPAGES | PGO_FREE));
2389 }
2390 }
2391 #else
2392 if (obj->base.filp == NULL)
2393 return;
2394
2395 /* Our goal here is to return as much of the memory as
2396 * possible back to the system, as we are called from OOM.
2397 * To do this we must instruct the shmfs to drop all of its
2398 * backing pages, *now*.
2399 */
2400 inode = file_inode(obj->base.filp);
2401 shmem_truncate_range(inode, 0, (loff_t)-1);
2402 #endif
2403
2404 obj->madv = __I915_MADV_PURGED;
2405 }
2406
2407 static inline int
2408 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
2409 {
2410 return obj->madv == I915_MADV_DONTNEED;
2411 }
2412
2413 #ifdef __NetBSD__
2414 static void
2415 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
2416 {
2417 struct drm_device *const dev = obj->base.dev;
2418 int ret;
2419
2420 /* XXX Cargo-culted from the Linux code. */
2421 BUG_ON(obj->madv == __I915_MADV_PURGED);
2422
2423 ret = i915_gem_object_set_to_cpu_domain(obj, true);
2424 if (ret) {
2425 WARN_ON(ret != -EIO);
2426 i915_gem_clflush_object(obj, true);
2427 obj->base.read_domains = obj->base.write_domain =
2428 I915_GEM_DOMAIN_CPU;
2429 }
2430
2431 if (i915_gem_object_needs_bit17_swizzle(obj))
2432 i915_gem_object_save_bit_17_swizzle(obj);
2433
2434 /* XXX Maintain dirty flag? */
2435
2436 bus_dmamap_destroy(dev->dmat, obj->igo_dmamap);
2437 bus_dmamem_unwire_uvm_object(dev->dmat, obj->base.gemo_shm_uao, 0,
2438 obj->base.size, obj->pages, obj->igo_nsegs);
2439
2440 kfree(obj->pages);
2441 }
2442 #else
2443 static void
2444 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
2445 {
2446 struct sg_page_iter sg_iter;
2447 int ret;
2448
2449 BUG_ON(obj->madv == __I915_MADV_PURGED);
2450
2451 ret = i915_gem_object_set_to_cpu_domain(obj, true);
2452 if (ret) {
2453 /* In the event of a disaster, abandon all caches and
2454 * hope for the best.
2455 */
2456 WARN_ON(ret != -EIO);
2457 i915_gem_clflush_object(obj, true);
2458 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2459 }
2460
2461 if (i915_gem_object_needs_bit17_swizzle(obj))
2462 i915_gem_object_save_bit_17_swizzle(obj);
2463
2464 if (obj->madv == I915_MADV_DONTNEED)
2465 obj->dirty = 0;
2466
2467 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
2468 struct page *page = sg_page_iter_page(&sg_iter);
2469
2470 if (obj->dirty)
2471 set_page_dirty(page);
2472
2473 if (obj->madv == I915_MADV_WILLNEED)
2474 mark_page_accessed(page);
2475
2476 page_cache_release(page);
2477 }
2478 obj->dirty = 0;
2479
2480 sg_free_table(obj->pages);
2481 kfree(obj->pages);
2482 }
2483 #endif
2484
2485 int
2486 i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
2487 {
2488 const struct drm_i915_gem_object_ops *ops = obj->ops;
2489
2490 if (obj->pages == NULL)
2491 return 0;
2492
2493 if (obj->pages_pin_count)
2494 return -EBUSY;
2495
2496 BUG_ON(i915_gem_obj_bound_any(obj));
2497
2498 /* ->put_pages might need to allocate memory for the bit17 swizzle
2499 * array, hence protect them from being reaped by removing them from gtt
2500 * lists early. */
2501 list_del(&obj->global_list);
2502
2503 ops->put_pages(obj);
2504 obj->pages = NULL;
2505
2506 if (i915_gem_object_is_purgeable(obj))
2507 i915_gem_object_truncate(obj);
2508
2509 return 0;
2510 }
2511
2512 static unsigned long
2513 __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
2514 bool purgeable_only)
2515 {
2516 struct list_head still_bound_list;
2517 struct drm_i915_gem_object *obj, *next;
2518 unsigned long count = 0;
2519
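/*
 * First walk the unbound list, dropping backing pages (only from
 * purgeable objects if purgeable_only is set).
 */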
2520 list_for_each_entry_safe(obj, next,
2521 &dev_priv->mm.unbound_list,
2522 global_list) {
2523 if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
2524 i915_gem_object_put_pages(obj) == 0) {
2525 count += obj->base.size >> PAGE_SHIFT;
2526 if (count >= target)
2527 return count;
2528 }
2529 }
2530
2531 /*
2532 * As we may completely rewrite the bound list whilst unbinding
2533 * (due to retiring requests) we have to strictly process only
2534 * one element of the list at a time, and recheck the list
2535 * on every iteration.
2536 */
2537 INIT_LIST_HEAD(&still_bound_list);
2538 while (count < target && !list_empty(&dev_priv->mm.bound_list)) {
2539 struct i915_vma *vma, *v;
2540
2541 obj = list_first_entry(&dev_priv->mm.bound_list,
2542 typeof(*obj), global_list);
2543 list_move_tail(&obj->global_list, &still_bound_list);
2544
2545 if (!i915_gem_object_is_purgeable(obj) && purgeable_only)
2546 continue;
2547
2548 /*
2549 * Hold a reference whilst we unbind this object, as we may
2550 * end up waiting for and retiring requests. This might
2551 * release the final reference (held by the active list)
2552 * and result in the object being freed from under us.
2554 *
2555 * Note 1: Shrinking the bound list is special since only active
2556 * (and hence bound objects) can contain such limbo objects, so
2557 * we don't need special tricks for shrinking the unbound list.
2558 * The only other place where we have to be careful with active
2559 * objects suddenly disappearing due to retiring requests is the
2560 * eviction code.
2561 *
2562 * Note 2: Even though the bound list doesn't hold a reference
2563 * to the object we can safely grab one here: The final object
2564 * unreferencing and the bound_list are both protected by the
2565 * dev->struct_mutex and so we won't ever be able to observe an
2566 * object on the bound_list with a reference count equals 0.
2567 */
2568 drm_gem_object_reference(&obj->base);
2569
2570 list_for_each_entry_safe(vma, v, &obj->vma_list, vma_link)
2571 if (i915_vma_unbind(vma))
2572 break;
2573
2574 if (i915_gem_object_put_pages(obj) == 0)
2575 count += obj->base.size >> PAGE_SHIFT;
2576
2577 drm_gem_object_unreference(&obj->base);
2578 }
2579 list_splice(&still_bound_list, &dev_priv->mm.bound_list);
2580
2581 return count;
2582 }
2583
2584 static unsigned long
2585 i915_gem_purge(struct drm_i915_private *dev_priv, long target)
2586 {
2587 return __i915_gem_shrink(dev_priv, target, true);
2588 }
2589
2590 static unsigned long
2591 i915_gem_shrink_all(struct drm_i915_private *dev_priv)
2592 {
2593 struct drm_i915_gem_object *obj, *next;
2594 long freed = 0;
2595
2596 i915_gem_evict_everything(dev_priv->dev);
2597
2598 list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list,
2599 global_list) {
2600 if (i915_gem_object_put_pages(obj) == 0)
2601 freed += obj->base.size >> PAGE_SHIFT;
2602 }
2603 return freed;
2604 }
2605
2606 #ifdef __NetBSD__
2607 static int
2608 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2609 {
2610 struct drm_device *const dev = obj->base.dev;
2611 struct vm_page *page;
2612 int error;
2613
2614 /* XXX Cargo-culted from the Linux code. */
2615 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2616 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2617
2618 KASSERT(obj->pages == NULL);
2619 TAILQ_INIT(&obj->igo_pageq);
2620 obj->pages = kcalloc((obj->base.size / PAGE_SIZE),
2621 sizeof(obj->pages[0]), GFP_KERNEL);
2622 if (obj->pages == NULL) {
2623 error = -ENOMEM;
2624 goto fail0;
2625 }
2626
2627 /* XXX errno NetBSD->Linux */
2628 error = -bus_dmamem_wire_uvm_object(dev->dmat, obj->base.gemo_shm_uao,
2629 0, obj->base.size, &obj->igo_pageq, PAGE_SIZE, 0, obj->pages,
2630 (obj->base.size / PAGE_SIZE), &obj->igo_nsegs, BUS_DMA_NOWAIT);
2631 if (error)
2632 /* XXX Try i915_gem_purge, i915_gem_shrink_all. */
2633 goto fail1;
2634 KASSERT(0 < obj->igo_nsegs);
2635 KASSERT(obj->igo_nsegs <= (obj->base.size / PAGE_SIZE));
2636
2637 /*
2638 * Check that the paddrs will fit in 40 bits, or 32 bits on i965.
2639 *
2640 * XXX This is wrong; we ought to pass this constraint to
2641 * bus_dmamem_wire_uvm_object instead.
2642 */
2643 TAILQ_FOREACH(page, &obj->igo_pageq, pageq.queue) {
2644 const uint64_t mask =
2645 (IS_BROADWATER(dev) || IS_CRESTLINE(dev)?
2646 0xffffffffULL : 0xffffffffffULL);
2647 if (VM_PAGE_TO_PHYS(page) & ~mask) {
2648 DRM_ERROR("GEM physical address exceeds %u bits"
2649 ": %"PRIxMAX"\n",
2650 popcount64(mask),
2651 (uintmax_t)VM_PAGE_TO_PHYS(page));
2652 error = -EIO;
2653 goto fail2;
2654 }
2655 }
2656
2657 /* XXX Should create the DMA map when creating the object. */
2658
2659 /* XXX errno NetBSD->Linux */
2660 error = -bus_dmamap_create(dev->dmat, obj->base.size, obj->igo_nsegs,
2661 PAGE_SIZE, 0, BUS_DMA_NOWAIT, &obj->igo_dmamap);
2662 if (error)
2663 goto fail2;
2664
2665 /* XXX Cargo-culted from the Linux code. */
2666 if (i915_gem_object_needs_bit17_swizzle(obj))
2667 i915_gem_object_do_bit_17_swizzle(obj);
2668
2669 /* Success! */
2670 return 0;
2671
2672 fail2: bus_dmamem_unwire_uvm_object(dev->dmat, obj->base.gemo_shm_uao, 0,
2673 obj->base.size, obj->pages, (obj->base.size / PAGE_SIZE));
2674 fail1: kfree(obj->pages);
2675 obj->pages = NULL;
2676 fail0: KASSERT(error);
2677 return error;
2678 }
2679 #else
2680 static int
2681 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2682 {
2683 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2684 int page_count, i;
2685 struct address_space *mapping;
2686 struct sg_table *st;
2687 struct scatterlist *sg;
2688 struct sg_page_iter sg_iter;
2689 struct page *page;
2690 unsigned long last_pfn = 0; /* suppress gcc warning */
2691 gfp_t gfp;
2692
2693 /* Assert that the object is not currently in any GPU domain. As it
2694 * wasn't in the GTT, there shouldn't be any way it could have been in
2695 * a GPU cache
2696 */
2697 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2698 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2699
2700 st = kmalloc(sizeof(*st), GFP_KERNEL);
2701 if (st == NULL)
2702 return -ENOMEM;
2703
2704 page_count = obj->base.size / PAGE_SIZE;
2705 if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2706 kfree(st);
2707 return -ENOMEM;
2708 }
2709
2710 /* Get the list of pages out of our struct file. They'll be pinned
2711 * at this point until we release them.
2712 *
2713 * Fail silently without starting the shrinker
2714 */
2715 mapping = file_inode(obj->base.filp)->i_mapping;
2716 gfp = mapping_gfp_mask(mapping);
2717 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
2718 gfp &= ~(__GFP_IO | __GFP_WAIT);
2719 sg = st->sgl;
2720 st->nents = 0;
2721 for (i = 0; i < page_count; i++) {
2722 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2723 if (IS_ERR(page)) {
2724 i915_gem_purge(dev_priv, page_count);
2725 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2726 }
2727 if (IS_ERR(page)) {
2728 /* We've tried hard to allocate the memory by reaping
2729 * our own buffer, now let the real VM do its job and
2730 * go down in flames if truly OOM.
2731 */
2732 gfp &= ~(__GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD);
2733 gfp |= __GFP_IO | __GFP_WAIT;
2734
2735 i915_gem_shrink_all(dev_priv);
2736 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2737 if (IS_ERR(page))
2738 goto err_pages;
2739
2740 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
2741 gfp &= ~(__GFP_IO | __GFP_WAIT);
2742 }
2743 #ifdef CONFIG_SWIOTLB
2744 if (swiotlb_nr_tbl()) {
2745 st->nents++;
2746 sg_set_page(sg, page, PAGE_SIZE, 0);
2747 sg = sg_next(sg);
2748 continue;
2749 }
2750 #endif
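/* Coalesce physically contiguous pages into a single sg entry. */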
2751 if (!i || page_to_pfn(page) != last_pfn + 1) {
2752 if (i)
2753 sg = sg_next(sg);
2754 st->nents++;
2755 sg_set_page(sg, page, PAGE_SIZE, 0);
2756 } else {
2757 sg->length += PAGE_SIZE;
2758 }
2759 last_pfn = page_to_pfn(page);
2760
2761 /* Check that the i965g/gm workaround works. */
2762 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
2763 }
2764 #ifdef CONFIG_SWIOTLB
2765 if (!swiotlb_nr_tbl())
2766 #endif
2767 sg_mark_end(sg);
2768 obj->pages = st;
2769
2770 if (i915_gem_object_needs_bit17_swizzle(obj))
2771 i915_gem_object_do_bit_17_swizzle(obj);
2772
2773 return 0;
2774
2775 err_pages:
2776 sg_mark_end(sg);
2777 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
2778 page_cache_release(sg_page_iter_page(&sg_iter));
2779 sg_free_table(st);
2780 kfree(st);
2781 return PTR_ERR(page);
2782 }
2783 #endif
2784
2785 /* Ensure that the associated pages are gathered from the backing storage
2786 * and pinned into our object. i915_gem_object_get_pages() may be called
2787 * multiple times before they are released by a single call to
2788 * i915_gem_object_put_pages() - once the pages are no longer referenced
2789 * either as a result of memory pressure (reaping pages under the shrinker)
2790 * or as the object is itself released.
2791 */
2792 int
2793 i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2794 {
2795 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2796 const struct drm_i915_gem_object_ops *ops = obj->ops;
2797 int ret;
2798
2799 if (obj->pages)
2800 return 0;
2801
2802 if (obj->madv != I915_MADV_WILLNEED) {
2803 DRM_DEBUG("Attempting to obtain a purgeable object\n");
2804 return -EFAULT;
2805 }
2806
2807 BUG_ON(obj->pages_pin_count);
2808
2809 ret = ops->get_pages(obj);
2810 if (ret)
2811 return ret;
2812
2813 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
2814 return 0;
2815 }
2816
2817 static void
2818 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
2819 struct intel_ring_buffer *ring)
2820 {
2821 struct drm_device *dev = obj->base.dev;
2822 struct drm_i915_private *dev_priv = dev->dev_private;
2823 u32 seqno = intel_ring_get_seqno(ring);
2824
2825 BUG_ON(ring == NULL);
2826 if (obj->ring != ring && obj->last_write_seqno) {
2827 /* Keep the seqno relative to the current ring */
2828 obj->last_write_seqno = seqno;
2829 }
2830 obj->ring = ring;
2831
2832 /* Add a reference if we're newly entering the active list. */
2833 if (!obj->active) {
2834 drm_gem_object_reference(&obj->base);
2835 obj->active = 1;
2836 }
2837
2838 list_move_tail(&obj->ring_list, &ring->active_list);
2839
2840 obj->last_read_seqno = seqno;
2841
2842 if (obj->fenced_gpu_access) {
2843 obj->last_fenced_seqno = seqno;
2844
2845 /* Bump MRU to take account of the delayed flush */
2846 if (obj->fence_reg != I915_FENCE_REG_NONE) {
2847 struct drm_i915_fence_reg *reg;
2848
2849 reg = &dev_priv->fence_regs[obj->fence_reg];
2850 list_move_tail(&reg->lru_list,
2851 &dev_priv->mm.fence_list);
2852 }
2853 }
2854 }
2855
2856 void i915_vma_move_to_active(struct i915_vma *vma,
2857 struct intel_ring_buffer *ring)
2858 {
2859 list_move_tail(&vma->mm_list, &vma->vm->active_list);
2860 return i915_gem_object_move_to_active(vma->obj, ring);
2861 }
2862
2863 static void
2864 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
2865 {
2866 struct drm_device *dev = obj->base.dev;
2867 struct drm_i915_private *dev_priv = dev->dev_private;
2868 struct i915_address_space *vm;
2869 struct i915_vma *vma;
2870
2871 if ((obj->base.write_domain & I915_GEM_DOMAIN_GTT) != 0) {
2872 #if 0
2873 printk(KERN_ERR "%s: %p 0x%x flushing gtt\n", __func__, obj,
2874 obj->base.write_domain);
2875 #endif
2876 i915_gem_object_flush_gtt_write_domain(obj);
2877 }
2878 if ((obj->base.write_domain & I915_GEM_DOMAIN_CPU) != 0) {
2879 #if 0
2880 printk(KERN_ERR "%s: %p 0x%x flushing cpu\n", __func__, obj,
2881 obj->base.write_domain);
2882 #endif
2883 i915_gem_object_flush_cpu_write_domain(obj, false);
2884 }
2885 BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
2886 BUG_ON(!obj->active);
2887
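/* Move the object onto the inactive list of every VM it is bound into. */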
2888 list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
2889 vma = i915_gem_obj_to_vma(obj, vm);
2890 if (vma && !list_empty(&vma->mm_list))
2891 list_move_tail(&vma->mm_list, &vm->inactive_list);
2892 }
2893
2894 list_del_init(&obj->ring_list);
2895 obj->ring = NULL;
2896
2897 obj->last_read_seqno = 0;
2898 obj->last_write_seqno = 0;
2899 obj->base.write_domain = 0;
2900
2901 obj->last_fenced_seqno = 0;
2902 obj->fenced_gpu_access = false;
2903
2904 obj->active = 0;
2905 drm_gem_object_unreference(&obj->base);
2906
2907 WARN_ON(i915_verify_lists(dev));
2908 }
2909
2910 static int
2911 i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
2912 {
2913 struct drm_i915_private *dev_priv = dev->dev_private;
2914 struct intel_ring_buffer *ring;
2915 int ret, i, j;
2916
2917 /* Carefully retire all requests without writing to the rings */
2918 for_each_ring(ring, dev_priv, i) {
2919 ret = intel_ring_idle(ring);
2920 if (ret)
2921 return ret;
2922 }
2923 i915_gem_retire_requests(dev);
2924
2925 /* Finally reset hw state */
2926 for_each_ring(ring, dev_priv, i) {
2927 intel_ring_init_seqno(ring, seqno);
2928
2929 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
2930 ring->sync_seqno[j] = 0;
2931 }
2932
2933 return 0;
2934 }
2935
2936 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
2937 {
2938 struct drm_i915_private *dev_priv = dev->dev_private;
2939 int ret;
2940
2941 if (seqno == 0)
2942 return -EINVAL;
2943
2944 /* HWS page needs to be set to a value less than what we
2945 * will inject into the ring
2946 */
2947 ret = i915_gem_init_seqno(dev, seqno - 1);
2948 if (ret)
2949 return ret;
2950
2951 /* Carefully set the last_seqno value so that wrap
2952 * detection still works
2953 */
2954 dev_priv->next_seqno = seqno;
2955 dev_priv->last_seqno = seqno - 1;
2956 if (dev_priv->last_seqno == 0)
2957 dev_priv->last_seqno--;
2958
2959 return 0;
2960 }
2961
2962 int
2963 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
2964 {
2965 struct drm_i915_private *dev_priv = dev->dev_private;
2966
2967 /* reserve 0 for non-seqno */
2968 if (dev_priv->next_seqno == 0) {
2969 int ret = i915_gem_init_seqno(dev, 0);
2970 if (ret)
2971 return ret;
2972
2973 dev_priv->next_seqno = 1;
2974 }
2975
2976 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
2977 return 0;
2978 }
2979
2980 int __i915_add_request(struct intel_ring_buffer *ring,
2981 struct drm_file *file,
2982 struct drm_i915_gem_object *obj,
2983 u32 *out_seqno)
2984 {
2985 struct drm_i915_private *dev_priv = ring->dev->dev_private;
2986 struct drm_i915_gem_request *request;
2987 u32 request_ring_position, request_start;
2988 int ret;
2989
2990 request_start = intel_ring_get_tail(ring);
2991 /*
2992 * Emit any outstanding flushes - execbuf can fail to emit the flush
2993 * after having emitted the batchbuffer command. Hence we need to fix
2994 * things up similar to emitting the lazy request. The difference here
2995 * is that the flush _must_ happen before the next request, no matter
2996 * what.
2997 */
2998 ret = intel_ring_flush_all_caches(ring);
2999 if (ret)
3000 return ret;
3001
3002 request = ring->preallocated_lazy_request;
3003 if (WARN_ON(request == NULL))
3004 return -ENOMEM;
3005
3006 /* Record the position of the start of the request so that
3007 * should we detect the updated seqno part-way through the
3008 * GPU processing the request, we never over-estimate the
3009 * position of the head.
3010 */
3011 request_ring_position = intel_ring_get_tail(ring);
3012
3013 ret = ring->add_request(ring);
3014 if (ret)
3015 return ret;
3016
3017 request->seqno = intel_ring_get_seqno(ring);
3018 request->ring = ring;
3019 request->head = request_start;
3020 request->tail = request_ring_position;
3021
3022 /* Whilst this request exists, batch_obj will be on the
3023 * active_list, and so will hold the active reference. Only when this
3024 * request is retired will the batch_obj be moved onto the
3025 * inactive_list and lose its active reference. Hence we do not need
3026 * to explicitly hold another reference here.
3027 */
3028 request->batch_obj = obj;
3029
3030 /* Hold a reference to the current context so that we can inspect
3031 * it later in case a hangcheck error event fires.
3032 */
3033 request->ctx = ring->last_context;
3034 if (request->ctx)
3035 i915_gem_context_reference(request->ctx);
3036
3037 request->emitted_jiffies = jiffies;
3038 list_add_tail(&request->list, &ring->request_list);
3039 request->file_priv = NULL;
3040
3041 if (file) {
3042 struct drm_i915_file_private *file_priv = file->driver_priv;
3043
3044 spin_lock(&file_priv->mm.lock);
3045 request->file_priv = file_priv;
3046 list_add_tail(&request->client_list,
3047 &file_priv->mm.request_list);
3048 spin_unlock(&file_priv->mm.lock);
3049 }
3050
3051 trace_i915_gem_request_add(ring, request->seqno);
3052 ring->outstanding_lazy_seqno = 0;
3053 ring->preallocated_lazy_request = NULL;
3054
3055 if (!dev_priv->ums.mm_suspended) {
3056 i915_queue_hangcheck(ring->dev);
3057
3058 cancel_delayed_work_sync(&dev_priv->mm.idle_work);
3059 queue_delayed_work(dev_priv->wq,
3060 &dev_priv->mm.retire_work,
3061 round_jiffies_up_relative(HZ));
3062 intel_mark_busy(dev_priv->dev);
3063 }
3064
3065 if (out_seqno)
3066 *out_seqno = request->seqno;
3067 return 0;
3068 }
3069
3070 static inline void
3071 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
3072 {
3073 struct drm_i915_file_private *file_priv = request->file_priv;
3074
3075 if (!file_priv)
3076 return;
3077
3078 spin_lock(&file_priv->mm.lock);
3079 list_del(&request->client_list);
3080 request->file_priv = NULL;
3081 spin_unlock(&file_priv->mm.lock);
3082 }
3083
3084 static bool i915_context_is_banned(struct drm_i915_private *dev_priv,
3085 const struct i915_hw_context *ctx)
3086 {
3087 unsigned long elapsed;
3088
3089 elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
3090
3091 if (ctx->hang_stats.banned)
3092 return true;
3093
3094 if (elapsed <= DRM_I915_CTX_BAN_PERIOD) {
3095 if (!i915_gem_context_is_default(ctx)) {
3096 DRM_DEBUG("context hanging too fast, banning!\n");
3097 return true;
3098 } else if (dev_priv->gpu_error.stop_rings == 0) {
3099 DRM_ERROR("gpu hanging too fast, banning!\n");
3100 return true;
3101 }
3102 }
3103
3104 return false;
3105 }
3106
3107 static void i915_set_reset_status(struct drm_i915_private *dev_priv,
3108 struct i915_hw_context *ctx,
3109 const bool guilty)
3110 {
3111 struct i915_ctx_hang_stats *hs;
3112
3113 if (WARN_ON(!ctx))
3114 return;
3115
3116 hs = &ctx->hang_stats;
3117
3118 if (guilty) {
3119 hs->banned = i915_context_is_banned(dev_priv, ctx);
3120 hs->batch_active++;
3121 hs->guilty_ts = get_seconds();
3122 } else {
3123 hs->batch_pending++;
3124 }
3125 }
3126
3127 static void i915_gem_free_request(struct drm_i915_gem_request *request)
3128 {
3129 list_del(&request->list);
3130 i915_gem_request_remove_from_client(request);
3131
3132 if (request->ctx)
3133 i915_gem_context_unreference(request->ctx);
3134
3135 kfree(request);
3136 }
3137
3138 struct drm_i915_gem_request *
3139 i915_gem_find_active_request(struct intel_ring_buffer *ring)
3140 {
3141 struct drm_i915_gem_request *request;
3142 u32 completed_seqno;
3143
3144 completed_seqno = ring->get_seqno(ring, false);
3145
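/*
 * Requests sit on the list in submission order, so the first one
 * whose seqno has not yet passed is the one the GPU was executing.
 */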
3146 list_for_each_entry(request, &ring->request_list, list) {
3147 if (i915_seqno_passed(completed_seqno, request->seqno))
3148 continue;
3149
3150 return request;
3151 }
3152
3153 return NULL;
3154 }
3155
3156 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
3157 struct intel_ring_buffer *ring)
3158 {
3159 struct drm_i915_gem_request *request;
3160 bool ring_hung;
3161
3162 request = i915_gem_find_active_request(ring);
3163
3164 if (request == NULL)
3165 return;
3166
3167 ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
3168
3169 i915_set_reset_status(dev_priv, request->ctx, ring_hung);
3170
3171 list_for_each_entry_continue(request, &ring->request_list, list)
3172 i915_set_reset_status(dev_priv, request->ctx, false);
3173 }
3174
3175 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
3176 struct intel_ring_buffer *ring)
3177 {
3178 while (!list_empty(&ring->active_list)) {
3179 struct drm_i915_gem_object *obj;
3180
3181 obj = list_first_entry(&ring->active_list,
3182 struct drm_i915_gem_object,
3183 ring_list);
3184
3185 i915_gem_object_move_to_inactive(obj);
3186 }
3187
3188 /*
3189 * We must free the requests after all the corresponding objects have
3190 * been moved off active lists. Which is the same order as the normal
3191 * retire_requests function does. This is important if objects hold
3192 * implicit references on things like e.g. ppgtt address spaces through
3193 * the request.
3194 */
3195 while (!list_empty(&ring->request_list)) {
3196 struct drm_i915_gem_request *request;
3197
3198 request = list_first_entry(&ring->request_list,
3199 struct drm_i915_gem_request,
3200 list);
3201
3202 i915_gem_free_request(request);
3203 }
3204 }
3205
3206 void i915_gem_restore_fences(struct drm_device *dev)
3207 {
3208 struct drm_i915_private *dev_priv = dev->dev_private;
3209 int i;
3210
3211 for (i = 0; i < dev_priv->num_fence_regs; i++) {
3212 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
3213
3214 /*
3215 * Commit delayed tiling changes if we have an object still
3216 * attached to the fence, otherwise just clear the fence.
3217 */
3218 if (reg->obj) {
3219 i915_gem_object_update_fence(reg->obj, reg,
3220 reg->obj->tiling_mode);
3221 } else {
3222 i915_gem_write_fence(dev, i, NULL);
3223 }
3224 }
3225 }
3226
3227 void i915_gem_reset(struct drm_device *dev)
3228 {
3229 struct drm_i915_private *dev_priv = dev->dev_private;
3230 struct intel_ring_buffer *ring;
3231 int i;
3232
3233 /*
3234 * Before we free the objects from the requests, we need to inspect
3235 * them for finding the guilty party. As the requests only borrow
3236 * their reference to the objects, the inspection must be done first.
3237 */
3238 for_each_ring(ring, dev_priv, i)
3239 i915_gem_reset_ring_status(dev_priv, ring);
3240
3241 for_each_ring(ring, dev_priv, i)
3242 i915_gem_reset_ring_cleanup(dev_priv, ring);
3243
3244 i915_gem_cleanup_ringbuffer(dev);
3245
3246 i915_gem_context_reset(dev);
3247
3248 i915_gem_restore_fences(dev);
3249 }
3250
3251 /**
3252 * This function clears the request list as sequence numbers are passed.
3253 */
3254 static void
3255 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
3256 {
3257 uint32_t seqno;
3258
3259 if (list_empty(&ring->request_list))
3260 return;
3261
3262 WARN_ON(i915_verify_lists(ring->dev));
3263
3264 seqno = ring->get_seqno(ring, true);
3265
3266 /* Move any buffers on the active list that are no longer referenced
3267 * by the ringbuffer to the flushing/inactive lists as appropriate,
3268 * before we free the context associated with the requests.
3269 */
3270 while (!list_empty(&ring->active_list)) {
3271 struct drm_i915_gem_object *obj;
3272
3273 obj = list_first_entry(&ring->active_list,
3274 struct drm_i915_gem_object,
3275 ring_list);
3276
3277 if (!i915_seqno_passed(seqno, obj->last_read_seqno))
3278 break;
3279
3280 i915_gem_object_move_to_inactive(obj);
3281 }
3282
3283
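/* Now retire the requests themselves, up to the last completed seqno. */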
3284 while (!list_empty(&ring->request_list)) {
3285 struct drm_i915_gem_request *request;
3286
3287 request = list_first_entry(&ring->request_list,
3288 struct drm_i915_gem_request,
3289 list);
3290
3291 if (!i915_seqno_passed(seqno, request->seqno))
3292 break;
3293
3294 trace_i915_gem_request_retire(ring, request->seqno);
3295 /* We know the GPU must have read the request to have
3296 * sent us the seqno + interrupt, so use the position
3297 * of the tail of the request to update the last known position
3298 * of the GPU head.
3299 */
3300 ring->last_retired_head = request->tail;
3301
3302 i915_gem_free_request(request);
3303 }
3304
3305 if (unlikely(ring->trace_irq_seqno &&
3306 i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
3307 ring->irq_put(ring);
3308 ring->trace_irq_seqno = 0;
3309 }
3310
3311 WARN_ON(i915_verify_lists(ring->dev));
3312 }
3313
3314 bool
3315 i915_gem_retire_requests(struct drm_device *dev)
3316 {
3317 struct drm_i915_private *dev_priv = dev->dev_private;
3318 struct intel_ring_buffer *ring;
3319 bool idle = true;
3320 int i;
3321
3322 for_each_ring(ring, dev_priv, i) {
3323 i915_gem_retire_requests_ring(ring);
3324 idle &= list_empty(&ring->request_list);
3325 }
3326
3327 if (idle)
3328 mod_delayed_work(dev_priv->wq,
3329 &dev_priv->mm.idle_work,
3330 msecs_to_jiffies(100));
3331
3332 return idle;
3333 }
3334
3335 static void
3336 i915_gem_retire_work_handler(struct work_struct *work)
3337 {
3338 struct drm_i915_private *dev_priv =
3339 container_of(work, typeof(*dev_priv), mm.retire_work.work);
3340 struct drm_device *dev = dev_priv->dev;
3341 bool idle;
3342
3343 /* Come back later if the device is busy... */
3344 idle = false;
3345 if (mutex_trylock(&dev->struct_mutex)) {
3346 idle = i915_gem_retire_requests(dev);
3347 mutex_unlock(&dev->struct_mutex);
3348 }
3349 if (!idle)
3350 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
3351 round_jiffies_up_relative(HZ));
3352 }
3353
3354 static void
3355 i915_gem_idle_work_handler(struct work_struct *work)
3356 {
3357 struct drm_i915_private *dev_priv =
3358 container_of(work, typeof(*dev_priv), mm.idle_work.work);
3359
3360 intel_mark_idle(dev_priv->dev);
3361 }
3362
3363 /**
3364 * Ensures that an object will eventually get non-busy by flushing any required
3365 * write domains, emitting any outstanding lazy request and retiring any
3366 * completed requests.
3367 */
3368 static int
3369 i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
3370 {
3371 int ret;
3372
3373 if (obj->active) {
3374 ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
3375 if (ret)
3376 return ret;
3377
3378 i915_gem_retire_requests_ring(obj->ring);
3379 }
3380
3381 return 0;
3382 }
3383
3384 /**
3385 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
3386 * @DRM_IOCTL_ARGS: standard ioctl arguments
3387 *
3388 * Returns 0 if successful, else an error is returned with the remaining time in
3389 * the timeout parameter.
3390 * -ETIME: object is still busy after timeout
3391 * -ERESTARTSYS: signal interrupted the wait
3392 * -ENOENT: object doesn't exist
3393 * Also possible, but rare:
3394 * -EAGAIN: GPU wedged
3395 * -ENOMEM: damn
3396 * -ENODEV: Internal IRQ fail
3397 * -E?: The add request failed
3398 *
3399 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
3400 * non-zero timeout parameter the wait ioctl will wait for the given number of
3401 * nanoseconds on an object becoming unbusy. Since the wait itself does so
3402 * without holding struct_mutex the object may become re-busied before this
3403 * function completes. A similar but shorter race condition exists in the busy
3404 * ioctl.
3405 */
3406 int
3407 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3408 {
3409 struct drm_i915_private *dev_priv = dev->dev_private;
3410 struct drm_i915_gem_wait *args = data;
3411 struct drm_gem_object *gobj;
3412 struct drm_i915_gem_object *obj;
3413 struct intel_ring_buffer *ring = NULL;
3414 struct timespec timeout_stack, *timeout = NULL;
3415 unsigned reset_counter;
3416 u32 seqno = 0;
3417 int ret = 0;
3418
3419 if (args->timeout_ns >= 0) {
3420 timeout_stack = ns_to_timespec(args->timeout_ns);
3421 timeout = &timeout_stack;
3422 }
3423
3424 ret = i915_mutex_lock_interruptible(dev);
3425 if (ret)
3426 return ret;
3427
3428 gobj = drm_gem_object_lookup(dev, file, args->bo_handle);
3429 if (gobj == NULL) {
3430 mutex_unlock(&dev->struct_mutex);
3431 return -ENOENT;
3432 }
3433 obj = to_intel_bo(gobj);
3434
3435 /* Need to make sure the object gets inactive eventually. */
3436 ret = i915_gem_object_flush_active(obj);
3437 if (ret)
3438 goto out;
3439
3440 if (obj->active) {
3441 seqno = obj->last_read_seqno;
3442 ring = obj->ring;
3443 }
3444
3445 if (seqno == 0)
3446 goto out;
3447
3448 /* Do this after OLR check to make sure we make forward progress polling
3449 * on this IOCTL with a 0 timeout (like busy ioctl)
3450 */
3451 if (!args->timeout_ns) {
3452 ret = -ETIME;
3453 goto out;
3454 }
3455
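/*
 * Drop our reference and struct_mutex before the wait, and snapshot
 * the reset counter so __wait_seqno can notice a GPU reset meanwhile.
 */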
3456 drm_gem_object_unreference(&obj->base);
3457 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
3458 mutex_unlock(&dev->struct_mutex);
3459
3460 ret = __wait_seqno(ring, seqno, reset_counter, true, timeout, file->driver_priv);
3461 if (timeout)
3462 args->timeout_ns = timespec_to_ns(timeout);
3463 return ret;
3464
3465 out:
3466 drm_gem_object_unreference(&obj->base);
3467 mutex_unlock(&dev->struct_mutex);
3468 return ret;
3469 }
3470
3471 /**
3472 * i915_gem_object_sync - sync an object to a ring.
3473 *
3474 * @obj: object which may be in use on another ring.
3475 * @to: ring we wish to use the object on. May be NULL.
3476 *
3477 * This code is meant to abstract object synchronization with the GPU.
3478 * Calling with NULL implies synchronizing the object with the CPU
3479 * rather than a particular GPU ring.
3480 *
3481 * Returns 0 if successful, else propagates up the lower layer error.
3482 */
3483 int
3484 i915_gem_object_sync(struct drm_i915_gem_object *obj,
3485 struct intel_ring_buffer *to)
3486 {
3487 struct intel_ring_buffer *from = obj->ring;
3488 u32 seqno;
3489 int ret, idx;
3490
3491 if (from == NULL || to == from)
3492 return 0;
3493
3494 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
3495 return i915_gem_object_wait_rendering(obj, false);
3496
3497 idx = intel_ring_sync_index(from, to);
3498
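/* Skip the semaphore wait if we have already synchronised past this seqno. */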
3499 seqno = obj->last_read_seqno;
3500 if (seqno <= from->sync_seqno[idx])
3501 return 0;
3502
3503 ret = i915_gem_check_olr(obj->ring, seqno);
3504 if (ret)
3505 return ret;
3506
3507 trace_i915_gem_ring_sync_to(from, to, seqno);
3508 ret = to->sync_to(to, from, seqno);
3509 if (!ret)
3510 /* We use last_read_seqno because sync_to()
3511 * might have just caused seqno wrap under
3512 * the radar.
3513 */
3514 from->sync_seqno[idx] = obj->last_read_seqno;
3515
3516 return ret;
3517 }
3518
3519 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
3520 {
3521 u32 old_write_domain, old_read_domains;
3522
3523 /* Force a pagefault for domain tracking on next user access */
3524 i915_gem_release_mmap(obj);
3525
3526 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3527 return;
3528
3529 /* Wait for any direct GTT access to complete */
3530 mb();
3531
3532 old_read_domains = obj->base.read_domains;
3533 old_write_domain = obj->base.write_domain;
3534
3535 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
3536 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
3537
3538 trace_i915_gem_object_change_domain(obj,
3539 old_read_domains,
3540 old_write_domain);
3541 }
3542
3543 int i915_vma_unbind(struct i915_vma *vma)
3544 {
3545 struct drm_i915_gem_object *obj = vma->obj;
3546 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3547 int ret;
3548
3549 if (list_empty(&vma->vma_link))
3550 return 0;
3551
3552 if (!drm_mm_node_allocated(&vma->node)) {
3553 i915_gem_vma_destroy(vma);
3554 return 0;
3555 }
3556
3557 if (vma->pin_count)
3558 return -EBUSY;
3559
3560 BUG_ON(obj->pages == NULL);
3561
3562 ret = i915_gem_object_finish_gpu(obj);
3563 if (ret)
3564 return ret;
3565 /* Continue on if we fail due to EIO, the GPU is hung so we
3566 * should be safe and we need to cleanup or else we might
3567 * cause memory corruption through use-after-free.
3568 */
3569
3570 i915_gem_object_finish_gtt(obj);
3571
3572 /* release the fence reg _after_ flushing */
3573 ret = i915_gem_object_put_fence(obj);
3574 if (ret)
3575 return ret;
3576
3577 trace_i915_vma_unbind(vma);
3578
3579 vma->unbind_vma(vma);
3580
3581 i915_gem_gtt_finish_object(obj);
3582
3583 list_del_init(&vma->mm_list);
3584 /* Avoid an unnecessary call to unbind on rebind. */
3585 if (i915_is_ggtt(vma->vm))
3586 obj->map_and_fenceable = true;
3587
3588 drm_mm_remove_node(&vma->node);
3589 i915_gem_vma_destroy(vma);
3590
3591 /* Since the unbound list is global, only move to that list if
3592 * no more VMAs exist. */
3593 if (list_empty(&obj->vma_list))
3594 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
3595
3596 /* And finally now the object is completely decoupled from this vma,
3597 * we can drop its hold on the backing storage and allow it to be
3598 * reaped by the shrinker.
3599 */
3600 i915_gem_object_unpin_pages(obj);
3601
3602 return 0;
3603 }
3604
3605 int i915_gpu_idle(struct drm_device *dev)
3606 {
3607 struct drm_i915_private *dev_priv = dev->dev_private;
3608 struct intel_ring_buffer *ring;
3609 int ret, i;
3610
3611 /* Flush everything onto the inactive list. */
3612 for_each_ring(ring, dev_priv, i) {
3613 ret = i915_switch_context(ring, ring->default_context);
3614 if (ret)
3615 return ret;
3616
3617 ret = intel_ring_idle(ring);
3618 if (ret)
3619 return ret;
3620 }
3621
3622 return 0;
3623 }
3624
3625 static void i965_write_fence_reg(struct drm_device *dev, int reg,
3626 struct drm_i915_gem_object *obj)
3627 {
3628 struct drm_i915_private *dev_priv = dev->dev_private;
3629 int fence_reg;
3630 int fence_pitch_shift;
3631
3632 if (INTEL_INFO(dev)->gen >= 6) {
3633 fence_reg = FENCE_REG_SANDYBRIDGE_0;
3634 fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT;
3635 } else {
3636 fence_reg = FENCE_REG_965_0;
3637 fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
3638 }
3639
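/* Each 965+ fence register is 64 bits wide, hence the stride of 8. */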
3640 fence_reg += reg * 8;
3641
3642 /* To w/a incoherency with non-atomic 64-bit register updates,
3643 * we split the 64-bit update into two 32-bit writes. In order
3644 * for a partial fence not to be evaluated between writes, we
3645 * precede the update with write to turn off the fence register,
3646 * and only enable the fence as the last step.
3647 *
3648 * For extra levels of paranoia, we make sure each step lands
3649 * before applying the next step.
3650 */
3651 I915_WRITE(fence_reg, 0);
3652 POSTING_READ(fence_reg);
3653
3654 if (obj) {
3655 u32 size = i915_gem_obj_ggtt_size(obj);
3656 uint64_t val;
3657
3658 val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
3659 0xfffff000) << 32;
3660 val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
3661 val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
3662 if (obj->tiling_mode == I915_TILING_Y)
3663 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
3664 val |= I965_FENCE_REG_VALID;
3665
3666 I915_WRITE(fence_reg + 4, val >> 32);
3667 POSTING_READ(fence_reg + 4);
3668
3669 I915_WRITE(fence_reg + 0, val);
3670 POSTING_READ(fence_reg);
3671 } else {
3672 I915_WRITE(fence_reg + 4, 0);
3673 POSTING_READ(fence_reg + 4);
3674 }
3675 }
3676
3677 static void i915_write_fence_reg(struct drm_device *dev, int reg,
3678 struct drm_i915_gem_object *obj)
3679 {
3680 struct drm_i915_private *dev_priv = dev->dev_private;
3681 u32 val;
3682
3683 if (obj) {
3684 u32 size = i915_gem_obj_ggtt_size(obj);
3685 int pitch_val;
3686 int tile_width;
3687
3688 WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) ||
3689 (size & -size) != size ||
3690 (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
3691 "object 0x%08lx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
3692 i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);
3693
3694 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
3695 tile_width = 128;
3696 else
3697 tile_width = 512;
3698
3699 /* Note: pitch better be a power of two tile widths */
3700 pitch_val = obj->stride / tile_width;
3701 pitch_val = ffs(pitch_val) - 1;
3702
3703 val = i915_gem_obj_ggtt_offset(obj);
3704 if (obj->tiling_mode == I915_TILING_Y)
3705 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
3706 val |= I915_FENCE_SIZE_BITS(size);
3707 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
3708 val |= I830_FENCE_REG_VALID;
3709 } else
3710 val = 0;
3711
3712 if (reg < 8)
3713 reg = FENCE_REG_830_0 + reg * 4;
3714 else
3715 reg = FENCE_REG_945_8 + (reg - 8) * 4;
3716
3717 I915_WRITE(reg, val);
3718 POSTING_READ(reg);
3719 }
3720
3721 static void i830_write_fence_reg(struct drm_device *dev, int reg,
3722 struct drm_i915_gem_object *obj)
3723 {
3724 struct drm_i915_private *dev_priv = dev->dev_private;
3725 uint32_t val;
3726
3727 if (obj) {
3728 u32 size = i915_gem_obj_ggtt_size(obj);
3729 uint32_t pitch_val;
3730
3731 WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) ||
3732 (size & -size) != size ||
3733 (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
3734 "object 0x%08lx not 512K or pot-size 0x%08x aligned\n",
3735 i915_gem_obj_ggtt_offset(obj), size);
3736
3737 pitch_val = obj->stride / 128;
3738 pitch_val = ffs(pitch_val) - 1;
3739
3740 val = i915_gem_obj_ggtt_offset(obj);
3741 if (obj->tiling_mode == I915_TILING_Y)
3742 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
3743 val |= I830_FENCE_SIZE_BITS(size);
3744 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
3745 val |= I830_FENCE_REG_VALID;
3746 } else
3747 val = 0;
3748
3749 I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
3750 POSTING_READ(FENCE_REG_830_0 + reg * 4);
3751 }
3752
3753 inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
3754 {
3755 return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
3756 }
3757
3758 static void i915_gem_write_fence(struct drm_device *dev, int reg,
3759 struct drm_i915_gem_object *obj)
3760 {
3761 struct drm_i915_private *dev_priv = dev->dev_private;
3762
3763 /* Ensure that all CPU reads are completed before installing a fence
3764 * and all writes before removing the fence.
3765 */
3766 if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
3767 mb();
3768
3769 WARN(obj && (!obj->stride || !obj->tiling_mode),
3770 "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
3771 obj->stride, obj->tiling_mode);
3772
3773 switch (INTEL_INFO(dev)->gen) {
3774 case 8:
3775 case 7:
3776 case 6:
3777 case 5:
3778 case 4: i965_write_fence_reg(dev, reg, obj); break;
3779 case 3: i915_write_fence_reg(dev, reg, obj); break;
3780 case 2: i830_write_fence_reg(dev, reg, obj); break;
3781 default: BUG();
3782 }
3783
3784 /* And similarly be paranoid that no direct access to this region
3785 * is reordered to before the fence is installed.
3786 */
3787 if (i915_gem_object_needs_mb(obj))
3788 mb();
3789 }
3790
3791 static inline int fence_number(struct drm_i915_private *dev_priv,
3792 struct drm_i915_fence_reg *fence)
3793 {
3794 return fence - dev_priv->fence_regs;
3795 }
3796
3797 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
3798 struct drm_i915_fence_reg *fence,
3799 bool enable)
3800 {
3801 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3802 int reg = fence_number(dev_priv, fence);
3803
3804 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
3805
3806 if (enable) {
3807 obj->fence_reg = reg;
3808 fence->obj = obj;
3809 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
3810 } else {
3811 obj->fence_reg = I915_FENCE_REG_NONE;
3812 fence->obj = NULL;
3813 list_del_init(&fence->lru_list);
3814 }
3815 obj->fence_dirty = false;
3816 }
3817
3818 static int
3819 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
3820 {
3821 if (obj->last_fenced_seqno) {
3822 int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
3823 if (ret)
3824 return ret;
3825
3826 obj->last_fenced_seqno = 0;
3827 }
3828
3829 obj->fenced_gpu_access = false;
3830 return 0;
3831 }
3832
3833 int
3834 i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
3835 {
3836 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3837 struct drm_i915_fence_reg *fence;
3838 int ret;
3839
3840 ret = i915_gem_object_wait_fence(obj);
3841 if (ret)
3842 return ret;
3843
3844 if (obj->fence_reg == I915_FENCE_REG_NONE)
3845 return 0;
3846
3847 fence = &dev_priv->fence_regs[obj->fence_reg];
3848
3849 i915_gem_object_fence_lost(obj);
3850 i915_gem_object_update_fence(obj, fence, false);
3851
3852 return 0;
3853 }
3854
3855 static struct drm_i915_fence_reg *
3856 i915_find_fence_reg(struct drm_device *dev)
3857 {
3858 struct drm_i915_private *dev_priv = dev->dev_private;
3859 struct drm_i915_fence_reg *reg, *avail;
3860 int i;
3861
3862 /* First try to find a free reg */
3863 avail = NULL;
3864 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
3865 reg = &dev_priv->fence_regs[i];
3866 if (!reg->obj)
3867 return reg;
3868
3869 if (!reg->pin_count)
3870 avail = reg;
3871 }
3872
3873 if (avail == NULL)
3874 goto deadlock;
3875
3876 /* None available, try to steal one or wait for a user to finish */
3877 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
3878 if (reg->pin_count)
3879 continue;
3880
3881 return reg;
3882 }
3883
3884 deadlock:
3885 /* Wait for completion of pending flips which consume fences */
3886 if (intel_has_pending_fb_unpin(dev))
3887 return ERR_PTR(-EAGAIN);
3888
3889 return ERR_PTR(-EDEADLK);
3890 }
3891
3892 /**
3893 * i915_gem_object_get_fence - set up fencing for an object
3894 * @obj: object to map through a fence reg
3895 *
3896 * When mapping objects through the GTT, userspace wants to be able to write
3897 * to them without having to worry about swizzling if the object is tiled.
3898 * This function walks the fence regs looking for a free one for @obj,
3899 * stealing one if it can't find any.
3900 *
3901 * It then sets up the reg based on the object's properties: address, pitch
3902 * and tiling format.
3903 *
3904 * For an untiled surface, this removes any existing fence.
3905 */
3906 int
3907 i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
3908 {
3909 struct drm_device *dev = obj->base.dev;
3910 struct drm_i915_private *dev_priv = dev->dev_private;
3911 bool enable = obj->tiling_mode != I915_TILING_NONE;
3912 struct drm_i915_fence_reg *reg;
3913 int ret;
3914
3915 	/* Have we updated the tiling parameters on the object, and so
3916 	 * need to serialise the write to the associated fence register?
3917 	 */
3918 if (obj->fence_dirty) {
3919 ret = i915_gem_object_wait_fence(obj);
3920 if (ret)
3921 return ret;
3922 }
3923
3924 /* Just update our place in the LRU if our fence is getting reused. */
3925 if (obj->fence_reg != I915_FENCE_REG_NONE) {
3926 reg = &dev_priv->fence_regs[obj->fence_reg];
3927 if (!obj->fence_dirty) {
3928 			list_move_tail(&reg->lru_list,
3929 &dev_priv->mm.fence_list);
3930 return 0;
3931 }
3932 } else if (enable) {
3933 reg = i915_find_fence_reg(dev);
3934 if (IS_ERR(reg))
3935 return PTR_ERR(reg);
3936
3937 if (reg->obj) {
3938 struct drm_i915_gem_object *old = reg->obj;
3939
3940 ret = i915_gem_object_wait_fence(old);
3941 if (ret)
3942 return ret;
3943
3944 i915_gem_object_fence_lost(old);
3945 }
3946 } else
3947 return 0;
3948
3949 i915_gem_object_update_fence(obj, reg, enable);
3950
3951 return 0;
3952 }
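
/*
 * Illustrative sketch (not compiled): a typical fenced-GTT access pattern
 * built from the helpers in this file, assuming the hypothetical caller
 * already holds dev->struct_mutex.  The object is pinned into the mappable
 * aperture, a fence register is attached for detiling, and both are released
 * again afterwards.
 */
#if 0
static int example_fenced_access(struct drm_i915_gem_object *obj)
{
	int ret;

	ret = i915_gem_obj_ggtt_pin(obj, 4096, PIN_MAPPABLE);
	if (ret)
		return ret;

	ret = i915_gem_object_get_fence(obj);
	if (ret)
		goto unpin;

	/* ... access the object through its fenced GTT mapping ... */

	ret = i915_gem_object_put_fence(obj);
unpin:
	i915_gem_object_ggtt_unpin(obj);
	return ret;
}
#endif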
3953
3954 static bool i915_gem_valid_gtt_space(struct drm_device *dev,
3955 struct drm_mm_node *gtt_space,
3956 unsigned long cache_level)
3957 {
3958 struct drm_mm_node *other;
3959
3960 /* On non-LLC machines we have to be careful when putting differing
3961 * types of snoopable memory together to avoid the prefetcher
3962 * crossing memory domains and dying.
3963 */
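	/* For example, on such a machine an uncached (I915_CACHE_NONE) node
	 * must not sit directly next to a snooped node: the checks below
	 * require either matching colours or a guard hole after the earlier
	 * of the two neighbouring nodes.
	 */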
3964 if (HAS_LLC(dev))
3965 return true;
3966
3967 if (!drm_mm_node_allocated(gtt_space))
3968 return true;
3969
3970 	if (list_empty(&gtt_space->node_list))
3971 return true;
3972
3973 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
3974 if (other->allocated && !other->hole_follows && other->color != cache_level)
3975 return false;
3976
3977 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
3978 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
3979 return false;
3980
3981 return true;
3982 }
3983
3984 static void i915_gem_verify_gtt(struct drm_device *dev)
3985 {
3986 #if WATCH_GTT
3987 struct drm_i915_private *dev_priv = dev->dev_private;
3988 struct drm_i915_gem_object *obj;
3989 int err = 0;
3990
3991 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
3992 if (obj->gtt_space == NULL) {
3993 printk(KERN_ERR "object found on GTT list with no space reserved\n");
3994 err++;
3995 continue;
3996 }
3997
3998 if (obj->cache_level != obj->gtt_space->color) {
3999 printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n",
4000 i915_gem_obj_ggtt_offset(obj),
4001 i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj),
4002 obj->cache_level,
4003 obj->gtt_space->color);
4004 err++;
4005 continue;
4006 }
4007
4008 if (!i915_gem_valid_gtt_space(dev,
4009 obj->gtt_space,
4010 obj->cache_level)) {
4011 printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n",
4012 i915_gem_obj_ggtt_offset(obj),
4013 i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj),
4014 obj->cache_level);
4015 err++;
4016 continue;
4017 }
4018 }
4019
4020 WARN_ON(err);
4021 #endif
4022 }
4023
4024 /**
4025 * Finds free space in the GTT aperture and binds the object there.
4026 */
4027 static struct i915_vma *
4028 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
4029 struct i915_address_space *vm,
4030 unsigned alignment,
4031 uint64_t flags)
4032 {
4033 struct drm_device *dev = obj->base.dev;
4034 struct drm_i915_private *dev_priv = dev->dev_private;
4035 u32 size, fence_size, fence_alignment, unfenced_alignment;
4036 unsigned long start =
4037 flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
4038 unsigned long end =
4039 flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total;
4040 struct i915_vma *vma;
4041 int ret;
4042
4043 fence_size = i915_gem_get_gtt_size(dev,
4044 obj->base.size,
4045 obj->tiling_mode);
4046 fence_alignment = i915_gem_get_gtt_alignment(dev,
4047 obj->base.size,
4048 obj->tiling_mode, true);
4049 unfenced_alignment =
4050 i915_gem_get_gtt_alignment(dev,
4051 obj->base.size,
4052 obj->tiling_mode, false);
4053
4054 if (alignment == 0)
4055 alignment = flags & PIN_MAPPABLE ? fence_alignment :
4056 unfenced_alignment;
4057 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
4058 DRM_DEBUG("Invalid object alignment requested %u\n", alignment);
4059 return ERR_PTR(-EINVAL);
4060 }
4061
4062 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
4063
4064 /* If the object is bigger than the entire aperture, reject it early
4065 * before evicting everything in a vain attempt to find space.
4066 */
4067 if (obj->base.size > end) {
4068 DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%lu\n",
4069 obj->base.size,
4070 flags & PIN_MAPPABLE ? "mappable" : "total",
4071 end);
4072 return ERR_PTR(-E2BIG);
4073 }
4074
4075 ret = i915_gem_object_get_pages(obj);
4076 if (ret)
4077 return ERR_PTR(ret);
4078
4079 i915_gem_object_pin_pages(obj);
4080
4081 vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
4082 if (IS_ERR(vma))
4083 goto err_unpin;
4084
4085 search_free:
4086 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
4087 size, alignment,
4088 obj->cache_level,
4089 start, end,
4090 DRM_MM_SEARCH_DEFAULT,
4091 DRM_MM_CREATE_DEFAULT);
4092 if (ret) {
4093 ret = i915_gem_evict_something(dev, vm, size, alignment,
4094 obj->cache_level,
4095 start, end,
4096 flags);
4097 if (ret == 0)
4098 goto search_free;
4099
4100 goto err_free_vma;
4101 }
4102 if (WARN_ON(!i915_gem_valid_gtt_space(dev, &vma->node,
4103 obj->cache_level))) {
4104 ret = -EINVAL;
4105 goto err_remove_node;
4106 }
4107
4108 ret = i915_gem_gtt_prepare_object(obj);
4109 if (ret)
4110 goto err_remove_node;
4111
4112 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
4113 list_add_tail(&vma->mm_list, &vm->inactive_list);
4114
4115 if (i915_is_ggtt(vm)) {
4116 bool mappable, fenceable;
4117
4118 fenceable = (vma->node.size == fence_size &&
4119 (vma->node.start & (fence_alignment - 1)) == 0);
4120
4121 mappable = (vma->node.start + obj->base.size <=
4122 dev_priv->gtt.mappable_end);
4123
4124 obj->map_and_fenceable = mappable && fenceable;
4125 }
4126
4127 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
4128
4129 trace_i915_vma_bind(vma, flags);
4130 vma->bind_vma(vma, obj->cache_level,
4131 flags & (PIN_MAPPABLE | PIN_GLOBAL) ? GLOBAL_BIND : 0);
4132
4133 i915_gem_verify_gtt(dev);
4134 return vma;
4135
4136 err_remove_node:
4137 drm_mm_remove_node(&vma->node);
4138 err_free_vma:
4139 i915_gem_vma_destroy(vma);
4140 vma = ERR_PTR(ret);
4141 err_unpin:
4142 i915_gem_object_unpin_pages(obj);
4143 return vma;
4144 }
4145
4146 bool
4147 i915_gem_clflush_object(struct drm_i915_gem_object *obj,
4148 bool force)
4149 {
4150 	/* If we don't have a page list set up, then we're not pinned
4151 	 * to the GPU, and we can ignore the cache flush because it will
4152 	 * happen again at bind time.
4153 	 */
4154 if (obj->pages == NULL)
4155 return false;
4156
4157 /*
4158 * Stolen memory is always coherent with the GPU as it is explicitly
4159 * marked as wc by the system, or the system is cache-coherent.
4160 */
4161 if (obj->stolen)
4162 return false;
4163
4164 /* If the GPU is snooping the contents of the CPU cache,
4165 * we do not need to manually clear the CPU cache lines. However,
4166 * the caches are only snooped when the render cache is
4167 * flushed/invalidated. As we always have to emit invalidations
4168 * and flushes when moving into and out of the RENDER domain, correct
4169 * snooping behaviour occurs naturally as the result of our domain
4170 * tracking.
4171 */
4172 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
4173 return false;
4174
4175 trace_i915_gem_object_clflush(obj);
4176 #ifdef __NetBSD__
4177 drm_clflush_pglist(&obj->igo_pageq);
4178 #else
4179 drm_clflush_sg(obj->pages);
4180 #endif
4181
4182 return true;
4183 }
4184
4185 /** Flushes the GTT write domain for the object if it's dirty. */
4186 static void
4187 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
4188 {
4189 uint32_t old_write_domain;
4190
4191 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
4192 return;
4193
4194 /* No actual flushing is required for the GTT write domain. Writes
4195 * to it immediately go to main memory as far as we know, so there's
4196 * no chipset flush. It also doesn't land in render cache.
4197 *
4198 * However, we do have to enforce the order so that all writes through
4199 * the GTT land before any writes to the device, such as updates to
4200 * the GATT itself.
4201 */
4202 wmb();
4203
4204 old_write_domain = obj->base.write_domain;
4205 obj->base.write_domain = 0;
4206
4207 trace_i915_gem_object_change_domain(obj,
4208 obj->base.read_domains,
4209 old_write_domain);
4210 }
4211
4212 /** Flushes the CPU write domain for the object if it's dirty. */
4213 static void
4214 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
4215 bool force)
4216 {
4217 uint32_t old_write_domain;
4218
4219 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
4220 return;
4221
4222 if (i915_gem_clflush_object(obj, force))
4223 i915_gem_chipset_flush(obj->base.dev);
4224
4225 old_write_domain = obj->base.write_domain;
4226 obj->base.write_domain = 0;
4227
4228 trace_i915_gem_object_change_domain(obj,
4229 obj->base.read_domains,
4230 old_write_domain);
4231 }
4232
4233 /**
4234 * Moves a single object to the GTT read, and possibly write domain.
4235 *
4236 * This function returns when the move is complete, including waiting on
4237 * flushes to occur.
4238 */
4239 int
4240 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
4241 {
4242 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
4243 uint32_t old_write_domain, old_read_domains;
4244 int ret;
4245
4246 /* Not valid to be called on unbound objects. */
4247 if (!i915_gem_obj_bound_any(obj))
4248 return -EINVAL;
4249
4250 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
4251 return 0;
4252
4253 ret = i915_gem_object_wait_rendering(obj, !write);
4254 if (ret)
4255 return ret;
4256
4257 i915_gem_object_flush_cpu_write_domain(obj, false);
4258
4259 /* Serialise direct access to this object with the barriers for
4260 * coherent writes from the GPU, by effectively invalidating the
4261 * GTT domain upon first access.
4262 */
4263 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
4264 mb();
4265
4266 old_write_domain = obj->base.write_domain;
4267 old_read_domains = obj->base.read_domains;
4268
4269 /* It should now be out of any other write domains, and we can update
4270 * the domain values for our changes.
4271 */
4272 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
4273 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
4274 if (write) {
4275 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
4276 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
4277 obj->dirty = 1;
4278 }
4279
4280 trace_i915_gem_object_change_domain(obj,
4281 old_read_domains,
4282 old_write_domain);
4283
4284 /* And bump the LRU for this access */
4285 if (i915_gem_object_is_inactive(obj)) {
4286 struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
4287 if (vma)
4288 list_move_tail(&vma->mm_list,
4289 &dev_priv->gtt.base.inactive_list);
4290
4291 }
4292
4293 return 0;
4294 }
4295
4296 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
4297 enum i915_cache_level cache_level)
4298 {
4299 struct drm_device *dev = obj->base.dev;
4300 struct i915_vma *vma, *next;
4301 int ret;
4302
4303 if (obj->cache_level == cache_level)
4304 return 0;
4305
4306 if (i915_gem_obj_is_pinned(obj)) {
4307 DRM_DEBUG("can not change the cache level of pinned objects\n");
4308 return -EBUSY;
4309 }
4310
4311 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
4312 if (!i915_gem_valid_gtt_space(dev, &vma->node, cache_level)) {
4313 ret = i915_vma_unbind(vma);
4314 if (ret)
4315 return ret;
4316 }
4317 }
4318
4319 if (i915_gem_obj_bound_any(obj)) {
4320 ret = i915_gem_object_finish_gpu(obj);
4321 if (ret)
4322 return ret;
4323
4324 i915_gem_object_finish_gtt(obj);
4325
4326 /* Before SandyBridge, you could not use tiling or fence
4327 * registers with snooped memory, so relinquish any fences
4328 * currently pointing to our region in the aperture.
4329 */
4330 if (INTEL_INFO(dev)->gen < 6) {
4331 ret = i915_gem_object_put_fence(obj);
4332 if (ret)
4333 return ret;
4334 }
4335
4336 list_for_each_entry(vma, &obj->vma_list, vma_link)
4337 if (drm_mm_node_allocated(&vma->node))
4338 vma->bind_vma(vma, cache_level,
4339 obj->has_global_gtt_mapping ? GLOBAL_BIND : 0);
4340 }
4341
4342 list_for_each_entry(vma, &obj->vma_list, vma_link)
4343 vma->node.color = cache_level;
4344 obj->cache_level = cache_level;
4345
4346 if (cpu_write_needs_clflush(obj)) {
4347 u32 old_read_domains, old_write_domain;
4348
4349 /* If we're coming from LLC cached, then we haven't
4350 * actually been tracking whether the data is in the
4351 * CPU cache or not, since we only allow one bit set
4352 * in obj->write_domain and have been skipping the clflushes.
4353 * Just set it to the CPU cache for now.
4354 */
4355 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
4356
4357 old_read_domains = obj->base.read_domains;
4358 old_write_domain = obj->base.write_domain;
4359
4360 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4361 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4362
4363 trace_i915_gem_object_change_domain(obj,
4364 old_read_domains,
4365 old_write_domain);
4366 }
4367
4368 i915_gem_verify_gtt(dev);
4369 return 0;
4370 }
4371
4372 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
4373 struct drm_file *file)
4374 {
4375 struct drm_i915_gem_caching *args = data;
4376 struct drm_gem_object *gobj;
4377 struct drm_i915_gem_object *obj;
4378 int ret;
4379
4380 ret = i915_mutex_lock_interruptible(dev);
4381 if (ret)
4382 return ret;
4383
4384 gobj = drm_gem_object_lookup(dev, file, args->handle);
4385 if (gobj == NULL) {
4386 ret = -ENOENT;
4387 goto unlock;
4388 }
4389 obj = to_intel_bo(gobj);
4390
4391 switch (obj->cache_level) {
4392 case I915_CACHE_LLC:
4393 case I915_CACHE_L3_LLC:
4394 args->caching = I915_CACHING_CACHED;
4395 break;
4396
4397 case I915_CACHE_WT:
4398 args->caching = I915_CACHING_DISPLAY;
4399 break;
4400
4401 default:
4402 args->caching = I915_CACHING_NONE;
4403 break;
4404 }
4405
4406 drm_gem_object_unreference(&obj->base);
4407 unlock:
4408 mutex_unlock(&dev->struct_mutex);
4409 return ret;
4410 }
4411
4412 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
4413 struct drm_file *file)
4414 {
4415 struct drm_i915_gem_caching *args = data;
4416 struct drm_gem_object *gobj;
4417 struct drm_i915_gem_object *obj;
4418 enum i915_cache_level level;
4419 int ret;
4420
4421 switch (args->caching) {
4422 case I915_CACHING_NONE:
4423 level = I915_CACHE_NONE;
4424 break;
4425 case I915_CACHING_CACHED:
4426 level = I915_CACHE_LLC;
4427 break;
4428 case I915_CACHING_DISPLAY:
4429 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
4430 break;
4431 default:
4432 return -EINVAL;
4433 }
4434
4435 ret = i915_mutex_lock_interruptible(dev);
4436 if (ret)
4437 return ret;
4438
4439 gobj = drm_gem_object_lookup(dev, file, args->handle);
4440 if (gobj == NULL) {
4441 ret = -ENOENT;
4442 goto unlock;
4443 }
4444 obj = to_intel_bo(gobj);
4445
4446 ret = i915_gem_object_set_cache_level(obj, level);
4447
4448 drm_gem_object_unreference(&obj->base);
4449 unlock:
4450 mutex_unlock(&dev->struct_mutex);
4451 return ret;
4452 }
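
/*
 * Userspace-side illustration (not part of this kernel file): request that a
 * buffer object be kept LLC-cached via the ioctl above.  Assumes the
 * DRM_IOCTL_I915_GEM_SET_CACHING request and struct drm_i915_gem_caching from
 * i915_drm.h and the drmIoctl() helper from libdrm; error handling is minimal.
 */
#if 0
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <i915_drm.h>

static int set_bo_cached(int fd, uint32_t handle)
{
	struct drm_i915_gem_caching arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;
	arg.caching = I915_CACHING_CACHED;	/* mapped to I915_CACHE_LLC above */

	return drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
}
#endif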
4453
4454 static bool is_pin_display(struct drm_i915_gem_object *obj)
4455 {
4456 /* There are 3 sources that pin objects:
4457 * 1. The display engine (scanouts, sprites, cursors);
4458 * 2. Reservations for execbuffer;
4459 * 3. The user.
4460 *
4461 * We can ignore reservations as we hold the struct_mutex and
4462 * are only called outside of the reservation path. The user
4463 * can only increment pin_count once, and so if after
4464 * subtracting the potential reference by the user, any pin_count
4465 * remains, it must be due to another use by the display engine.
4466 */
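	/* For example, a scanout that the user has also pinned has
	 * pin_count == 2 and user_pin_count == 1; the non-zero difference
	 * shows a display pin remains.
	 */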
4467 return i915_gem_obj_to_ggtt(obj)->pin_count - !!obj->user_pin_count;
4468 }
4469
4470 /*
4471 * Prepare buffer for display plane (scanout, cursors, etc).
4472 * Can be called from an uninterruptible phase (modesetting) and allows
4473 * any flushes to be pipelined (for pageflips).
4474 */
4475 int
4476 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
4477 u32 alignment,
4478 struct intel_ring_buffer *pipelined)
4479 {
4480 u32 old_read_domains, old_write_domain;
4481 int ret;
4482
4483 if (pipelined != obj->ring) {
4484 ret = i915_gem_object_sync(obj, pipelined);
4485 if (ret)
4486 return ret;
4487 }
4488
4489 /* Mark the pin_display early so that we account for the
4490 * display coherency whilst setting up the cache domains.
4491 */
4492 obj->pin_display = true;
4493
4494 /* The display engine is not coherent with the LLC cache on gen6. As
4495 * a result, we make sure that the pinning that is about to occur is
4496 * done with uncached PTEs. This is lowest common denominator for all
4497 * chipsets.
4498 *
4499 * However for gen6+, we could do better by using the GFDT bit instead
4500 * of uncaching, which would allow us to flush all the LLC-cached data
4501 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
4502 */
4503 ret = i915_gem_object_set_cache_level(obj,
4504 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
4505 if (ret)
4506 goto err_unpin_display;
4507
4508 /* As the user may map the buffer once pinned in the display plane
4509 * (e.g. libkms for the bootup splash), we have to ensure that we
4510 * always use map_and_fenceable for all scanout buffers.
4511 */
4512 ret = i915_gem_obj_ggtt_pin(obj, alignment, PIN_MAPPABLE);
4513 if (ret)
4514 goto err_unpin_display;
4515
4516 i915_gem_object_flush_cpu_write_domain(obj, true);
4517
4518 old_write_domain = obj->base.write_domain;
4519 old_read_domains = obj->base.read_domains;
4520
4521 /* It should now be out of any other write domains, and we can update
4522 * the domain values for our changes.
4523 */
4524 obj->base.write_domain = 0;
4525 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
4526
4527 trace_i915_gem_object_change_domain(obj,
4528 old_read_domains,
4529 old_write_domain);
4530
4531 return 0;
4532
4533 err_unpin_display:
4534 obj->pin_display = is_pin_display(obj);
4535 return ret;
4536 }
4537
4538 void
4539 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj)
4540 {
4541 i915_gem_object_ggtt_unpin(obj);
4542 obj->pin_display = is_pin_display(obj);
4543 }
4544
4545 int
4546 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
4547 {
4548 int ret;
4549
4550 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
4551 return 0;
4552
4553 ret = i915_gem_object_wait_rendering(obj, false);
4554 if (ret)
4555 return ret;
4556
4557 /* Ensure that we invalidate the GPU's caches and TLBs. */
4558 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
4559 return 0;
4560 }
4561
4562 /**
4563 * Moves a single object to the CPU read, and possibly write domain.
4564 *
4565 * This function returns when the move is complete, including waiting on
4566 * flushes to occur.
4567 */
4568 int
4569 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
4570 {
4571 uint32_t old_write_domain, old_read_domains;
4572 int ret;
4573
4574 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
4575 return 0;
4576
4577 ret = i915_gem_object_wait_rendering(obj, !write);
4578 if (ret)
4579 return ret;
4580
4581 i915_gem_object_flush_gtt_write_domain(obj);
4582
4583 old_write_domain = obj->base.write_domain;
4584 old_read_domains = obj->base.read_domains;
4585
4586 /* Flush the CPU cache if it's still invalid. */
4587 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
4588 i915_gem_clflush_object(obj, false);
4589
4590 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
4591 }
4592
4593 /* It should now be out of any other write domains, and we can update
4594 * the domain values for our changes.
4595 */
4596 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
4597
4598 /* If we're writing through the CPU, then the GPU read domains will
4599 * need to be invalidated at next use.
4600 */
4601 if (write) {
4602 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4603 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4604 }
4605
4606 trace_i915_gem_object_change_domain(obj,
4607 old_read_domains,
4608 old_write_domain);
4609
4610 return 0;
4611 }
4612
4613 /* Throttle our rendering by waiting until the ring has completed our requests
4614 * emitted over 20 msec ago.
4615 *
4616 * Note that if we were to use the current jiffies each time around the loop,
4617 * we wouldn't escape the function with any frames outstanding if the time to
4618 * render a frame was over 20ms.
4619 *
4620 * This should get us reasonable parallelism between CPU and GPU but also
4621 * relatively low latency when blocking on a particular request to finish.
4622 */
4623 static int
4624 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
4625 {
4626 struct drm_i915_private *dev_priv = dev->dev_private;
4627 struct drm_i915_file_private *file_priv = file->driver_priv;
4628 unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
4629 struct drm_i915_gem_request *request;
4630 struct intel_ring_buffer *ring = NULL;
4631 unsigned reset_counter;
4632 u32 seqno = 0;
4633 int ret;
4634
4635 ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
4636 if (ret)
4637 return ret;
4638
4639 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false);
4640 if (ret)
4641 return ret;
4642
4643 spin_lock(&file_priv->mm.lock);
4644 list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
4645 if (time_after_eq(request->emitted_jiffies, recent_enough))
4646 break;
4647
4648 ring = request->ring;
4649 seqno = request->seqno;
4650 }
4651 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
4652 spin_unlock(&file_priv->mm.lock);
4653
4654 if (seqno == 0)
4655 return 0;
4656
4657 ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, NULL);
4658 if (ret == 0)
4659 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
4660
4661 return ret;
4662 }
4663
4664 static bool
4665 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
4666 {
4667 struct drm_i915_gem_object *obj = vma->obj;
4668
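	/* A vma is misplaced if it violates any constraint the caller asked
	 * for: the requested alignment, mappability through the aperture, or
	 * a minimum offset (PIN_OFFSET_BIAS).
	 */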
4669 if (alignment &&
4670 vma->node.start & (alignment - 1))
4671 return true;
4672
4673 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
4674 return true;
4675
4676 if (flags & PIN_OFFSET_BIAS &&
4677 vma->node.start < (flags & PIN_OFFSET_MASK))
4678 return true;
4679
4680 return false;
4681 }
4682
4683 int
4684 i915_gem_object_pin(struct drm_i915_gem_object *obj,
4685 struct i915_address_space *vm,
4686 uint32_t alignment,
4687 uint64_t flags)
4688 {
4689 struct i915_vma *vma;
4690 int ret;
4691
4692 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
4693 return -EINVAL;
4694
4695 vma = i915_gem_obj_to_vma(obj, vm);
4696 if (vma) {
4697 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
4698 return -EBUSY;
4699
4700 if (i915_vma_misplaced(vma, alignment, flags)) {
4701 WARN(vma->pin_count,
4702 "bo is already pinned with incorrect alignment:"
4703 " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d,"
4704 " obj->map_and_fenceable=%d\n",
4705 i915_gem_obj_offset(obj, vm), alignment,
4706 !!(flags & PIN_MAPPABLE),
4707 obj->map_and_fenceable);
4708 ret = i915_vma_unbind(vma);
4709 if (ret)
4710 return ret;
4711
4712 vma = NULL;
4713 }
4714 }
4715
4716 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
4717 vma = i915_gem_object_bind_to_vm(obj, vm, alignment, flags);
4718 if (IS_ERR(vma))
4719 return PTR_ERR(vma);
4720 }
4721
4722 if (flags & PIN_GLOBAL && !obj->has_global_gtt_mapping)
4723 vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
4724
4725 vma->pin_count++;
4726 if (flags & PIN_MAPPABLE)
4727 obj->pin_mappable |= true;
4728
4729 return 0;
4730 }
4731
4732 void
4733 i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj)
4734 {
4735 struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
4736
4737 BUG_ON(!vma);
4738 BUG_ON(vma->pin_count == 0);
4739 BUG_ON(!i915_gem_obj_ggtt_bound(obj));
4740
4741 if (--vma->pin_count == 0)
4742 obj->pin_mappable = false;
4743 }
4744
4745 int
4746 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
4747 struct drm_file *file)
4748 {
4749 struct drm_i915_gem_pin *args = data;
4750 struct drm_gem_object *gobj;
4751 struct drm_i915_gem_object *obj;
4752 int ret;
4753
4754 if (INTEL_INFO(dev)->gen >= 6)
4755 return -ENODEV;
4756
4757 ret = i915_mutex_lock_interruptible(dev);
4758 if (ret)
4759 return ret;
4760
4761 gobj = drm_gem_object_lookup(dev, file, args->handle);
4762 if (gobj == NULL) {
4763 ret = -ENOENT;
4764 goto unlock;
4765 }
4766 obj = to_intel_bo(gobj);
4767
4768 if (obj->madv != I915_MADV_WILLNEED) {
4769 DRM_DEBUG("Attempting to pin a purgeable buffer\n");
4770 ret = -EFAULT;
4771 goto out;
4772 }
4773
4774 if (obj->pin_filp != NULL && obj->pin_filp != file) {
4775 DRM_DEBUG("Already pinned in i915_gem_pin_ioctl(): %d\n",
4776 args->handle);
4777 ret = -EINVAL;
4778 goto out;
4779 }
4780
4781 if (obj->user_pin_count == ULONG_MAX) {
4782 ret = -EBUSY;
4783 goto out;
4784 }
4785
4786 if (obj->user_pin_count == 0) {
4787 ret = i915_gem_obj_ggtt_pin(obj, args->alignment, PIN_MAPPABLE);
4788 if (ret)
4789 goto out;
4790 }
4791
4792 obj->user_pin_count++;
4793 obj->pin_filp = file;
4794
4795 args->offset = i915_gem_obj_ggtt_offset(obj);
4796 out:
4797 drm_gem_object_unreference(&obj->base);
4798 unlock:
4799 mutex_unlock(&dev->struct_mutex);
4800 return ret;
4801 }
4802
4803 int
4804 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
4805 struct drm_file *file)
4806 {
4807 struct drm_i915_gem_pin *args = data;
4808 struct drm_gem_object *gobj;
4809 struct drm_i915_gem_object *obj;
4810 int ret;
4811
4812 ret = i915_mutex_lock_interruptible(dev);
4813 if (ret)
4814 return ret;
4815
4816 gobj = drm_gem_object_lookup(dev, file, args->handle);
4817 if (gobj == NULL) {
4818 ret = -ENOENT;
4819 goto unlock;
4820 }
4821 obj = to_intel_bo(gobj);
4822
4823 if (obj->pin_filp != file) {
4824 DRM_DEBUG("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
4825 args->handle);
4826 ret = -EINVAL;
4827 goto out;
4828 }
4829 obj->user_pin_count--;
4830 if (obj->user_pin_count == 0) {
4831 obj->pin_filp = NULL;
4832 i915_gem_object_ggtt_unpin(obj);
4833 }
4834
4835 out:
4836 drm_gem_object_unreference(&obj->base);
4837 unlock:
4838 mutex_unlock(&dev->struct_mutex);
4839 return ret;
4840 }
4841
4842 int
4843 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4844 struct drm_file *file)
4845 {
4846 struct drm_i915_gem_busy *args = data;
4847 struct drm_gem_object *gobj;
4848 struct drm_i915_gem_object *obj;
4849 int ret;
4850
4851 ret = i915_mutex_lock_interruptible(dev);
4852 if (ret)
4853 return ret;
4854
4855 gobj = drm_gem_object_lookup(dev, file, args->handle);
4856 if (gobj == NULL) {
4857 ret = -ENOENT;
4858 goto unlock;
4859 }
4860 obj = to_intel_bo(gobj);
4861
4862 /* Count all active objects as busy, even if they are currently not used
4863 * by the gpu. Users of this interface expect objects to eventually
4864 * become non-busy without any further actions, therefore emit any
4865 * necessary flushes here.
4866 */
4867 ret = i915_gem_object_flush_active(obj);
4868
4869 args->busy = obj->active;
4870 if (obj->ring) {
4871 BUILD_BUG_ON(I915_NUM_RINGS > 16);
4872 args->busy |= intel_ring_flag(obj->ring) << 16;
4873 }
4874
4875 drm_gem_object_unreference(&obj->base);
4876 unlock:
4877 mutex_unlock(&dev->struct_mutex);
4878 return ret;
4879 }
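
/*
 * Userspace-side illustration (not part of this kernel file): decode the busy
 * word returned by the ioctl above.  Bit 0 mirrors obj->active and the ring
 * flag is packed into bits 16 and up, matching the encoding in
 * i915_gem_busy_ioctl().  Assumes i915_drm.h and libdrm's drmIoctl().
 */
#if 0
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <i915_drm.h>

static int bo_is_busy(int fd, uint32_t handle, uint32_t *ring_flags)
{
	struct drm_i915_gem_busy arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &arg))
		return -1;

	*ring_flags = arg.busy >> 16;	/* which ring last touched the object */
	return arg.busy & 1;		/* still active on the GPU? */
}
#endif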
4880
4881 int
4882 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4883 struct drm_file *file_priv)
4884 {
4885 return i915_gem_ring_throttle(dev, file_priv);
4886 }
4887
4888 int
4889 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4890 struct drm_file *file_priv)
4891 {
4892 struct drm_i915_gem_madvise *args = data;
4893 struct drm_gem_object *gobj;
4894 struct drm_i915_gem_object *obj;
4895 int ret;
4896
4897 switch (args->madv) {
4898 case I915_MADV_DONTNEED:
4899 case I915_MADV_WILLNEED:
4900 break;
4901 default:
4902 return -EINVAL;
4903 }
4904
4905 ret = i915_mutex_lock_interruptible(dev);
4906 if (ret)
4907 return ret;
4908
4909 gobj = drm_gem_object_lookup(dev, file_priv, args->handle);
4910 if (gobj == NULL) {
4911 ret = -ENOENT;
4912 goto unlock;
4913 }
4914 obj = to_intel_bo(gobj);
4915
4916 if (i915_gem_obj_is_pinned(obj)) {
4917 ret = -EINVAL;
4918 goto out;
4919 }
4920
4921 if (obj->madv != __I915_MADV_PURGED)
4922 obj->madv = args->madv;
4923
4924 /* if the object is no longer attached, discard its backing storage */
4925 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL)
4926 i915_gem_object_truncate(obj);
4927
4928 args->retained = obj->madv != __I915_MADV_PURGED;
4929
4930 out:
4931 drm_gem_object_unreference(&obj->base);
4932 unlock:
4933 mutex_unlock(&dev->struct_mutex);
4934 return ret;
4935 }
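
/*
 * Userspace-side illustration (not part of this kernel file): mark an idle
 * buffer purgeable, then reclaim it before reuse and check whether the kernel
 * discarded its backing pages in the meantime.  Assumes i915_drm.h and
 * libdrm's drmIoctl().
 */
#if 0
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <i915_drm.h>

/* madv is I915_MADV_DONTNEED or I915_MADV_WILLNEED; returns retained, or -1. */
static int bo_madvise(int fd, uint32_t handle, uint32_t madv)
{
	struct drm_i915_gem_madvise arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;
	arg.madv = madv;

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &arg))
		return -1;

	/* 0 means the object was purged and its contents are gone. */
	return arg.retained;
}
#endif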
4936
4937 void i915_gem_object_init(struct drm_i915_gem_object *obj,
4938 const struct drm_i915_gem_object_ops *ops)
4939 {
4940 INIT_LIST_HEAD(&obj->global_list);
4941 INIT_LIST_HEAD(&obj->ring_list);
4942 INIT_LIST_HEAD(&obj->obj_exec_link);
4943 INIT_LIST_HEAD(&obj->vma_list);
4944
4945 obj->ops = ops;
4946
4947 obj->fence_reg = I915_FENCE_REG_NONE;
4948 obj->madv = I915_MADV_WILLNEED;
4949 /* Avoid an unnecessary call to unbind on the first bind. */
4950 obj->map_and_fenceable = true;
4951
4952 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
4953 }
4954
4955 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
4956 .get_pages = i915_gem_object_get_pages_gtt,
4957 .put_pages = i915_gem_object_put_pages_gtt,
4958 };
4959
4960 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
4961 size_t size)
4962 {
4963 #ifdef __NetBSD__
4964 struct drm_i915_private *const dev_priv = dev->dev_private;
4965 #endif
4966 struct drm_i915_gem_object *obj;
4967 #ifndef __NetBSD__
4968 struct address_space *mapping;
4969 gfp_t mask;
4970 #endif
4971
4972 obj = i915_gem_object_alloc(dev);
4973 if (obj == NULL)
4974 return NULL;
4975
4976 if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4977 i915_gem_object_free(obj);
4978 return NULL;
4979 }
4980
4981 #ifdef __NetBSD__
4982 uao_set_pgfl(obj->base.gemo_shm_uao, dev_priv->gtt.pgfl);
4983 #else
4984 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
4985 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
4986 /* 965gm cannot relocate objects above 4GiB. */
4987 mask &= ~__GFP_HIGHMEM;
4988 mask |= __GFP_DMA32;
4989 }
4990
4991 mapping = file_inode(obj->base.filp)->i_mapping;
4992 mapping_set_gfp_mask(mapping, mask);
4993 #endif
4994
4995 i915_gem_object_init(obj, &i915_gem_object_ops);
4996
4997 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4998 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4999
5000 if (HAS_LLC(dev)) {
5001 /* On some devices, we can have the GPU use the LLC (the CPU
5002 * cache) for about a 10% performance improvement
5003 * compared to uncached. Graphics requests other than
5004 * display scanout are coherent with the CPU in
5005 * accessing this cache. This means in this mode we
5006 * don't need to clflush on the CPU side, and on the
5007 * GPU side we only need to flush internal caches to
5008 * get data visible to the CPU.
5009 *
5010 * However, we maintain the display planes as UC, and so
5011 * need to rebind when first used as such.
5012 */
5013 obj->cache_level = I915_CACHE_LLC;
5014 } else
5015 obj->cache_level = I915_CACHE_NONE;
5016
5017 trace_i915_gem_object_create(obj);
5018
5019 return obj;
5020 }
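
/*
 * Illustrative sketch (not compiled): minimal lifecycle of an object created
 * with i915_gem_alloc_object().  The object starts in the CPU domain; dropping
 * the last reference funnels into i915_gem_free_object() below.  Assumes the
 * hypothetical caller holds dev->struct_mutex.
 */
#if 0
static int example_object_lifecycle(struct drm_device *dev)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_alloc_object(dev, 2 * PAGE_SIZE);
	if (obj == NULL)
		return -ENOMEM;

	/* ... pin, map and use the object ... */

	drm_gem_object_unreference(&obj->base);
	return 0;
}
#endif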
5021
5022 void i915_gem_free_object(struct drm_gem_object *gem_obj)
5023 {
5024 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
5025 struct drm_device *dev = obj->base.dev;
5026 struct drm_i915_private *dev_priv = dev->dev_private;
5027 struct i915_vma *vma, *next;
5028
5029 intel_runtime_pm_get(dev_priv);
5030
5031 trace_i915_gem_object_destroy(obj);
5032
5033 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
5034 int ret;
5035
5036 vma->pin_count = 0;
5037 ret = i915_vma_unbind(vma);
5038 if (WARN_ON(ret == -ERESTARTSYS)) {
5039 bool was_interruptible;
5040
5041 was_interruptible = dev_priv->mm.interruptible;
5042 dev_priv->mm.interruptible = false;
5043
5044 WARN_ON(i915_vma_unbind(vma));
5045
5046 dev_priv->mm.interruptible = was_interruptible;
5047 }
5048 }
5049
5050 i915_gem_object_detach_phys(obj);
5051
5052 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up
5053 * before progressing. */
5054 if (obj->stolen)
5055 i915_gem_object_unpin_pages(obj);
5056
5057 if (WARN_ON(obj->pages_pin_count))
5058 obj->pages_pin_count = 0;
5059 i915_gem_object_put_pages(obj);
5060 i915_gem_object_free_mmap_offset(obj);
5061 i915_gem_object_release_stolen(obj);
5062
5063 BUG_ON(obj->pages);
5064
5065 #ifndef __NetBSD__ /* XXX drm prime */
5066 if (obj->base.import_attach)
5067 drm_prime_gem_destroy(&obj->base, NULL);
5068 #endif
5069
5070 drm_gem_object_release(&obj->base);
5071 i915_gem_info_remove_obj(dev_priv, obj->base.size);
5072
5073 kfree(obj->bit_17);
5074 i915_gem_object_free(obj);
5075
5076 intel_runtime_pm_put(dev_priv);
5077 }
5078
5079 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
5080 struct i915_address_space *vm)
5081 {
5082 struct i915_vma *vma;
5083 list_for_each_entry(vma, &obj->vma_list, vma_link)
5084 if (vma->vm == vm)
5085 return vma;
5086
5087 return NULL;
5088 }
5089
5090 void i915_gem_vma_destroy(struct i915_vma *vma)
5091 {
5092 WARN_ON(vma->node.allocated);
5093
5094 /* Keep the vma as a placeholder in the execbuffer reservation lists */
5095 if (!list_empty(&vma->exec_list))
5096 return;
5097
5098 list_del(&vma->vma_link);
5099
5100 kfree(vma);
5101 }
5102
5103 int
5104 i915_gem_suspend(struct drm_device *dev)
5105 {
5106 struct drm_i915_private *dev_priv = dev->dev_private;
5107 int ret = 0;
5108
5109 mutex_lock(&dev->struct_mutex);
5110 if (dev_priv->ums.mm_suspended)
5111 goto err;
5112
5113 ret = i915_gpu_idle(dev);
5114 if (ret)
5115 goto err;
5116
5117 i915_gem_retire_requests(dev);
5118
5119 /* Under UMS, be paranoid and evict. */
5120 if (!drm_core_check_feature(dev, DRIVER_MODESET))
5121 i915_gem_evict_everything(dev);
5122
5123 i915_kernel_lost_context(dev);
5124 i915_gem_cleanup_ringbuffer(dev);
5125
5126 /* Hack! Don't let anybody do execbuf while we don't control the chip.
5127 * We need to replace this with a semaphore, or something.
5128 * And not confound ums.mm_suspended!
5129 */
5130 dev_priv->ums.mm_suspended = !drm_core_check_feature(dev,
5131 DRIVER_MODESET);
5132 mutex_unlock(&dev->struct_mutex);
5133
5134 del_timer_sync(&dev_priv->gpu_error.hangcheck_timer);
5135 cancel_delayed_work_sync(&dev_priv->mm.retire_work);
5136 cancel_delayed_work_sync(&dev_priv->mm.idle_work);
5137
5138 return 0;
5139
5140 err:
5141 mutex_unlock(&dev->struct_mutex);
5142 return ret;
5143 }
5144
5145 int i915_gem_l3_remap(struct intel_ring_buffer *ring, int slice)
5146 {
5147 struct drm_device *dev = ring->dev;
5148 struct drm_i915_private *dev_priv = dev->dev_private;
5149 u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
5150 u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
5151 int i, ret;
5152
5153 if (!HAS_L3_DPF(dev) || !remap_info)
5154 return 0;
5155
5156 ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3);
5157 if (ret)
5158 return ret;
5159
5160 /*
5161 * Note: We do not worry about the concurrent register cacheline hang
5162 * here because no other code should access these registers other than
5163 * at initialization time.
5164 */
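	/* Each remapped register costs three dwords on the ring (the
	 * MI_LOAD_REGISTER_IMM header, the register offset and the value);
	 * the loop walks GEN7_L3LOG_SIZE/4 registers, which is why
	 * GEN7_L3LOG_SIZE / 4 * 3 dwords were reserved above.
	 */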
5165 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
5166 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
5167 intel_ring_emit(ring, reg_base + i);
5168 intel_ring_emit(ring, remap_info[i/4]);
5169 }
5170
5171 intel_ring_advance(ring);
5172
5173 return ret;
5174 }
5175
5176 void i915_gem_init_swizzling(struct drm_device *dev)
5177 {
5178 struct drm_i915_private *dev_priv = dev->dev_private;
5179
5180 if (INTEL_INFO(dev)->gen < 5 ||
5181 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
5182 return;
5183
5184 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
5185 DISP_TILE_SURFACE_SWIZZLING);
5186
5187 if (IS_GEN5(dev))
5188 return;
5189
5190 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
5191 if (IS_GEN6(dev))
5192 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
5193 else if (IS_GEN7(dev))
5194 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
5195 else if (IS_GEN8(dev))
5196 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
5197 else
5198 BUG();
5199 }
5200
5201 static bool
5202 intel_enable_blt(struct drm_device *dev)
5203 {
5204 if (!HAS_BLT(dev))
5205 return false;
5206
5207 /* The blitter was dysfunctional on early prototypes */
5208 if (IS_GEN6(dev) && dev->pdev->revision < 8) {
5209 DRM_INFO("BLT not supported on this pre-production hardware;"
5210 " graphics performance will be degraded.\n");
5211 return false;
5212 }
5213
5214 return true;
5215 }
5216
5217 static int i915_gem_init_rings(struct drm_device *dev)
5218 {
5219 struct drm_i915_private *dev_priv = dev->dev_private;
5220 int ret;
5221
5222 ret = intel_init_render_ring_buffer(dev);
5223 if (ret)
5224 return ret;
5225
5226 if (HAS_BSD(dev)) {
5227 ret = intel_init_bsd_ring_buffer(dev);
5228 if (ret)
5229 goto cleanup_render_ring;
5230 }
5231
5232 if (intel_enable_blt(dev)) {
5233 ret = intel_init_blt_ring_buffer(dev);
5234 if (ret)
5235 goto cleanup_bsd_ring;
5236 }
5237
5238 if (HAS_VEBOX(dev)) {
5239 ret = intel_init_vebox_ring_buffer(dev);
5240 if (ret)
5241 goto cleanup_blt_ring;
5242 }
5243
5244
5245 ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
5246 if (ret)
5247 goto cleanup_vebox_ring;
5248
5249 return 0;
5250
5251 cleanup_vebox_ring:
5252 intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
5253 cleanup_blt_ring:
5254 intel_cleanup_ring_buffer(&dev_priv->ring[BCS]);
5255 cleanup_bsd_ring:
5256 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
5257 cleanup_render_ring:
5258 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
5259
5260 return ret;
5261 }
5262
5263 int
5264 i915_gem_init_hw(struct drm_device *dev)
5265 {
5266 struct drm_i915_private *dev_priv = dev->dev_private;
5267 int ret, i;
5268
5269 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
5270 return -EIO;
5271
5272 if (dev_priv->ellc_size)
5273 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
5274
5275 if (IS_HASWELL(dev))
5276 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
5277 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
5278
5279 if (HAS_PCH_NOP(dev)) {
5280 if (IS_IVYBRIDGE(dev)) {
5281 u32 temp = I915_READ(GEN7_MSG_CTL);
5282 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
5283 I915_WRITE(GEN7_MSG_CTL, temp);
5284 } else if (INTEL_INFO(dev)->gen >= 7) {
5285 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
5286 temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
5287 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
5288 }
5289 }
5290
5291 i915_gem_init_swizzling(dev);
5292
5293 ret = i915_gem_init_rings(dev);
5294 if (ret)
5295 return ret;
5296
5297 for (i = 0; i < NUM_L3_SLICES(dev); i++)
5298 i915_gem_l3_remap(&dev_priv->ring[RCS], i);
5299
5300 /*
5301 * XXX: Contexts should only be initialized once. Doing a switch to the
5302 * default context switch however is something we'd like to do after
5303 * reset or thaw (the latter may not actually be necessary for HW, but
5304 * goes with our code better). Context switching requires rings (for
5305 * the do_switch), but before enabling PPGTT. So don't move this.
5306 */
5307 ret = i915_gem_context_enable(dev_priv);
5308 if (ret) {
5309 DRM_ERROR("Context enable failed %d\n", ret);
5310 goto err_out;
5311 }
5312
5313 return 0;
5314
5315 err_out:
5316 i915_gem_cleanup_ringbuffer(dev);
5317 return ret;
5318 }
5319
5320 int i915_gem_init(struct drm_device *dev)
5321 {
5322 struct drm_i915_private *dev_priv = dev->dev_private;
5323 int ret;
5324
5325 mutex_lock(&dev->struct_mutex);
5326
5327 if (IS_VALLEYVIEW(dev)) {
5328 /* VLVA0 (potential hack), BIOS isn't actually waking us */
5329 I915_WRITE(VLV_GTLC_WAKE_CTRL, 1);
5330 if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 1) == 1, 10))
5331 DRM_DEBUG_DRIVER("allow wake ack timed out\n");
5332 }
5333 i915_gem_init_global_gtt(dev);
5334
5335 ret = i915_gem_context_init(dev);
5336 if (ret) {
5337 mutex_unlock(&dev->struct_mutex);
5338 return ret;
5339 }
5340
5341 ret = i915_gem_init_hw(dev);
5342 mutex_unlock(&dev->struct_mutex);
5343 if (ret) {
5344 WARN_ON(dev_priv->mm.aliasing_ppgtt);
5345 i915_gem_context_fini(dev);
5346 drm_mm_takedown(&dev_priv->gtt.base.mm);
5347 return ret;
5348 }
5349
5350 /* Allow hardware batchbuffers unless told otherwise, but not for KMS. */
5351 if (!drm_core_check_feature(dev, DRIVER_MODESET))
5352 dev_priv->dri1.allow_batchbuffer = 1;
5353 return 0;
5354 }
5355
5356 void
5357 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
5358 {
5359 struct drm_i915_private *dev_priv = dev->dev_private;
5360 struct intel_ring_buffer *ring;
5361 int i;
5362
5363 for_each_ring(ring, dev_priv, i)
5364 intel_cleanup_ring_buffer(ring);
5365 }
5366
5367 int
5368 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
5369 struct drm_file *file_priv)
5370 {
5371 struct drm_i915_private *dev_priv = dev->dev_private;
5372 int ret;
5373
5374 if (drm_core_check_feature(dev, DRIVER_MODESET))
5375 return 0;
5376
5377 if (i915_reset_in_progress(&dev_priv->gpu_error)) {
5378 DRM_ERROR("Reenabling wedged hardware, good luck\n");
5379 atomic_set(&dev_priv->gpu_error.reset_counter, 0);
5380 }
5381
5382 mutex_lock(&dev->struct_mutex);
5383 dev_priv->ums.mm_suspended = 0;
5384
5385 ret = i915_gem_init_hw(dev);
5386 if (ret != 0) {
5387 mutex_unlock(&dev->struct_mutex);
5388 return ret;
5389 }
5390
5391 BUG_ON(!list_empty(&dev_priv->gtt.base.active_list));
5392 mutex_unlock(&dev->struct_mutex);
5393
5394 ret = drm_irq_install(dev);
5395 if (ret)
5396 goto cleanup_ringbuffer;
5397
5398 return 0;
5399
5400 cleanup_ringbuffer:
5401 mutex_lock(&dev->struct_mutex);
5402 i915_gem_cleanup_ringbuffer(dev);
5403 dev_priv->ums.mm_suspended = 1;
5404 mutex_unlock(&dev->struct_mutex);
5405
5406 return ret;
5407 }
5408
5409 int
5410 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
5411 struct drm_file *file_priv)
5412 {
5413 if (drm_core_check_feature(dev, DRIVER_MODESET))
5414 return 0;
5415
5416 drm_irq_uninstall(dev);
5417
5418 return i915_gem_suspend(dev);
5419 }
5420
5421 void
5422 i915_gem_lastclose(struct drm_device *dev)
5423 {
5424 int ret;
5425
5426 if (drm_core_check_feature(dev, DRIVER_MODESET))
5427 return;
5428
5429 ret = i915_gem_suspend(dev);
5430 if (ret)
5431 DRM_ERROR("failed to idle hardware: %d\n", ret);
5432 }
5433
5434 static void
5435 init_ring_lists(struct intel_ring_buffer *ring)
5436 {
5437 INIT_LIST_HEAD(&ring->active_list);
5438 INIT_LIST_HEAD(&ring->request_list);
5439 }
5440
5441 void i915_init_vm(struct drm_i915_private *dev_priv,
5442 struct i915_address_space *vm)
5443 {
5444 if (!i915_is_ggtt(vm))
5445 drm_mm_init(&vm->mm, vm->start, vm->total);
5446 vm->dev = dev_priv->dev;
5447 INIT_LIST_HEAD(&vm->active_list);
5448 INIT_LIST_HEAD(&vm->inactive_list);
5449 INIT_LIST_HEAD(&vm->global_link);
5450 list_add_tail(&vm->global_link, &dev_priv->vm_list);
5451 }
5452
5453 void
5454 i915_gem_load(struct drm_device *dev)
5455 {
5456 struct drm_i915_private *dev_priv = dev->dev_private;
5457 int i;
5458
5459 dev_priv->slab =
5460 kmem_cache_create("i915_gem_object",
5461 sizeof(struct drm_i915_gem_object), 0,
5462 SLAB_HWCACHE_ALIGN,
5463 NULL);
5464
5465 INIT_LIST_HEAD(&dev_priv->vm_list);
5466 i915_init_vm(dev_priv, &dev_priv->gtt.base);
5467
5468 INIT_LIST_HEAD(&dev_priv->context_list);
5469 INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
5470 INIT_LIST_HEAD(&dev_priv->mm.bound_list);
5471 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
5472 for (i = 0; i < I915_NUM_RINGS; i++)
5473 init_ring_lists(&dev_priv->ring[i]);
5474 for (i = 0; i < I915_MAX_NUM_FENCES; i++)
5475 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
5476 INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
5477 i915_gem_retire_work_handler);
5478 INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
5479 i915_gem_idle_work_handler);
5480 #ifdef __NetBSD__
5481 spin_lock_init(&dev_priv->gpu_error.reset_lock);
5482 DRM_INIT_WAITQUEUE(&dev_priv->gpu_error.reset_queue, "i915errst");
5483 #else
5484 init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
5485 #endif
5486
5487 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
5488 if (IS_GEN3(dev)) {
5489 I915_WRITE(MI_ARB_STATE,
5490 _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
5491 }
5492
5493 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
5494
5495 /* Old X drivers will take 0-2 for front, back, depth buffers */
5496 if (!drm_core_check_feature(dev, DRIVER_MODESET))
5497 dev_priv->fence_reg_start = 3;
5498
5499 if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev))
5500 dev_priv->num_fence_regs = 32;
5501 else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
5502 dev_priv->num_fence_regs = 16;
5503 else
5504 dev_priv->num_fence_regs = 8;
5505
5506 /* Initialize fence registers to zero */
5507 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
5508 i915_gem_restore_fences(dev);
5509
5510 i915_gem_detect_bit_6_swizzle(dev);
5511 #ifdef __NetBSD__
5512 DRM_INIT_WAITQUEUE(&dev_priv->pending_flip_queue, "i915flip");
5513 spin_lock_init(&dev_priv->pending_flip_lock);
5514 #else
5515 init_waitqueue_head(&dev_priv->pending_flip_queue);
5516 #endif
5517
5518 dev_priv->mm.interruptible = true;
5519
5520 dev_priv->mm.inactive_shrinker.scan_objects = i915_gem_inactive_scan;
5521 dev_priv->mm.inactive_shrinker.count_objects = i915_gem_inactive_count;
5522 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
5523 register_shrinker(&dev_priv->mm.inactive_shrinker);
5524 }
5525
5526 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
5527 {
5528 struct drm_i915_file_private *file_priv = file->driver_priv;
5529
5530 cancel_delayed_work_sync(&file_priv->mm.idle_work);
5531
5532 /* Clean up our request list when the client is going away, so that
5533 * later retire_requests won't dereference our soon-to-be-gone
5534 * file_priv.
5535 */
5536 spin_lock(&file_priv->mm.lock);
5537 while (!list_empty(&file_priv->mm.request_list)) {
5538 struct drm_i915_gem_request *request;
5539
5540 request = list_first_entry(&file_priv->mm.request_list,
5541 struct drm_i915_gem_request,
5542 client_list);
5543 list_del(&request->client_list);
5544 request->file_priv = NULL;
5545 }
5546 spin_unlock(&file_priv->mm.lock);
5547 }
5548
5549 static void
5550 i915_gem_file_idle_work_handler(struct work_struct *work)
5551 {
5552 struct drm_i915_file_private *file_priv =
5553 container_of(work, typeof(*file_priv), mm.idle_work.work);
5554
5555 atomic_set(&file_priv->rps_wait_boost, false);
5556 }
5557
5558 int i915_gem_open(struct drm_device *dev, struct drm_file *file)
5559 {
5560 struct drm_i915_file_private *file_priv;
5561 int ret;
5562
5563 DRM_DEBUG_DRIVER("\n");
5564
5565 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
5566 if (!file_priv)
5567 return -ENOMEM;
5568
5569 file->driver_priv = file_priv;
5570 file_priv->dev_priv = dev->dev_private;
5571 file_priv->file = file;
5572
5573 spin_lock_init(&file_priv->mm.lock);
5574 INIT_LIST_HEAD(&file_priv->mm.request_list);
5575 INIT_DELAYED_WORK(&file_priv->mm.idle_work,
5576 i915_gem_file_idle_work_handler);
5577
5578 ret = i915_gem_context_open(dev, file);
5579 if (ret)
5580 kfree(file_priv);
5581
5582 return ret;
5583 }
5584
5585 #ifndef __NetBSD__
5586 static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
5587 {
5588 if (!mutex_is_locked(mutex))
5589 return false;
5590
5591 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)
5592 return mutex->owner == task;
5593 #else
5594 /* Since UP may be pre-empted, we cannot assume that we own the lock */
5595 return false;
5596 #endif
5597 }
5598 #endif
5599
5600 static unsigned long
5601 i915_gem_inactive_count(struct shrinker *shrinker, struct shrink_control *sc)
5602 {
5603 #ifdef __NetBSD__ /* XXX shrinkers */
5604 return 0;
5605 #else
5606 struct drm_i915_private *dev_priv =
5607 container_of(shrinker,
5608 struct drm_i915_private,
5609 mm.inactive_shrinker);
5610 struct drm_device *dev = dev_priv->dev;
5611 struct drm_i915_gem_object *obj;
5612 bool unlock = true;
5613 unsigned long count;
5614
5615 if (!mutex_trylock(&dev->struct_mutex)) {
5616 if (!mutex_is_locked_by(&dev->struct_mutex, current))
5617 return 0;
5618
5619 if (dev_priv->mm.shrinker_no_lock_stealing)
5620 return 0;
5621
5622 unlock = false;
5623 }
5624
5625 count = 0;
5626 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list)
5627 if (obj->pages_pin_count == 0)
5628 count += obj->base.size >> PAGE_SHIFT;
5629
5630 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
5631 if (obj->active)
5632 continue;
5633
5634 if (!i915_gem_obj_is_pinned(obj) && obj->pages_pin_count == 0)
5635 count += obj->base.size >> PAGE_SHIFT;
5636 }
5637
5638 if (unlock)
5639 mutex_unlock(&dev->struct_mutex);
5640
5641 return count;
5642 #endif
5643 }
5644
5645 /* All the new VM stuff */
5646 unsigned long i915_gem_obj_offset(struct drm_i915_gem_object *o,
5647 struct i915_address_space *vm)
5648 {
5649 struct drm_i915_private *dev_priv = o->base.dev->dev_private;
5650 struct i915_vma *vma;
5651
5652 if (!dev_priv->mm.aliasing_ppgtt ||
5653 vm == &dev_priv->mm.aliasing_ppgtt->base)
5654 vm = &dev_priv->gtt.base;
5655
5656 BUG_ON(list_empty(&o->vma_list));
5657 list_for_each_entry(vma, &o->vma_list, vma_link) {
5658 if (vma->vm == vm)
5659 return vma->node.start;
5660
5661 }
5662 return -1;
5663 }
5664
5665 bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
5666 struct i915_address_space *vm)
5667 {
5668 struct i915_vma *vma;
5669
5670 list_for_each_entry(vma, &o->vma_list, vma_link)
5671 if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
5672 return true;
5673
5674 return false;
5675 }
5676
5677 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
5678 {
5679 struct i915_vma *vma;
5680
5681 list_for_each_entry(vma, &o->vma_list, vma_link)
5682 if (drm_mm_node_allocated(&vma->node))
5683 return true;
5684
5685 return false;
5686 }
5687
5688 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
5689 struct i915_address_space *vm)
5690 {
5691 struct drm_i915_private *dev_priv = o->base.dev->dev_private;
5692 struct i915_vma *vma;
5693
5694 if (!dev_priv->mm.aliasing_ppgtt ||
5695 vm == &dev_priv->mm.aliasing_ppgtt->base)
5696 vm = &dev_priv->gtt.base;
5697
5698 BUG_ON(list_empty(&o->vma_list));
5699
5700 list_for_each_entry(vma, &o->vma_list, vma_link)
5701 if (vma->vm == vm)
5702 return vma->node.size;
5703
5704 return 0;
5705 }
5706
5707 static unsigned long
5708 i915_gem_inactive_scan(struct shrinker *shrinker, struct shrink_control *sc)
5709 {
5710 #ifdef __NetBSD__ /* XXX shrinkers */
5711 return 0;
5712 #else
5713 struct drm_i915_private *dev_priv =
5714 container_of(shrinker,
5715 struct drm_i915_private,
5716 mm.inactive_shrinker);
5717 struct drm_device *dev = dev_priv->dev;
5718 unsigned long freed;
5719 bool unlock = true;
5720
5721 if (!mutex_trylock(&dev->struct_mutex)) {
5722 if (!mutex_is_locked_by(&dev->struct_mutex, current))
5723 return SHRINK_STOP;
5724
5725 if (dev_priv->mm.shrinker_no_lock_stealing)
5726 return SHRINK_STOP;
5727
5728 unlock = false;
5729 }
5730
5731 freed = i915_gem_purge(dev_priv, sc->nr_to_scan);
5732 if (freed < sc->nr_to_scan)
5733 freed += __i915_gem_shrink(dev_priv,
5734 sc->nr_to_scan - freed,
5735 false);
5736 if (freed < sc->nr_to_scan)
5737 freed += i915_gem_shrink_all(dev_priv);
5738
5739 if (unlock)
5740 mutex_unlock(&dev->struct_mutex);
5741
5742 return freed;
5743 #endif
5744 }
5745
5746 struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
5747 {
5748 struct i915_vma *vma;
5749
5750 if (WARN_ON(list_empty(&obj->vma_list)))
5751 return NULL;
5752
5753 vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link);
5754 if (vma->vm != obj_to_ggtt(obj))
5755 return NULL;
5756
5757 return vma;
5758 }
5759