i915_gem.c revision 1.11

1 /*
2 * Copyright 2008 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric (at) anholt.net>
25 *
26 */
27
28 #ifdef __NetBSD__
29 #if 0 /* XXX uvmhist option? */
30 #include "opt_uvmhist.h"
31 #endif
32
33 #include <sys/types.h>
34 #include <sys/param.h>
35
36 #include <x86/machdep.h> /* x86_select_freelist */
37
38 #include <uvm/uvm.h>
39 #include <uvm/uvm_extern.h>
40 #include <uvm/uvm_fault.h>
41 #include <uvm/uvm_page.h>
42 #include <uvm/uvm_pmap.h>
43 #include <uvm/uvm_prot.h>
44 #endif
45
46 #include <drm/drmP.h>
47 #include <drm/i915_drm.h>
48 #include "i915_drv.h"
49 #include "i915_trace.h"
50 #include "intel_drv.h"
51 #include <linux/shmem_fs.h>
52 #include <linux/slab.h>
53 #include <linux/swap.h>
54 #include <linux/pci.h>
55 #include <linux/dma-buf.h>
56 #include <linux/errno.h>
57 #include <linux/time.h>
58 #include <linux/err.h>
59 #include <asm/param.h>
60
61 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
62 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
63 static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
64 unsigned alignment,
65 bool map_and_fenceable,
66 bool nonblocking);
67 static int i915_gem_phys_pwrite(struct drm_device *dev,
68 struct drm_i915_gem_object *obj,
69 struct drm_i915_gem_pwrite *args,
70 struct drm_file *file);
71
72 static void i915_gem_write_fence(struct drm_device *dev, int reg,
73 struct drm_i915_gem_object *obj);
74 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
75 struct drm_i915_fence_reg *fence,
76 bool enable);
77
78 static int i915_gem_inactive_shrink(struct shrinker *shrinker,
79 struct shrink_control *sc);
80 static long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
81 static void i915_gem_shrink_all(struct drm_i915_private *dev_priv);
82 static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
83
84 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
85 {
86 if (obj->tiling_mode)
87 i915_gem_release_mmap(obj);
88
89 /* As we do not have an associated fence register, we will force
90 * a tiling change if we ever need to acquire one.
91 */
92 obj->fence_dirty = false;
93 obj->fence_reg = I915_FENCE_REG_NONE;
94 }
95
96 /* some bookkeeping */
97 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
98 size_t size)
99 {
100 dev_priv->mm.object_count++;
101 dev_priv->mm.object_memory += size;
102 }
103
104 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
105 size_t size)
106 {
107 dev_priv->mm.object_count--;
108 dev_priv->mm.object_memory -= size;
109 }
110
111 static int
112 i915_gem_wait_for_error(struct drm_device *dev)
113 {
114 struct drm_i915_private *dev_priv = dev->dev_private;
115 struct completion *x = &dev_priv->error_completion;
116 #ifndef __NetBSD__
117 unsigned long flags;
118 #endif
119 int ret;
120
121 if (!atomic_read(&dev_priv->mm.wedged))
122 return 0;
123
124 /*
125 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
126 * userspace. If it takes that long something really bad is going on and
127 * we should simply try to bail out and fail as gracefully as possible.
128 */
129 ret = wait_for_completion_interruptible_timeout(x, 10*HZ);
130 if (ret == 0) {
131 DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
132 return -EIO;
133 } else if (ret < 0) {
134 return ret;
135 }
136
137 if (atomic_read(&dev_priv->mm.wedged)) {
138 /* GPU is hung, bump the completion count to account for
139 * the token we just consumed so that we never hit zero and
140 * end up waiting upon a subsequent completion event that
141 * will never happen.
142 */
143 #ifdef __NetBSD__
144 /* XXX Hope it's not a problem that we might wake someone. */
145 complete(x);
146 #else
147 spin_lock_irqsave(&x->wait.lock, flags);
148 x->done++;
149 spin_unlock_irqrestore(&x->wait.lock, flags);
150 #endif
151 }
152 return 0;
153 }
154
155 int i915_mutex_lock_interruptible(struct drm_device *dev)
156 {
157 int ret;
158
159 ret = i915_gem_wait_for_error(dev);
160 if (ret)
161 return ret;
162
163 ret = mutex_lock_interruptible(&dev->struct_mutex);
164 if (ret)
165 return ret;
166
167 WARN_ON(i915_verify_lists(dev));
168 return 0;
169 }
170
171 static inline bool
172 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
173 {
174 return obj->gtt_space && !obj->active;
175 }
176
177 int
178 i915_gem_init_ioctl(struct drm_device *dev, void *data,
179 struct drm_file *file)
180 {
181 struct drm_i915_gem_init *args = data;
182
183 if (drm_core_check_feature(dev, DRIVER_MODESET))
184 return -ENODEV;
185
186 if (args->gtt_start >= args->gtt_end ||
187 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
188 return -EINVAL;
189
190 /* GEM with user mode setting was never supported on ilk and later. */
191 if (INTEL_INFO(dev)->gen >= 5)
192 return -ENODEV;
193
194 mutex_lock(&dev->struct_mutex);
195 i915_gem_init_global_gtt(dev, args->gtt_start,
196 args->gtt_end, args->gtt_end);
197 mutex_unlock(&dev->struct_mutex);
198
199 return 0;
200 }
201
202 int
203 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
204 struct drm_file *file)
205 {
206 struct drm_i915_private *dev_priv = dev->dev_private;
207 struct drm_i915_gem_get_aperture *args = data;
208 struct drm_i915_gem_object *obj;
209 size_t pinned;
210
211 pinned = 0;
212 mutex_lock(&dev->struct_mutex);
213 list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list)
214 if (obj->pin_count)
215 pinned += obj->gtt_space->size;
216 mutex_unlock(&dev->struct_mutex);
217
218 args->aper_size = dev_priv->mm.gtt_total;
219 args->aper_available_size = args->aper_size - pinned;
220
221 return 0;
222 }
223
224 static int
225 i915_gem_create(struct drm_file *file,
226 struct drm_device *dev,
227 uint64_t size,
228 uint32_t *handle_p)
229 {
230 struct drm_i915_gem_object *obj;
231 int ret;
232 u32 handle;
233
234 size = roundup(size, PAGE_SIZE);
235 if (size == 0)
236 return -EINVAL;
237
238 /* Allocate the new object */
239 obj = i915_gem_alloc_object(dev, size);
240 if (obj == NULL)
241 return -ENOMEM;
242
243 ret = drm_gem_handle_create(file, &obj->base, &handle);
244 if (ret) {
245 drm_gem_object_release(&obj->base);
246 i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
247 kfree(obj);
248 return ret;
249 }
250
251 /* drop reference from allocate - handle holds it now */
252 drm_gem_object_unreference(&obj->base);
253 trace_i915_gem_object_create(obj);
254
255 *handle_p = handle;
256 return 0;
257 }
258
259 int
260 i915_gem_dumb_create(struct drm_file *file,
261 struct drm_device *dev,
262 struct drm_mode_create_dumb *args)
263 {
264 /* have to work out size/pitch and return them */
265 #ifdef __NetBSD__ /* ALIGN already means something. */
266 args->pitch = round_up(args->width * ((args->bpp + 7) / 8), 64);
267 #else
268 args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
269 #endif
270 args->size = args->pitch * args->height;
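	/*
	 * Worked example (editor's note, derived from the code above):
	 * a 1366x768, 32bpp dumb buffer needs 1366 * 4 = 5464 bytes per
	 * row, which rounds up to a 64-byte-aligned pitch of 5504, giving
	 * a size of 5504 * 768 bytes.
	 */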
271 return i915_gem_create(file, dev,
272 args->size, &args->handle);
273 }
274
275 int i915_gem_dumb_destroy(struct drm_file *file,
276 struct drm_device *dev,
277 uint32_t handle)
278 {
279 return drm_gem_handle_delete(file, handle);
280 }
281
282 /**
283 * Creates a new mm object and returns a handle to it.
284 */
285 int
286 i915_gem_create_ioctl(struct drm_device *dev, void *data,
287 struct drm_file *file)
288 {
289 struct drm_i915_gem_create *args = data;
290
291 return i915_gem_create(file, dev,
292 args->size, &args->handle);
293 }
294
295 static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
296 {
297 drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
298
299 return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
300 obj->tiling_mode != I915_TILING_NONE;
301 }
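/*
 * Editor's note on the swizzled copy helpers below: when the chipset uses
 * I915_BIT_6_SWIZZLE_9_10_17 (checked above) and the object is tiled, pages
 * whose physical address has bit 17 set need their data exchanged between
 * adjacent 64-byte cachelines.  The helpers do this by XORing the GPU-side
 * offset with 64 (flipping bit 6), so bytes 0..63 of each 128-byte block are
 * copied to or from bytes 64..127 and vice versa, while cacheline_end keeps
 * every chunk within a single 64-byte line.
 */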
302
303 static inline int
304 __copy_to_user_swizzled(char __user *cpu_vaddr,
305 const char *gpu_vaddr, int gpu_offset,
306 int length)
307 {
308 int ret, cpu_offset = 0;
309
310 while (length > 0) {
311 #ifdef __NetBSD__
312 int cacheline_end = round_up(gpu_offset + 1, 64);
313 #else
314 int cacheline_end = ALIGN(gpu_offset + 1, 64);
315 #endif
316 int this_length = min(cacheline_end - gpu_offset, length);
317 int swizzled_gpu_offset = gpu_offset ^ 64;
318
319 ret = __copy_to_user(cpu_vaddr + cpu_offset,
320 gpu_vaddr + swizzled_gpu_offset,
321 this_length);
322 if (ret)
323 return ret + length;
324
325 cpu_offset += this_length;
326 gpu_offset += this_length;
327 length -= this_length;
328 }
329
330 return 0;
331 }
332
333 static inline int
334 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
335 const char __user *cpu_vaddr,
336 int length)
337 {
338 int ret, cpu_offset = 0;
339
340 while (length > 0) {
341 #ifdef __NetBSD__
342 int cacheline_end = round_up(gpu_offset + 1, 64);
343 #else
344 int cacheline_end = ALIGN(gpu_offset + 1, 64);
345 #endif
346 int this_length = min(cacheline_end - gpu_offset, length);
347 int swizzled_gpu_offset = gpu_offset ^ 64;
348
349 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
350 cpu_vaddr + cpu_offset,
351 this_length);
352 if (ret)
353 return ret + length;
354
355 cpu_offset += this_length;
356 gpu_offset += this_length;
357 length -= this_length;
358 }
359
360 return 0;
361 }
362
363 /* Per-page copy function for the shmem pread fastpath.
364 * Flushes invalid cachelines before reading the target if
365 * needs_clflush is set. */
366 static int
367 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
368 char __user *user_data,
369 bool page_do_bit17_swizzling, bool needs_clflush)
370 {
371 #ifdef __NetBSD__ /* XXX atomic shmem fast path */
372 return -EFAULT;
373 #else
374 char *vaddr;
375 int ret;
376
377 if (unlikely(page_do_bit17_swizzling))
378 return -EINVAL;
379
380 vaddr = kmap_atomic(page);
381 if (needs_clflush)
382 drm_clflush_virt_range(vaddr + shmem_page_offset,
383 page_length);
384 ret = __copy_to_user_inatomic(user_data,
385 vaddr + shmem_page_offset,
386 page_length);
387 kunmap_atomic(vaddr);
388
389 return ret ? -EFAULT : 0;
390 #endif
391 }
392
393 static void
394 shmem_clflush_swizzled_range(char *addr, unsigned long length,
395 bool swizzled)
396 {
397 if (unlikely(swizzled)) {
398 unsigned long start = (unsigned long) addr;
399 unsigned long end = (unsigned long) addr + length;
400
401 /* For swizzling simply ensure that we always flush both
402 * channels. Lame, but simple and it works. Swizzled
403 * pwrite/pread is far from a hotpath - current userspace
404 * doesn't use it at all. */
405 start = round_down(start, 128);
406 end = round_up(end, 128);
407
408 drm_clflush_virt_range((void *)start, end - start);
409 } else {
410 drm_clflush_virt_range(addr, length);
411 }
412
413 }
414
415 /* Only difference to the fast-path function is that this can handle bit17
416 * and uses non-atomic copy and kmap functions. */
417 static int
418 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
419 char __user *user_data,
420 bool page_do_bit17_swizzling, bool needs_clflush)
421 {
422 char *vaddr;
423 int ret;
424
425 vaddr = kmap(page);
426 if (needs_clflush)
427 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
428 page_length,
429 page_do_bit17_swizzling);
430
431 if (page_do_bit17_swizzling)
432 ret = __copy_to_user_swizzled(user_data,
433 vaddr, shmem_page_offset,
434 page_length);
435 else
436 ret = __copy_to_user(user_data,
437 vaddr + shmem_page_offset,
438 page_length);
439 kunmap(page);
440
441 return ret ? - EFAULT : 0;
442 }
443
444 static int
445 i915_gem_shmem_pread(struct drm_device *dev,
446 struct drm_i915_gem_object *obj,
447 struct drm_i915_gem_pread *args,
448 struct drm_file *file)
449 {
450 char __user *user_data;
451 ssize_t remain;
452 loff_t offset;
453 int shmem_page_offset, page_length, ret = 0;
454 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
455 int hit_slowpath = 0;
456 #ifndef __NetBSD__ /* XXX */
457 int prefaulted = 0;
458 #endif
459 int needs_clflush = 0;
460 #ifndef __NetBSD__
461 struct scatterlist *sg;
462 int i;
463 #endif
464
465 user_data = (char __user *) (uintptr_t) args->data_ptr;
466 remain = args->size;
467
468 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
469
470 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
471 /* If we're not in the cpu read domain, set ourself into the gtt
472 * read domain and manually flush cachelines (if required). This
473 * optimizes for the case when the gpu will dirty the data
474 * anyway again before the next pread happens. */
475 if (obj->cache_level == I915_CACHE_NONE)
476 needs_clflush = 1;
477 if (obj->gtt_space) {
478 ret = i915_gem_object_set_to_gtt_domain(obj, false);
479 if (ret)
480 return ret;
481 }
482 }
483
484 ret = i915_gem_object_get_pages(obj);
485 if (ret)
486 return ret;
487
488 i915_gem_object_pin_pages(obj);
489
490 offset = args->offset;
491
492 #ifdef __NetBSD__
493 /*
494 * XXX This is a big #ifdef with a lot of duplicated code, but
495 * factoring out the loop head -- which is all that
496 * substantially differs -- is probably more trouble than it's
497 * worth at the moment.
498 */
499 while (0 < remain) {
500 /* Get the next page. */
501 shmem_page_offset = offset_in_page(offset);
502 KASSERT(shmem_page_offset < PAGE_SIZE);
503 page_length = MIN(remain, (PAGE_SIZE - shmem_page_offset));
504 struct page *const page = i915_gem_object_get_page(obj,
505 atop(offset));
506
507 /* Decide whether to swizzle bit 17. */
508 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
509 (page_to_phys(page) & (1 << 17)) != 0;
510
511 /* Try the fast path. */
512 ret = shmem_pread_fast(page, shmem_page_offset, page_length,
513 user_data, page_do_bit17_swizzling, needs_clflush);
514 if (ret == 0)
515 goto next_page;
516
517 /* Fast path failed. Try the slow path. */
518 hit_slowpath = 1;
519 mutex_unlock(&dev->struct_mutex);
520 /* XXX prefault */
521 ret = shmem_pread_slow(page, shmem_page_offset, page_length,
522 user_data, page_do_bit17_swizzling, needs_clflush);
523 mutex_lock(&dev->struct_mutex);
524
525 next_page:
526 /* XXX mark page accessed */
527 if (ret)
528 goto out;
529
530 KASSERT(page_length <= remain);
531 remain -= page_length;
532 user_data += page_length;
533 offset += page_length;
534 }
535 #else
536 for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
537 struct page *page;
538
539 if (i < offset >> PAGE_SHIFT)
540 continue;
541
542 if (remain <= 0)
543 break;
544
545 /* Operation in this page
546 *
547 * shmem_page_offset = offset within page in shmem file
548 * page_length = bytes to copy for this page
549 */
550 shmem_page_offset = offset_in_page(offset);
551 page_length = remain;
552 if ((shmem_page_offset + page_length) > PAGE_SIZE)
553 page_length = PAGE_SIZE - shmem_page_offset;
554
555 page = sg_page(sg);
556 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
557 (page_to_phys(page) & (1 << 17)) != 0;
558
559 ret = shmem_pread_fast(page, shmem_page_offset, page_length,
560 user_data, page_do_bit17_swizzling,
561 needs_clflush);
562 if (ret == 0)
563 goto next_page;
564
565 hit_slowpath = 1;
566 mutex_unlock(&dev->struct_mutex);
567
568 if (!prefaulted) {
569 ret = fault_in_multipages_writeable(user_data, remain);
570 /* Userspace is tricking us, but we've already clobbered
571 * its pages with the prefault and promised to write the
572 * data up to the first fault. Hence ignore any errors
573 * and just continue. */
574 (void)ret;
575 prefaulted = 1;
576 }
577
578 ret = shmem_pread_slow(page, shmem_page_offset, page_length,
579 user_data, page_do_bit17_swizzling,
580 needs_clflush);
581
582 mutex_lock(&dev->struct_mutex);
583
584 next_page:
585 mark_page_accessed(page);
586
587 if (ret)
588 goto out;
589
590 remain -= page_length;
591 user_data += page_length;
592 offset += page_length;
593 }
594 #endif
595
596 out:
597 i915_gem_object_unpin_pages(obj);
598
599 if (hit_slowpath) {
600 /* Fixup: Kill any reinstated backing storage pages */
601 if (obj->madv == __I915_MADV_PURGED)
602 i915_gem_object_truncate(obj);
603 }
604
605 return ret;
606 }
607
608 /**
609 * Reads data from the object referenced by handle.
610 *
611 * On error, the contents of *data are undefined.
612 */
613 int
614 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
615 struct drm_file *file)
616 {
617 struct drm_i915_gem_pread *args = data;
618 struct drm_i915_gem_object *obj;
619 int ret = 0;
620
621 if (args->size == 0)
622 return 0;
623
624 if (!access_ok(VERIFY_WRITE,
625 (char __user *)(uintptr_t)args->data_ptr,
626 args->size))
627 return -EFAULT;
628
629 ret = i915_mutex_lock_interruptible(dev);
630 if (ret)
631 return ret;
632
633 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
634 if (&obj->base == NULL) {
635 ret = -ENOENT;
636 goto unlock;
637 }
638
639 /* Bounds check source. */
640 if (args->offset > obj->base.size ||
641 args->size > obj->base.size - args->offset) {
642 ret = -EINVAL;
643 goto out;
644 }
645
646 #ifndef __NetBSD__ /* XXX drm prime */
647 /* prime objects have no backing filp to GEM pread/pwrite
648 * pages from.
649 */
650 if (!obj->base.filp) {
651 ret = -EINVAL;
652 goto out;
653 }
654 #endif
655
656 trace_i915_gem_object_pread(obj, args->offset, args->size);
657
658 ret = i915_gem_shmem_pread(dev, obj, args, file);
659
660 out:
661 drm_gem_object_unreference(&obj->base);
662 unlock:
663 mutex_unlock(&dev->struct_mutex);
664 return ret;
665 }
666
667 /* This is the fast write path which cannot handle
668 * page faults in the source data
669 */
670
671 static inline int
672 fast_user_write(struct io_mapping *mapping,
673 loff_t page_base, int page_offset,
674 char __user *user_data,
675 int length)
676 {
677 #ifdef __NetBSD__ /* XXX atomic shmem fast path */
678 return -EFAULT;
679 #else
680 void __iomem *vaddr_atomic;
681 void *vaddr;
682 unsigned long unwritten;
683
684 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
685 /* We can use the cpu mem copy function because this is X86. */
686 vaddr = (void __force*)vaddr_atomic + page_offset;
687 unwritten = __copy_from_user_inatomic_nocache(vaddr,
688 user_data, length);
689 io_mapping_unmap_atomic(vaddr_atomic);
690 return unwritten;
691 #endif
692 }
693
694 /**
695 * This is the fast pwrite path, where we copy the data directly from the
696 * user into the GTT, uncached.
697 */
698 static int
699 i915_gem_gtt_pwrite_fast(struct drm_device *dev,
700 struct drm_i915_gem_object *obj,
701 struct drm_i915_gem_pwrite *args,
702 struct drm_file *file)
703 {
704 drm_i915_private_t *dev_priv = dev->dev_private;
705 ssize_t remain;
706 loff_t offset, page_base;
707 char __user *user_data;
708 int page_offset, page_length, ret;
709
710 ret = i915_gem_object_pin(obj, 0, true, true);
711 if (ret)
712 goto out;
713
714 ret = i915_gem_object_set_to_gtt_domain(obj, true);
715 if (ret)
716 goto out_unpin;
717
718 ret = i915_gem_object_put_fence(obj);
719 if (ret)
720 goto out_unpin;
721
722 user_data = (char __user *) (uintptr_t) args->data_ptr;
723 remain = args->size;
724
725 offset = obj->gtt_offset + args->offset;
726
727 while (remain > 0) {
728 /* Operation in this page
729 *
730 * page_base = page offset within aperture
731 * page_offset = offset within page
732 * page_length = bytes to copy for this page
733 */
734 page_base = offset & PAGE_MASK;
735 page_offset = offset_in_page(offset);
736 page_length = remain;
737 if ((page_offset + remain) > PAGE_SIZE)
738 page_length = PAGE_SIZE - page_offset;
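		/*
		 * E.g. with 4096-byte pages, offset 0x12345 gives
		 * page_base = 0x12000, page_offset = 0x345 and, with
		 * enough data remaining, page_length = 0x1000 - 0x345
		 * bytes for this iteration.
		 */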
739
740 /* If we get a fault while copying data, then (presumably) our
741 * source page isn't available. Return the error and we'll
742 * retry in the slow path.
743 */
744 if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
745 page_offset, user_data, page_length)) {
746 ret = -EFAULT;
747 goto out_unpin;
748 }
749
750 remain -= page_length;
751 user_data += page_length;
752 offset += page_length;
753 }
754
755 out_unpin:
756 i915_gem_object_unpin(obj);
757 out:
758 return ret;
759 }
760
761 /* Per-page copy function for the shmem pwrite fastpath.
762 * Flushes invalid cachelines before writing to the target if
763 * needs_clflush_before is set and flushes out any written cachelines after
764 * writing if needs_clflush is set. */
765 static int
766 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
767 char __user *user_data,
768 bool page_do_bit17_swizzling,
769 bool needs_clflush_before,
770 bool needs_clflush_after)
771 {
772 #ifdef __NetBSD__
773 return -EFAULT;
774 #else
775 char *vaddr;
776 int ret;
777
778 if (unlikely(page_do_bit17_swizzling))
779 return -EINVAL;
780
781 vaddr = kmap_atomic(page);
782 if (needs_clflush_before)
783 drm_clflush_virt_range(vaddr + shmem_page_offset,
784 page_length);
785 ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset,
786 user_data,
787 page_length);
788 if (needs_clflush_after)
789 drm_clflush_virt_range(vaddr + shmem_page_offset,
790 page_length);
791 kunmap_atomic(vaddr);
792
793 return ret ? -EFAULT : 0;
794 #endif
795 }
796
797 /* Only difference to the fast-path function is that this can handle bit17
798 * and uses non-atomic copy and kmap functions. */
799 static int
800 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
801 char __user *user_data,
802 bool page_do_bit17_swizzling,
803 bool needs_clflush_before,
804 bool needs_clflush_after)
805 {
806 char *vaddr;
807 int ret;
808
809 vaddr = kmap(page);
810 if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
811 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
812 page_length,
813 page_do_bit17_swizzling);
814 if (page_do_bit17_swizzling)
815 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
816 user_data,
817 page_length);
818 else
819 ret = __copy_from_user(vaddr + shmem_page_offset,
820 user_data,
821 page_length);
822 if (needs_clflush_after)
823 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
824 page_length,
825 page_do_bit17_swizzling);
826 kunmap(page);
827
828 return ret ? -EFAULT : 0;
829 }
830
831 static int
832 i915_gem_shmem_pwrite(struct drm_device *dev,
833 struct drm_i915_gem_object *obj,
834 struct drm_i915_gem_pwrite *args,
835 struct drm_file *file)
836 {
837 ssize_t remain;
838 loff_t offset;
839 char __user *user_data;
840 int shmem_page_offset, page_length, ret = 0;
841 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
842 int hit_slowpath = 0;
843 int needs_clflush_after = 0;
844 int needs_clflush_before = 0;
845 #ifndef __NetBSD__
846 int i;
847 struct scatterlist *sg;
848 #endif
849
850 user_data = (char __user *) (uintptr_t) args->data_ptr;
851 remain = args->size;
852
853 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
854
855 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
856 /* If we're not in the cpu write domain, set ourself into the gtt
857 * write domain and manually flush cachelines (if required). This
858 * optimizes for the case when the gpu will use the data
859 * right away and we therefore have to clflush anyway. */
860 if (obj->cache_level == I915_CACHE_NONE)
861 needs_clflush_after = 1;
862 if (obj->gtt_space) {
863 ret = i915_gem_object_set_to_gtt_domain(obj, true);
864 if (ret)
865 return ret;
866 }
867 }
868 /* Same trick applies to invalidating partially written cachelines before
869 * writing. */
870 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)
871 && obj->cache_level == I915_CACHE_NONE)
872 needs_clflush_before = 1;
873
874 ret = i915_gem_object_get_pages(obj);
875 if (ret)
876 return ret;
877
878 i915_gem_object_pin_pages(obj);
879
880 offset = args->offset;
881 obj->dirty = 1;
882
883 #ifdef __NetBSD__
884 while (0 < remain) {
885 /* Get the next page. */
886 shmem_page_offset = offset_in_page(offset);
887 KASSERT(shmem_page_offset < PAGE_SIZE);
888 page_length = MIN(remain, (PAGE_SIZE - shmem_page_offset));
889 struct page *const page = i915_gem_object_get_page(obj,
890 atop(offset));
891
892 /* Decide whether to flush the cache or swizzle bit 17. */
893 const bool partial_cacheline_write = needs_clflush_before &&
894 ((shmem_page_offset | page_length)
895 & (cpu_info_primary.ci_cflush_lsize - 1));
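		/*
		 * E.g. with a 64-byte clflush line size, an 80-byte write
		 * starting at offset 8 leaves partial lines at both ends
		 * ((8 | 80) & 63 != 0), so the lines are flushed first to
		 * keep the untouched bytes up to date.
		 */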
896 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
897 (page_to_phys(page) & (1 << 17)) != 0;
898
899 /* Try the fast path. */
900 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
901 user_data, page_do_bit17_swizzling,
902 partial_cacheline_write, needs_clflush_after);
903 if (ret == 0)
904 goto next_page;
905
906 /* Fast path failed. Try the slow path. */
907 hit_slowpath = 1;
908 mutex_unlock(&dev->struct_mutex);
909 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
910 user_data, page_do_bit17_swizzling,
911 partial_cacheline_write, needs_clflush_after);
912 mutex_lock(&dev->struct_mutex);
913
914 next_page:
915 page->p_vmp.flags &= ~PG_CLEAN;
916 /* XXX mark page accessed */
917 if (ret)
918 goto out;
919
920 KASSERT(page_length <= remain);
921 remain -= page_length;
922 user_data += page_length;
923 offset += page_length;
924 }
925 #else
926 for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
927 struct page *page;
928 int partial_cacheline_write;
929
930 if (i < offset >> PAGE_SHIFT)
931 continue;
932
933 if (remain <= 0)
934 break;
935
936 /* Operation in this page
937 *
938 * shmem_page_offset = offset within page in shmem file
939 * page_length = bytes to copy for this page
940 */
941 shmem_page_offset = offset_in_page(offset);
942
943 page_length = remain;
944 if ((shmem_page_offset + page_length) > PAGE_SIZE)
945 page_length = PAGE_SIZE - shmem_page_offset;
946
947 /* If we don't overwrite a cacheline completely we need to be
948 * careful to have up-to-date data by first clflushing. Don't
949 * overcomplicate things and flush the entire patch. */
950 partial_cacheline_write = needs_clflush_before &&
951 ((shmem_page_offset | page_length)
952 & (boot_cpu_data.x86_clflush_size - 1));
953
954 page = sg_page(sg);
955 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
956 (page_to_phys(page) & (1 << 17)) != 0;
957
958 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
959 user_data, page_do_bit17_swizzling,
960 partial_cacheline_write,
961 needs_clflush_after);
962 if (ret == 0)
963 goto next_page;
964
965 hit_slowpath = 1;
966 mutex_unlock(&dev->struct_mutex);
967 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
968 user_data, page_do_bit17_swizzling,
969 partial_cacheline_write,
970 needs_clflush_after);
971
972 mutex_lock(&dev->struct_mutex);
973
974 next_page:
975 set_page_dirty(page);
976 mark_page_accessed(page);
977
978 if (ret)
979 goto out;
980
981 remain -= page_length;
982 user_data += page_length;
983 offset += page_length;
984 }
985 #endif
986
987 out:
988 i915_gem_object_unpin_pages(obj);
989
990 if (hit_slowpath) {
991 /* Fixup: Kill any reinstated backing storage pages */
992 if (obj->madv == __I915_MADV_PURGED)
993 i915_gem_object_truncate(obj);
994 /* and flush dirty cachelines in case the object isn't in the cpu write
995 * domain anymore. */
996 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
997 i915_gem_clflush_object(obj);
998 i915_gem_chipset_flush(dev);
999 }
1000 }
1001
1002 if (needs_clflush_after)
1003 i915_gem_chipset_flush(dev);
1004
1005 return ret;
1006 }
1007
1008 /**
1009 * Writes data to the object referenced by handle.
1010 *
1011 * On error, the contents of the buffer that were to be modified are undefined.
1012 */
1013 int
1014 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1015 struct drm_file *file)
1016 {
1017 struct drm_i915_gem_pwrite *args = data;
1018 struct drm_i915_gem_object *obj;
1019 int ret;
1020
1021 if (args->size == 0)
1022 return 0;
1023
1024 if (!access_ok(VERIFY_READ,
1025 (char __user *)(uintptr_t)args->data_ptr,
1026 args->size))
1027 return -EFAULT;
1028
1029 #ifndef __NetBSD__ /* XXX prefault */
1030 ret = fault_in_multipages_readable((char __user *)(uintptr_t)args->data_ptr,
1031 args->size);
1032 if (ret)
1033 return -EFAULT;
1034 #endif
1035
1036 ret = i915_mutex_lock_interruptible(dev);
1037 if (ret)
1038 return ret;
1039
1040 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1041 if (&obj->base == NULL) {
1042 ret = -ENOENT;
1043 goto unlock;
1044 }
1045
1046 /* Bounds check destination. */
1047 if (args->offset > obj->base.size ||
1048 args->size > obj->base.size - args->offset) {
1049 ret = -EINVAL;
1050 goto out;
1051 }
1052
1053 #ifndef __NetBSD__ /* XXX drm prime */
1054 /* prime objects have no backing filp to GEM pread/pwrite
1055 * pages from.
1056 */
1057 if (!obj->base.filp) {
1058 ret = -EINVAL;
1059 goto out;
1060 }
1061 #endif
1062
1063 trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1064
1065 ret = -EFAULT;
1066 /* We can only do the GTT pwrite on untiled buffers, as otherwise
1067 * it would end up going through the fenced access, and we'll get
1068 * different detiling behavior between reading and writing.
1069 * pread/pwrite currently are reading and writing from the CPU
1070 * perspective, requiring manual detiling by the client.
1071 */
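	/*
	 * Editor's note: three write paths follow.  Objects with a
	 * physically contiguous backing (phys_obj) use i915_gem_phys_pwrite;
	 * untiled, uncached objects not in the CPU write domain try the
	 * uncached GTT fast path; everything else, or a fast path that
	 * fails with -EFAULT/-ENOSPC, falls back to the shmem path.
	 */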
1072 if (obj->phys_obj) {
1073 ret = i915_gem_phys_pwrite(dev, obj, args, file);
1074 goto out;
1075 }
1076
1077 if (obj->cache_level == I915_CACHE_NONE &&
1078 obj->tiling_mode == I915_TILING_NONE &&
1079 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1080 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
1081 /* Note that the gtt paths might fail with non-page-backed user
1082 * pointers (e.g. gtt mappings when moving data between
1083 * textures). Fall back to the shmem path in that case. */
1084 }
1085
1086 if (ret == -EFAULT || ret == -ENOSPC)
1087 ret = i915_gem_shmem_pwrite(dev, obj, args, file);
1088
1089 out:
1090 drm_gem_object_unreference(&obj->base);
1091 unlock:
1092 mutex_unlock(&dev->struct_mutex);
1093 return ret;
1094 }
1095
1096 int
1097 i915_gem_check_wedge(struct drm_i915_private *dev_priv,
1098 bool interruptible)
1099 {
1100 if (atomic_read(&dev_priv->mm.wedged)) {
1101 struct completion *x = &dev_priv->error_completion;
1102 bool recovery_complete;
1103 #ifndef __NetBSD__
1104 unsigned long flags;
1105 #endif
1106
1107 #ifdef __NetBSD__
1108 /*
1109 * XXX This is a horrible kludge. Reading internal
1110 * fields is no good, nor is reading them unlocked, and
1111 * neither is locking it and then unlocking it before
1112 * making a decision.
1113 */
1114 recovery_complete = x->c_done > 0;
1115 #else
1116 /* Give the error handler a chance to run. */
1117 spin_lock_irqsave(&x->wait.lock, flags);
1118 recovery_complete = x->done > 0;
1119 spin_unlock_irqrestore(&x->wait.lock, flags);
1120 #endif
1121
1122 /* Non-interruptible callers can't handle -EAGAIN, hence return
1123 * -EIO unconditionally for these. */
1124 if (!interruptible)
1125 return -EIO;
1126
1127 /* Recovery complete, but still wedged means reset failure. */
1128 if (recovery_complete)
1129 return -EIO;
1130
1131 return -EAGAIN;
1132 }
1133
1134 return 0;
1135 }
1136
1137 /*
1138 * Compare seqno against outstanding lazy request. Emit a request if they are
1139 * equal.
1140 */
1141 static int
1142 i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
1143 {
1144 int ret;
1145
1146 BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
1147
1148 ret = 0;
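	/*
	 * If the seqno we are about to wait on is the ring's lazy request
	 * that has not been emitted yet, emit it now; otherwise a wait on
	 * it could never complete.
	 */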
1149 if (seqno == ring->outstanding_lazy_request)
1150 ret = i915_add_request(ring, NULL, NULL);
1151
1152 return ret;
1153 }
1154
1155 /**
1156 * __wait_seqno - wait until execution of seqno has finished
1157 * @ring: the ring expected to report seqno
1158 * @seqno: duh!
1159 * @interruptible: do an interruptible wait (normally yes)
1160 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
1161 *
1162 * Returns 0 if the seqno was found within the allotted time. Else returns the
1163 * errno with the remaining time filled in the timeout argument.
1164 */
1165 static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
1166 bool interruptible, struct timespec *timeout)
1167 {
1168 drm_i915_private_t *dev_priv = ring->dev->dev_private;
1169 struct timespec before, now, wait_time={1,0};
1170 unsigned long timeout_jiffies;
1171 long end;
1172 bool wait_forever = true;
1173 int ret;
1174
1175 if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
1176 return 0;
1177
1178 trace_i915_gem_request_wait_begin(ring, seqno);
1179
1180 if (timeout != NULL) {
1181 wait_time = *timeout;
1182 wait_forever = false;
1183 }
1184
1185 timeout_jiffies = timespec_to_jiffies(&wait_time);
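	/*
	 * With no caller-supplied timeout, wait_time stays at the default
	 * {1, 0} (one second), and wait_forever keeps re-arming the wait
	 * below until EXIT_COND holds or an error is returned.
	 */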
1186
1187 if (WARN_ON(!ring->irq_get(ring)))
1188 return -ENODEV;
1189
1190 /* Record current time in case interrupted by signal, or wedged */
1191 getrawmonotonic(&before);
1192
1193 #define EXIT_COND \
1194 (i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
1195 atomic_read(&dev_priv->mm.wedged))
1196 do {
1197 #ifdef __NetBSD__
1198 unsigned long flags;
1199 spin_lock_irqsave(&dev_priv->irq_lock, flags);
1200 if (interruptible)
1201 DRM_SPIN_TIMED_WAIT_UNTIL(end, &ring->irq_queue,
1202 &dev_priv->irq_lock,
1203 timeout_jiffies,
1204 EXIT_COND);
1205 else
1206 DRM_SPIN_TIMED_WAIT_NOINTR_UNTIL(end, &ring->irq_queue,
1207 &dev_priv->irq_lock,
1208 timeout_jiffies,
1209 EXIT_COND);
1210 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1211 #else
1212 if (interruptible)
1213 end = wait_event_interruptible_timeout(ring->irq_queue,
1214 EXIT_COND,
1215 timeout_jiffies);
1216 else
1217 end = wait_event_timeout(ring->irq_queue, EXIT_COND,
1218 timeout_jiffies);
1219
1220 #endif
1221 ret = i915_gem_check_wedge(dev_priv, interruptible);
1222 if (ret)
1223 end = ret;
1224 } while (end == 0 && wait_forever);
1225
1226 getrawmonotonic(&now);
1227
1228 ring->irq_put(ring);
1229 trace_i915_gem_request_wait_end(ring, seqno);
1230 #undef EXIT_COND
1231
1232 if (timeout) {
1233 struct timespec sleep_time = timespec_sub(now, before);
1234 *timeout = timespec_sub(*timeout, sleep_time);
1235 }
1236
1237 switch (end) {
1238 case -EIO:
1239 case -EAGAIN: /* Wedged */
1240 case -ERESTARTSYS: /* Signal */
1241 case -EINTR:
1242 return (int)end;
1243 case 0: /* Timeout */
1244 if (timeout)
1245 set_normalized_timespec(timeout, 0, 0);
1246 return -ETIME;
1247 default: /* Completed */
1248 WARN_ON(end < 0); /* We're not aware of other errors */
1249 return 0;
1250 }
1251 }
1252
1253 /**
1254 * Waits for a sequence number to be signaled, and cleans up the
1255 * request and object lists appropriately for that event.
1256 */
1257 int
1258 i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
1259 {
1260 struct drm_device *dev = ring->dev;
1261 struct drm_i915_private *dev_priv = dev->dev_private;
1262 bool interruptible = dev_priv->mm.interruptible;
1263 int ret;
1264
1265 BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1266 BUG_ON(seqno == 0);
1267
1268 ret = i915_gem_check_wedge(dev_priv, interruptible);
1269 if (ret)
1270 return ret;
1271
1272 ret = i915_gem_check_olr(ring, seqno);
1273 if (ret)
1274 return ret;
1275
1276 return __wait_seqno(ring, seqno, interruptible, NULL);
1277 }
1278
1279 /**
1280 * Ensures that all rendering to the object has completed and the object is
1281 * safe to unbind from the GTT or access from the CPU.
1282 */
1283 static __must_check int
1284 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
1285 bool readonly)
1286 {
1287 struct intel_ring_buffer *ring = obj->ring;
1288 u32 seqno;
1289 int ret;
1290
1291 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
1292 if (seqno == 0)
1293 return 0;
1294
1295 ret = i915_wait_seqno(ring, seqno);
1296 if (ret)
1297 return ret;
1298
1299 i915_gem_retire_requests_ring(ring);
1300
1301 /* Manually manage the write flush as we may have not yet
1302 * retired the buffer.
1303 */
1304 if (obj->last_write_seqno &&
1305 i915_seqno_passed(seqno, obj->last_write_seqno)) {
1306 obj->last_write_seqno = 0;
1307 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
1308 }
1309
1310 return 0;
1311 }
1312
1313 /* A nonblocking variant of the above wait. This is a highly dangerous routine
1314 * as the object state may change during this call.
1315 */
1316 static __must_check int
1317 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
1318 bool readonly)
1319 {
1320 struct drm_device *dev = obj->base.dev;
1321 struct drm_i915_private *dev_priv = dev->dev_private;
1322 struct intel_ring_buffer *ring = obj->ring;
1323 u32 seqno;
1324 int ret;
1325
1326 BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1327 BUG_ON(!dev_priv->mm.interruptible);
1328
1329 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
1330 if (seqno == 0)
1331 return 0;
1332
1333 ret = i915_gem_check_wedge(dev_priv, true);
1334 if (ret)
1335 return ret;
1336
1337 ret = i915_gem_check_olr(ring, seqno);
1338 if (ret)
1339 return ret;
1340
1341 mutex_unlock(&dev->struct_mutex);
1342 ret = __wait_seqno(ring, seqno, true, NULL);
1343 mutex_lock(&dev->struct_mutex);
1344
1345 i915_gem_retire_requests_ring(ring);
1346
1347 /* Manually manage the write flush as we may have not yet
1348 * retired the buffer.
1349 */
1350 if (obj->last_write_seqno &&
1351 i915_seqno_passed(seqno, obj->last_write_seqno)) {
1352 obj->last_write_seqno = 0;
1353 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
1354 }
1355
1356 return ret;
1357 }
1358
1359 /**
1360 * Called when user space prepares to use an object with the CPU, either
1361 * through the mmap ioctl's mapping or a GTT mapping.
1362 */
1363 int
1364 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1365 struct drm_file *file)
1366 {
1367 struct drm_i915_gem_set_domain *args = data;
1368 struct drm_i915_gem_object *obj;
1369 uint32_t read_domains = args->read_domains;
1370 uint32_t write_domain = args->write_domain;
1371 int ret;
1372
1373 /* Only handle setting domains to types used by the CPU. */
1374 if (write_domain & I915_GEM_GPU_DOMAINS)
1375 return -EINVAL;
1376
1377 if (read_domains & I915_GEM_GPU_DOMAINS)
1378 return -EINVAL;
1379
1380 /* Having something in the write domain implies it's in the read
1381 * domain, and only that read domain. Enforce that in the request.
1382 */
1383 if (write_domain != 0 && read_domains != write_domain)
1384 return -EINVAL;
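	/*
	 * For example, read_domains = write_domain = I915_GEM_DOMAIN_GTT
	 * passes the checks above, while write_domain = GTT with
	 * read_domains = CPU (or any GPU domain in either field) is
	 * rejected.
	 */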
1385
1386 ret = i915_mutex_lock_interruptible(dev);
1387 if (ret)
1388 return ret;
1389
1390 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1391 if (&obj->base == NULL) {
1392 ret = -ENOENT;
1393 goto unlock;
1394 }
1395
1396 /* Try to flush the object off the GPU without holding the lock.
1397 * We will repeat the flush holding the lock in the normal manner
1398 * to catch cases where we are gazumped.
1399 */
1400 ret = i915_gem_object_wait_rendering__nonblocking(obj, !write_domain);
1401 if (ret)
1402 goto unref;
1403
1404 if (read_domains & I915_GEM_DOMAIN_GTT) {
1405 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1406
1407 /* Silently promote "you're not bound, there was nothing to do"
1408 * to success, since the client was just asking us to
1409 * make sure everything was done.
1410 */
1411 if (ret == -EINVAL)
1412 ret = 0;
1413 } else {
1414 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1415 }
1416
1417 unref:
1418 drm_gem_object_unreference(&obj->base);
1419 unlock:
1420 mutex_unlock(&dev->struct_mutex);
1421 return ret;
1422 }
1423
1424 /**
1425 * Called when user space has done writes to this buffer
1426 */
1427 int
1428 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1429 struct drm_file *file)
1430 {
1431 struct drm_i915_gem_sw_finish *args = data;
1432 struct drm_i915_gem_object *obj;
1433 int ret = 0;
1434
1435 ret = i915_mutex_lock_interruptible(dev);
1436 if (ret)
1437 return ret;
1438
1439 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1440 if (&obj->base == NULL) {
1441 ret = -ENOENT;
1442 goto unlock;
1443 }
1444
1445 /* Pinned buffers may be scanout, so flush the cache */
1446 if (obj->pin_count)
1447 i915_gem_object_flush_cpu_write_domain(obj);
1448
1449 drm_gem_object_unreference(&obj->base);
1450 unlock:
1451 mutex_unlock(&dev->struct_mutex);
1452 return ret;
1453 }
1454
1455 /**
1456 * Maps the contents of an object, returning the address it is mapped
1457 * into.
1458 *
1459 * While the mapping holds a reference on the contents of the object, it doesn't
1460 * imply a ref on the object itself.
1461 */
1462 int
1463 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1464 struct drm_file *file)
1465 {
1466 struct drm_i915_gem_mmap *args = data;
1467 struct drm_gem_object *obj;
1468 unsigned long addr;
1469 #ifdef __NetBSD__
1470 int ret;
1471 #endif
1472
1473 obj = drm_gem_object_lookup(dev, file, args->handle);
1474 if (obj == NULL)
1475 return -ENOENT;
1476
1477 #ifndef __NetBSD__ /* XXX drm prime */
1478 /* prime objects have no backing filp to GEM mmap
1479 * pages from.
1480 */
1481 if (!obj->filp) {
1482 drm_gem_object_unreference_unlocked(obj);
1483 return -EINVAL;
1484 }
1485 #endif
1486
1487 #ifdef __NetBSD__
1488 addr = (*curproc->p_emul->e_vm_default_addr)(curproc,
1489 (vaddr_t)curproc->p_vmspace->vm_daddr, args->size);
1490 /* XXX errno NetBSD->Linux */
1491 ret = -uvm_map(&curproc->p_vmspace->vm_map, &addr, args->size,
1492 obj->gemo_shm_uao, args->offset, 0,
1493 UVM_MAPFLAG((VM_PROT_READ | VM_PROT_WRITE),
1494 (VM_PROT_READ | VM_PROT_WRITE), UVM_INH_COPY, UVM_ADV_NORMAL,
1495 0));
1496 if (ret) {
1497 drm_gem_object_unreference_unlocked(obj);
1498 return ret;
1499 }
1500 uao_reference(obj->gemo_shm_uao);
1501 drm_gem_object_unreference_unlocked(obj);
1502 #else
1503 addr = vm_mmap(obj->filp, 0, args->size,
1504 PROT_READ | PROT_WRITE, MAP_SHARED,
1505 args->offset);
1506 drm_gem_object_unreference_unlocked(obj);
1507 if (IS_ERR((void *)addr))
1508 return addr;
1509 #endif
1510
1511 args->addr_ptr = (uint64_t) addr;
1512
1513 return 0;
1514 }
1515
1516 #ifdef __NetBSD__ /* XXX gem gtt fault */
1517 static int i915_udv_fault(struct uvm_faultinfo *, vaddr_t,
1518 struct vm_page **, int, int, vm_prot_t, int, paddr_t);
1519
1520 int
1521 i915_gem_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps,
1522 int npages, int centeridx, vm_prot_t access_type, int flags)
1523 {
1524 struct uvm_object *uobj = ufi->entry->object.uvm_obj;
1525 struct drm_gem_object *gem_obj =
1526 container_of(uobj, struct drm_gem_object, gemo_uvmobj);
1527 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
1528 struct drm_device *dev = obj->base.dev;
1529 struct drm_i915_private *dev_priv = dev->dev_private;
1530 voff_t byte_offset;
1531 pgoff_t page_offset;
1532 int ret = 0;
1533 bool write = ISSET(access_type, VM_PROT_WRITE)? 1 : 0;
1534
1535 byte_offset = (ufi->entry->offset + (vaddr - ufi->entry->start));
1536 KASSERT(byte_offset <= obj->base.size);
1537 page_offset = (byte_offset >> PAGE_SHIFT);
1538
1539 ret = i915_mutex_lock_interruptible(dev);
1540 if (ret)
1541 goto out;
1542
1543 trace_i915_gem_object_fault(obj, page_offset, true, write);
1544
1545 /* Now bind it into the GTT if needed */
1546 ret = i915_gem_object_pin(obj, 0, true, false);
1547 if (ret)
1548 goto unlock;
1549
1550 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1551 if (ret)
1552 goto unpin;
1553
1554 ret = i915_gem_object_get_fence(obj);
1555 if (ret)
1556 goto unpin;
1557
1558 obj->fault_mappable = true;
1559
1560 /* Finally, remap it using the new GTT offset */
1561 /* XXX errno NetBSD->Linux */
1562 ret = -i915_udv_fault(ufi, vaddr, pps, npages, centeridx, access_type,
1563 flags, (dev_priv->mm.gtt_base_addr + obj->gtt_offset));
1564 unpin:
1565 i915_gem_object_unpin(obj);
1566 unlock:
1567 mutex_unlock(&dev->struct_mutex);
1568 out:
1569 uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj);
1570 if (ret == -ERESTART)
1571 uvm_wait("i915flt");
1572 return ret;
1573 }
1574
1575 /*
1576 * XXX i915_udv_fault is copypasta of udv_fault from uvm_device.c.
1577 *
1578 * XXX pmap_enter_default instead of pmap_enter because of a problem
1579 * with using weak aliases in kernel modules or something.
1580 */
1581 int pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, unsigned);
1582
1583 static int
1584 i915_udv_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps,
1585 int npages, int centeridx, vm_prot_t access_type, int flags,
1586 paddr_t gtt_paddr)
1587 {
1588 struct vm_map_entry *entry = ufi->entry;
1589 vaddr_t curr_va;
1590 off_t curr_offset;
1591 paddr_t paddr;
1592 u_int mmapflags;
1593 int lcv, retval;
1594 vm_prot_t mapprot;
1595 UVMHIST_FUNC("i915_udv_fault"); UVMHIST_CALLED(maphist);
1596 UVMHIST_LOG(maphist," flags=%d", flags,0,0,0);
1597
1598 /*
1599 * we do not allow device mappings to be mapped copy-on-write
1600 * so we kill any attempt to do so here.
1601 */
1602
1603 if (UVM_ET_ISCOPYONWRITE(entry)) {
1604 UVMHIST_LOG(maphist, "<- failed -- COW entry (etype=0x%x)",
1605 entry->etype, 0,0,0);
1606 return(EIO);
1607 }
1608
1609 /*
1610 * now we must determine the offset in udv to use and the VA to
1611 * use for pmap_enter. note that we always use orig_map's pmap
1612 * for pmap_enter (even if we have a submap). since virtual
1613 * addresses in a submap must match the main map, this is ok.
1614 */
1615
1616 /* udv offset = (offset from start of entry) + entry's offset */
1617 curr_offset = entry->offset + (vaddr - entry->start);
1618 /* pmap va = vaddr (virtual address of pps[0]) */
1619 curr_va = vaddr;
1620
1621 /*
1622 * loop over the page range entering in as needed
1623 */
1624
1625 retval = 0;
1626 for (lcv = 0 ; lcv < npages ; lcv++, curr_offset += PAGE_SIZE,
1627 curr_va += PAGE_SIZE) {
1628 if ((flags & PGO_ALLPAGES) == 0 && lcv != centeridx)
1629 continue;
1630
1631 if (pps[lcv] == PGO_DONTCARE)
1632 continue;
1633
1634 paddr = (gtt_paddr + curr_offset);
1635 mmapflags = 0;
1636 mapprot = ufi->entry->protection;
1637 UVMHIST_LOG(maphist,
1638 " MAPPING: device: pm=0x%x, va=0x%x, pa=0x%lx, at=%d",
1639 ufi->orig_map->pmap, curr_va, paddr, mapprot);
1640 if (pmap_enter_default(ufi->orig_map->pmap, curr_va, paddr, mapprot,
1641 PMAP_CANFAIL | mapprot | mmapflags) != 0) {
1642 /*
1643 * pmap_enter() didn't have the resource to
1644 * enter this mapping. Unlock everything,
1645 * wait for the pagedaemon to free up some
1646 * pages, and then tell uvm_fault() to start
1647 * the fault again.
1648 *
1649 * XXX Needs some rethinking for the PGO_ALLPAGES
1650 * XXX case.
1651 */
1652 pmap_update(ufi->orig_map->pmap); /* sync what we have so far */
1653 return (ERESTART);
1654 }
1655 }
1656
1657 pmap_update(ufi->orig_map->pmap);
1658 return (retval);
1659 }
1660 #else
1661 /**
1662 * i915_gem_fault - fault a page into the GTT
1663 * vma: VMA in question
1664 * vmf: fault info
1665 *
1666 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1667 * from userspace. The fault handler takes care of binding the object to
1668 * the GTT (if needed), allocating and programming a fence register (again,
1669 * only if needed based on whether the old reg is still valid or the object
1670 * is tiled) and inserting a new PTE into the faulting process.
1671 *
1672 * Note that the faulting process may involve evicting existing objects
1673 * from the GTT and/or fence registers to make room. So performance may
1674 * suffer if the GTT working set is large or there are few fence registers
1675 * left.
1676 */
1677 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1678 {
1679 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
1680 struct drm_device *dev = obj->base.dev;
1681 drm_i915_private_t *dev_priv = dev->dev_private;
1682 pgoff_t page_offset;
1683 unsigned long pfn;
1684 int ret = 0;
1685 bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1686
1687 /* We don't use vmf->pgoff since that has the fake offset */
1688 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1689 PAGE_SHIFT;
1690
1691 ret = i915_mutex_lock_interruptible(dev);
1692 if (ret)
1693 goto out;
1694
1695 trace_i915_gem_object_fault(obj, page_offset, true, write);
1696
1697 /* Now bind it into the GTT if needed */
1698 ret = i915_gem_object_pin(obj, 0, true, false);
1699 if (ret)
1700 goto unlock;
1701
1702 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1703 if (ret)
1704 goto unpin;
1705
1706 ret = i915_gem_object_get_fence(obj);
1707 if (ret)
1708 goto unpin;
1709
1710 obj->fault_mappable = true;
1711
1712 pfn = ((dev_priv->mm.gtt_base_addr + obj->gtt_offset) >> PAGE_SHIFT) +
1713 page_offset;
1714
1715 /* Finally, remap it using the new GTT offset */
1716 ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
1717 unpin:
1718 i915_gem_object_unpin(obj);
1719 unlock:
1720 mutex_unlock(&dev->struct_mutex);
1721 out:
1722 switch (ret) {
1723 case -EIO:
1724 /* If this -EIO is due to a gpu hang, give the reset code a
1725 * chance to clean up the mess. Otherwise return the proper
1726 * SIGBUS. */
1727 if (!atomic_read(&dev_priv->mm.wedged))
1728 return VM_FAULT_SIGBUS;
1729 case -EAGAIN:
1730 /* Give the error handler a chance to run and move the
1731 * objects off the GPU active list. Next time we service the
1732 * fault, we should be able to transition the page into the
1733 * GTT without touching the GPU (and so avoid further
1734 * EIO/EGAIN). If the GPU is wedged, then there is no issue
1735 * with coherency, just lost writes.
1736 */
1737 set_need_resched();
1738 case 0:
1739 case -ERESTARTSYS:
1740 case -EINTR:
1741 case -EBUSY:
1742 /*
1743 * EBUSY is ok: this just means that another thread
1744 * already did the job.
1745 */
1746 return VM_FAULT_NOPAGE;
1747 case -ENOMEM:
1748 return VM_FAULT_OOM;
1749 case -ENOSPC:
1750 return VM_FAULT_SIGBUS;
1751 default:
1752 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
1753 return VM_FAULT_SIGBUS;
1754 }
1755 }
1756 #endif
1757
1758 /**
1759 * i915_gem_release_mmap - remove physical page mappings
1760 * @obj: obj in question
1761 *
1762 * Preserve the reservation of the mmapping with the DRM core code, but
1763 * relinquish ownership of the pages back to the system.
1764 *
1765 * It is vital that we remove the page mapping if we have mapped a tiled
1766 * object through the GTT and then lose the fence register due to
1767 * resource pressure. Similarly if the object has been moved out of the
1768 * aperture, then pages mapped into userspace must be revoked. Removing the
1769 * mapping will then trigger a page fault on the next user access, allowing
1770 * fixup by i915_gem_fault().
1771 */
1772 void
1773 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1774 {
1775 if (!obj->fault_mappable)
1776 return;
1777
1778 #ifdef __NetBSD__ /* XXX gem gtt fault */
1779 {
1780 struct vm_page *page;
1781
1782 mutex_enter(obj->base.gemo_shm_uao->vmobjlock);
1783 KASSERT(obj->pages != NULL);
1784 /* Force a fresh fault for each page. */
1785 TAILQ_FOREACH(page, &obj->igo_pageq, pageq.queue)
1786 pmap_page_protect(page, VM_PROT_NONE);
1787 mutex_exit(obj->base.gemo_shm_uao->vmobjlock);
1788 }
1789 #else
1790 if (obj->base.dev->dev_mapping)
1791 unmap_mapping_range(obj->base.dev->dev_mapping,
1792 (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
1793 obj->base.size, 1);
1794 #endif
1795
1796 obj->fault_mappable = false;
1797 }
1798
1799 static uint32_t
1800 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
1801 {
1802 uint32_t gtt_size;
1803
1804 if (INTEL_INFO(dev)->gen >= 4 ||
1805 tiling_mode == I915_TILING_NONE)
1806 return size;
1807
1808 /* Previous chips need a power-of-two fence region when tiling */
1809 if (INTEL_INFO(dev)->gen == 3)
1810 gtt_size = 1024*1024;
1811 else
1812 gtt_size = 512*1024;
1813
1814 while (gtt_size < size)
1815 gtt_size <<= 1;
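	/*
	 * E.g. a 600KB tiled object needs a 1MB fence region on gen3,
	 * and the 512KB starting size on gen2 likewise doubles to 1MB.
	 */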
1816
1817 return gtt_size;
1818 }
1819
1820 /**
1821 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1822 * @obj: object to check
1823 *
1824 * Return the required GTT alignment for an object, taking into account
1825 * potential fence register mapping.
1826 */
1827 static uint32_t
1828 i915_gem_get_gtt_alignment(struct drm_device *dev,
1829 uint32_t size,
1830 int tiling_mode)
1831 {
1832 /*
1833 * Minimum alignment is 4k (GTT page size), but might be greater
1834 * if a fence register is needed for the object.
1835 */
1836 if (INTEL_INFO(dev)->gen >= 4 ||
1837 tiling_mode == I915_TILING_NONE)
1838 return 4096;
1839
1840 /*
1841 * Previous chips need to be aligned to the size of the smallest
1842 * fence register that can contain the object.
1843 */
1844 return i915_gem_get_gtt_size(dev, size, tiling_mode);
1845 }
1846
1847 /**
1848 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
1849 * unfenced object
1850 * @dev: the device
1851 * @size: size of the object
1852 * @tiling_mode: tiling mode of the object
1853 *
1854 * Return the required GTT alignment for an object, only taking into account
1855 * unfenced tiled surface requirements.
1856 */
1857 uint32_t
1858 i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
1859 uint32_t size,
1860 int tiling_mode)
1861 {
1862 /*
1863 * Minimum alignment is 4k (GTT page size) for sane hw.
1864 */
1865 if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
1866 tiling_mode == I915_TILING_NONE)
1867 return 4096;
1868
1869 /* Previous hardware however needs to be aligned to a power-of-two
1870 * tile height. The simplest method for determining this is to reuse
1871 * the power-of-two tiled object size.
1872 */
1873 return i915_gem_get_gtt_size(dev, size, tiling_mode);
1874 }
1875
1876 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
1877 {
1878 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
1879 int ret;
1880
1881 if (obj->base.map_list.map)
1882 return 0;
1883
1884 dev_priv->mm.shrinker_no_lock_stealing = true;
1885
1886 ret = drm_gem_create_mmap_offset(&obj->base);
1887 if (ret != -ENOSPC)
1888 goto out;
1889
1890 /* Badly fragmented mmap space? The only way we can recover
1891 * space is by destroying unwanted objects. We can't randomly release
1892 * mmap_offsets as userspace expects them to be persistent for the
1893 * lifetime of the objects. The closest we can do is to release the
1894 * offsets on purgeable objects by truncating them and marking them purged,
1895 * which prevents userspace from ever using those objects again.
1896 */
1897 i915_gem_purge(dev_priv, obj->base.size >> PAGE_SHIFT);
1898 ret = drm_gem_create_mmap_offset(&obj->base);
1899 if (ret != -ENOSPC)
1900 goto out;
1901
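	/* Purging to the target page count was not enough either; drop
	 * everything the shrinker can reclaim before the final attempt. */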
1902 i915_gem_shrink_all(dev_priv);
1903 ret = drm_gem_create_mmap_offset(&obj->base);
1904 out:
1905 dev_priv->mm.shrinker_no_lock_stealing = false;
1906
1907 return ret;
1908 }
1909
1910 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
1911 {
1912 if (!obj->base.map_list.map)
1913 return;
1914
1915 drm_gem_free_mmap_offset(&obj->base);
1916 }
1917
1918 int
1919 i915_gem_mmap_gtt(struct drm_file *file,
1920 struct drm_device *dev,
1921 uint32_t handle,
1922 uint64_t *offset)
1923 {
1924 struct drm_i915_private *dev_priv = dev->dev_private;
1925 struct drm_i915_gem_object *obj;
1926 int ret;
1927
1928 ret = i915_mutex_lock_interruptible(dev);
1929 if (ret)
1930 return ret;
1931
1932 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
1933 if (&obj->base == NULL) {
1934 ret = -ENOENT;
1935 goto unlock;
1936 }
1937
1938 if (obj->base.size > dev_priv->mm.gtt_mappable_end) {
1939 ret = -E2BIG;
1940 goto out;
1941 }
1942
1943 if (obj->madv != I915_MADV_WILLNEED) {
1944 DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1945 ret = -EINVAL;
1946 goto out;
1947 }
1948
1949 ret = i915_gem_object_create_mmap_offset(obj);
1950 if (ret)
1951 goto out;
1952
1953 *offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT;
1954
1955 out:
1956 drm_gem_object_unreference(&obj->base);
1957 unlock:
1958 mutex_unlock(&dev->struct_mutex);
1959 return ret;
1960 }
1961
1962 /**
1963 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1964 * @dev: DRM device
1965 * @data: GTT mapping ioctl data
1966 * @file: GEM object info
1967 *
1968 * Simply returns the fake offset to userspace so it can mmap it.
1969 * The mmap call will end up in drm_gem_mmap(), which will set things
1970 * up so we can get faults in the handler above.
1971 *
1972 * The fault handler will take care of binding the object into the GTT
1973 * (since it may have been evicted to make room for something), allocating
1974 * a fence register, and mapping the appropriate aperture address into
1975 * userspace.
1976 */
1977 int
1978 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1979 struct drm_file *file)
1980 {
1981 struct drm_i915_gem_mmap_gtt *args = data;
1982
1983 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
1984 }
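
/*
 * Example (illustrative only): the userspace side of the GTT mmap path.
 * This sketch assumes the uapi names DRM_IOCTL_I915_GEM_MMAP_GTT and
 * struct drm_i915_gem_mmap_gtt from <drm/i915_drm.h>, plus a hypothetical
 * already-open DRM fd, object handle and size; it is not compiled here.
 */
#if 0
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <drm/i915_drm.h>

static void *
example_map_gtt(int drm_fd, uint32_t handle, size_t size)
{
        struct drm_i915_gem_mmap_gtt arg = { .handle = handle };

        /* Ask the kernel for the fake offset returned by the ioctl above. */
        if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg) != 0)
                return MAP_FAILED;

        /* mmap() of that offset lands in the driver's GTT fault handler. */
        return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
            drm_fd, (off_t)arg.offset);
}
#endif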
1985
1986 /* Immediately discard the backing storage */
1987 static void
1988 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
1989 {
1990 #ifndef __NetBSD__
1991 struct inode *inode;
1992 #endif
1993
1994 i915_gem_object_free_mmap_offset(obj);
1995
1996 #ifdef __NetBSD__
1997 {
1998 struct uvm_object *const uobj = obj->base.gemo_shm_uao;
1999
2000 if (uobj != NULL) {
2001 /* XXX Calling pgo_put like this is bogus. */
2002 mutex_enter(uobj->vmobjlock);
2003 (*uobj->pgops->pgo_put)(uobj, 0, obj->base.size,
2004 (PGO_ALLPAGES | PGO_FREE));
2005 }
2006 }
2007 #else
2008 if (obj->base.filp == NULL)
2009 return;
2010
2011 /* Our goal here is to return as much of the memory as
2012 * possible back to the system, as we are called from OOM.
2013 * To do this we must instruct the shmfs to drop all of its
2014 * backing pages, *now*.
2015 */
2016 inode = obj->base.filp->f_path.dentry->d_inode;
2017 shmem_truncate_range(inode, 0, (loff_t)-1);
2018 #endif
2019
2020 obj->madv = __I915_MADV_PURGED;
2021 }
2022
2023 static inline int
2024 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
2025 {
2026 return obj->madv == I915_MADV_DONTNEED;
2027 }
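
/*
 * Example (illustrative only): how userspace marks a buffer purgeable so
 * the shrinker may truncate it as above.  This assumes the uapi names
 * DRM_IOCTL_I915_GEM_MADVISE and struct drm_i915_gem_madvise from
 * <drm/i915_drm.h>; the helper name is hypothetical and the block is not
 * compiled here.
 */
#if 0
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int
example_mark_purgeable(int drm_fd, uint32_t handle, int *retained)
{
        struct drm_i915_gem_madvise madv = {
                .handle = handle,
                .madv = I915_MADV_DONTNEED,
        };
        int ret;

        ret = ioctl(drm_fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
        if (ret == 0 && retained != NULL)
                *retained = madv.retained;      /* 0 if already purged */
        return ret;
}
#endif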
2028
2029 #ifdef __NetBSD__
2030 static void
2031 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
2032 {
2033 struct drm_device *const dev = obj->base.dev;
2034 int ret;
2035
2036 /* XXX Cargo-culted from the Linux code. */
2037 BUG_ON(obj->madv == __I915_MADV_PURGED);
2038
2039 ret = i915_gem_object_set_to_cpu_domain(obj, true);
2040 if (ret) {
2041 WARN_ON(ret != -EIO);
2042 i915_gem_clflush_object(obj);
2043 obj->base.read_domains = obj->base.write_domain =
2044 I915_GEM_DOMAIN_CPU;
2045 }
2046
2047 if (i915_gem_object_needs_bit17_swizzle(obj))
2048 i915_gem_object_save_bit_17_swizzle(obj);
2049
2050 /* XXX Maintain dirty flag? */
2051
2052 bus_dmamap_destroy(dev->dmat, obj->igo_dmamap);
2053 bus_dmamem_unwire_uvm_object(dev->dmat, obj->base.gemo_shm_uao, 0,
2054 obj->base.size, obj->pages, obj->igo_nsegs);
2055
2056 kfree(obj->pages);
2057 }
2058 #else
2059 static void
2060 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
2061 {
2062 int page_count = obj->base.size / PAGE_SIZE;
2063 struct scatterlist *sg;
2064 int ret, i;
2065
2066 BUG_ON(obj->madv == __I915_MADV_PURGED);
2067
2068 ret = i915_gem_object_set_to_cpu_domain(obj, true);
2069 if (ret) {
2070 /* In the event of a disaster, abandon all caches and
2071 * hope for the best.
2072 */
2073 WARN_ON(ret != -EIO);
2074 i915_gem_clflush_object(obj);
2075 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2076 }
2077
2078 if (i915_gem_object_needs_bit17_swizzle(obj))
2079 i915_gem_object_save_bit_17_swizzle(obj);
2080
2081 if (obj->madv == I915_MADV_DONTNEED)
2082 obj->dirty = 0;
2083
2084 for_each_sg(obj->pages->sgl, sg, page_count, i) {
2085 struct page *page = sg_page(sg);
2086
2087 if (obj->dirty)
2088 set_page_dirty(page);
2089
2090 if (obj->madv == I915_MADV_WILLNEED)
2091 mark_page_accessed(page);
2092
2093 page_cache_release(page);
2094 }
2095 obj->dirty = 0;
2096
2097 sg_free_table(obj->pages);
2098 kfree(obj->pages);
2099 }
2100 #endif
2101
2102 static int
2103 i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
2104 {
2105 const struct drm_i915_gem_object_ops *ops = obj->ops;
2106
2107 if (obj->pages == NULL)
2108 return 0;
2109
2110 BUG_ON(obj->gtt_space);
2111
2112 if (obj->pages_pin_count)
2113 return -EBUSY;
2114
2115 /* ->put_pages might need to allocate memory for the bit17 swizzle
2116 * array, hence protect them from being reaped by removing them from gtt
2117 * lists early. */
2118 list_del(&obj->gtt_list);
2119
2120 ops->put_pages(obj);
2121 obj->pages = NULL;
2122
2123 if (i915_gem_object_is_purgeable(obj))
2124 i915_gem_object_truncate(obj);
2125
2126 return 0;
2127 }
2128
2129 static long
2130 __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
2131 bool purgeable_only)
2132 {
2133 struct drm_i915_gem_object *obj, *next;
2134 long count = 0;
2135
2136 list_for_each_entry_safe(obj, next,
2137 &dev_priv->mm.unbound_list,
2138 gtt_list) {
2139 if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
2140 i915_gem_object_put_pages(obj) == 0) {
2141 count += obj->base.size >> PAGE_SHIFT;
2142 if (count >= target)
2143 return count;
2144 }
2145 }
2146
2147 list_for_each_entry_safe(obj, next,
2148 &dev_priv->mm.inactive_list,
2149 mm_list) {
2150 if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
2151 i915_gem_object_unbind(obj) == 0 &&
2152 i915_gem_object_put_pages(obj) == 0) {
2153 count += obj->base.size >> PAGE_SHIFT;
2154 if (count >= target)
2155 return count;
2156 }
2157 }
2158
2159 return count;
2160 }
2161
2162 static long
2163 i915_gem_purge(struct drm_i915_private *dev_priv, long target)
2164 {
2165 return __i915_gem_shrink(dev_priv, target, true);
2166 }
2167
2168 static void
2169 i915_gem_shrink_all(struct drm_i915_private *dev_priv)
2170 {
2171 struct drm_i915_gem_object *obj, *next;
2172
2173 i915_gem_evict_everything(dev_priv->dev);
2174
2175 list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, gtt_list)
2176 i915_gem_object_put_pages(obj);
2177 }
2178
2179 #ifdef __NetBSD__
2180 static int
2181 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2182 {
2183 struct drm_device *const dev = obj->base.dev;
2184 struct vm_page *page;
2185 int error;
2186
2187 /* XXX Cargo-culted from the Linux code. */
2188 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2189 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2190
2191 KASSERT(obj->pages == NULL);
2192 TAILQ_INIT(&obj->igo_pageq);
2193 obj->pages = kcalloc((obj->base.size / PAGE_SIZE),
2194 sizeof(obj->pages[0]), GFP_KERNEL);
2195 if (obj->pages == NULL) {
2196 error = -ENOMEM;
2197 goto fail0;
2198 }
2199
2200 /* XXX errno NetBSD->Linux */
2201 error = -bus_dmamem_wire_uvm_object(dev->dmat, obj->base.gemo_shm_uao,
2202 0, obj->base.size, &obj->igo_pageq, PAGE_SIZE, 0, obj->pages,
2203 (obj->base.size / PAGE_SIZE), &obj->igo_nsegs, BUS_DMA_NOWAIT);
2204 if (error)
2205 /* XXX Try i915_gem_purge, i915_gem_shrink_all. */
2206 goto fail1;
2207 KASSERT(0 < obj->igo_nsegs);
2208 KASSERT(obj->igo_nsegs <= (obj->base.size / PAGE_SIZE));
2209
2210 /*
2211 * Check that the paddrs will fit in 40 bits, or 32 bits on i965.
2212 *
2213 * XXX This is wrong; we ought to pass this constraint to
2214 * bus_dmamem_wire_uvm_object instead.
2215 */
2216 TAILQ_FOREACH(page, &obj->igo_pageq, pageq.queue) {
2217 const uint64_t mask =
2218 (IS_BROADWATER(dev) || IS_CRESTLINE(dev)?
2219 0xffffffffULL : 0xffffffffffULL);
2220 if (VM_PAGE_TO_PHYS(page) & ~mask) {
2221 DRM_ERROR("GEM physical address exceeds %u bits"
2222 ": %"PRIxMAX"\n",
2223 popcount64(mask),
2224 (uintmax_t)VM_PAGE_TO_PHYS(page));
2225 error = -EIO;
2226 goto fail2;
2227 }
2228 }
2229
2230 /* XXX errno NetBSD->Linux */
2231 error = -bus_dmamap_create(dev->dmat, obj->base.size, obj->igo_nsegs,
2232 PAGE_SIZE, 0, BUS_DMA_NOWAIT, &obj->igo_dmamap);
2233 if (error)
2234 goto fail2;
2235
2236 /* XXX Cargo-culted from the Linux code. */
2237 if (i915_gem_object_needs_bit17_swizzle(obj))
2238 i915_gem_object_do_bit_17_swizzle(obj);
2239
2240 /* Success! */
2241 return 0;
2242
2243 fail2: bus_dmamem_unwire_uvm_object(dev->dmat, obj->base.gemo_shm_uao, 0,
2244 obj->base.size, obj->pages, (obj->base.size / PAGE_SIZE));
2245 fail1: kfree(obj->pages);
2246 obj->pages = NULL;
2247 fail0: KASSERT(error);
2248 return error;
2249 }
2250 #else
2251 static int
2252 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2253 {
2254 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2255 int page_count, i;
2256 struct address_space *mapping;
2257 struct sg_table *st;
2258 struct scatterlist *sg;
2259 struct page *page;
2260 gfp_t gfp;
2261
2262 /* Assert that the object is not currently in any GPU domain. As it
2263 * wasn't in the GTT, there shouldn't be any way it could have been in
2264 * a GPU cache.
2265 */
2266 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2267 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2268
2269 st = kmalloc(sizeof(*st), GFP_KERNEL);
2270 if (st == NULL)
2271 return -ENOMEM;
2272
2273 page_count = obj->base.size / PAGE_SIZE;
2274 if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2275 sg_free_table(st);
2276 kfree(st);
2277 return -ENOMEM;
2278 }
2279
2280 /* Get the list of pages out of our struct file. They'll be pinned
2281 * at this point until we release them.
2282 *
2283 * Fail silently without starting the shrinker
2284 */
2285 mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
2286 gfp = mapping_gfp_mask(mapping);
2287 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
2288 gfp &= ~(__GFP_IO | __GFP_WAIT);
2289 for_each_sg(st->sgl, sg, page_count, i) {
2290 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2291 if (IS_ERR(page)) {
2292 i915_gem_purge(dev_priv, page_count);
2293 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2294 }
2295 if (IS_ERR(page)) {
2296 /* We've tried hard to allocate the memory by reaping
2297 * our own buffer, now let the real VM do its job and
2298 * go down in flames if truly OOM.
2299 */
2300 gfp &= ~(__GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD);
2301 gfp |= __GFP_IO | __GFP_WAIT;
2302
2303 i915_gem_shrink_all(dev_priv);
2304 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2305 if (IS_ERR(page))
2306 goto err_pages;
2307
2308 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
2309 gfp &= ~(__GFP_IO | __GFP_WAIT);
2310 }
2311
2312 sg_set_page(sg, page, PAGE_SIZE, 0);
2313 }
2314
2315 obj->pages = st;
2316
2317 if (i915_gem_object_needs_bit17_swizzle(obj))
2318 i915_gem_object_do_bit_17_swizzle(obj);
2319
2320 return 0;
2321
2322 err_pages:
2323 for_each_sg(st->sgl, sg, i, page_count)
2324 page_cache_release(sg_page(sg));
2325 sg_free_table(st);
2326 kfree(st);
2327 return PTR_ERR(page);
2328 }
2329 #endif
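
/*
 * Example (illustrative only): the two allocation modes toggled in the
 * shmem loop above, factored out for clarity - an opportunistic attempt
 * that neither blocks nor wakes kswapd, and a last-resort attempt that
 * lets the VM reclaim for real.  The helper names are hypothetical and
 * the block is not compiled.
 */
#if 0
static inline gfp_t
example_gfp_opportunistic(gfp_t gfp)
{
        /* Fail fast and quietly; no I/O, no blocking, no kswapd. */
        gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
        gfp &= ~(__GFP_IO | __GFP_WAIT);
        return gfp;
}

static inline gfp_t
example_gfp_last_resort(gfp_t gfp)
{
        /* Let the VM block and do I/O; may go down in flames if truly OOM. */
        gfp &= ~(__GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD);
        gfp |= __GFP_IO | __GFP_WAIT;
        return gfp;
}
#endif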
2330
2331 /* Ensure that the associated pages are gathered from the backing storage
2332 * and pinned into our object. i915_gem_object_get_pages() may be called
2333 * multiple times before they are released by a single call to
2334 * i915_gem_object_put_pages() - once the pages are no longer referenced
2335 * either as a result of memory pressure (reaping pages under the shrinker)
2336 * or as the object is itself released.
2337 */
2338 int
2339 i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2340 {
2341 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2342 const struct drm_i915_gem_object_ops *ops = obj->ops;
2343 int ret;
2344
2345 if (obj->pages)
2346 return 0;
2347
2348 BUG_ON(obj->pages_pin_count);
2349
2350 ret = ops->get_pages(obj);
2351 if (ret)
2352 return ret;
2353
2354 list_add_tail(&obj->gtt_list, &dev_priv->mm.unbound_list);
2355 return 0;
2356 }
2357
2358 void
2359 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
2360 struct intel_ring_buffer *ring)
2361 {
2362 struct drm_device *dev = obj->base.dev;
2363 struct drm_i915_private *dev_priv = dev->dev_private;
2364 u32 seqno = intel_ring_get_seqno(ring);
2365
2366 BUG_ON(ring == NULL);
2367 obj->ring = ring;
2368
2369 /* Add a reference if we're newly entering the active list. */
2370 if (!obj->active) {
2371 drm_gem_object_reference(&obj->base);
2372 obj->active = 1;
2373 }
2374
2375 /* Move from whatever list we were on to the tail of execution. */
2376 list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
2377 list_move_tail(&obj->ring_list, &ring->active_list);
2378
2379 obj->last_read_seqno = seqno;
2380
2381 if (obj->fenced_gpu_access) {
2382 obj->last_fenced_seqno = seqno;
2383
2384 /* Bump MRU to take account of the delayed flush */
2385 if (obj->fence_reg != I915_FENCE_REG_NONE) {
2386 struct drm_i915_fence_reg *reg;
2387
2388 reg = &dev_priv->fence_regs[obj->fence_reg];
2389 list_move_tail(&reg->lru_list,
2390 &dev_priv->mm.fence_list);
2391 }
2392 }
2393 }
2394
2395 static void
2396 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
2397 {
2398 struct drm_device *dev = obj->base.dev;
2399 struct drm_i915_private *dev_priv = dev->dev_private;
2400
2401 BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
2402 BUG_ON(!obj->active);
2403
2404 if (obj->pin_count) /* are we a framebuffer? */
2405 intel_mark_fb_idle(obj);
2406
2407 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
2408
2409 list_del_init(&obj->ring_list);
2410 obj->ring = NULL;
2411
2412 obj->last_read_seqno = 0;
2413 obj->last_write_seqno = 0;
2414 obj->base.write_domain = 0;
2415
2416 obj->last_fenced_seqno = 0;
2417 obj->fenced_gpu_access = false;
2418
2419 obj->active = 0;
2420 drm_gem_object_unreference(&obj->base);
2421
2422 WARN_ON(i915_verify_lists(dev));
2423 }
2424
2425 static int
2426 i915_gem_handle_seqno_wrap(struct drm_device *dev)
2427 {
2428 struct drm_i915_private *dev_priv = dev->dev_private;
2429 struct intel_ring_buffer *ring;
2430 int ret, i, j;
2431
2432 /* The hardware uses various monotonic 32-bit counters; if we
2433 * detect that they will wrap around, we need to idle the GPU
2434 * and reset those counters.
2435 */
2436 ret = 0;
2437 for_each_ring(ring, dev_priv, i) {
2438 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
2439 ret |= ring->sync_seqno[j] != 0;
2440 }
2441 if (ret == 0)
2442 return ret;
2443
2444 ret = i915_gpu_idle(dev);
2445 if (ret)
2446 return ret;
2447
2448 i915_gem_retire_requests(dev);
2449 for_each_ring(ring, dev_priv, i) {
2450 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
2451 ring->sync_seqno[j] = 0;
2452 }
2453
2454 return 0;
2455 }
2456
2457 int
2458 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
2459 {
2460 struct drm_i915_private *dev_priv = dev->dev_private;
2461
2462 /* reserve 0 for non-seqno */
2463 if (dev_priv->next_seqno == 0) {
2464 int ret = i915_gem_handle_seqno_wrap(dev);
2465 if (ret)
2466 return ret;
2467
2468 dev_priv->next_seqno = 1;
2469 }
2470
2471 *seqno = dev_priv->next_seqno++;
2472 return 0;
2473 }
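
/*
 * Example (illustrative only): seqno comparisons elsewhere in this file
 * rely on a wraparound-tolerant test of the kind i915_seqno_passed()
 * provides - signed 32-bit subtraction, so values less than 2^31 apart
 * compare correctly even across the wrap handled above.  A minimal
 * sketch, not compiled:
 */
#if 0
static inline bool
example_seqno_passed(uint32_t seq1, uint32_t seq2)
{
        return (int32_t)(seq1 - seq2) >= 0;
}
#endif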
2474
2475 int
2476 i915_add_request(struct intel_ring_buffer *ring,
2477 struct drm_file *file,
2478 u32 *out_seqno)
2479 {
2480 drm_i915_private_t *dev_priv = ring->dev->dev_private;
2481 struct drm_i915_gem_request *request;
2482 u32 request_ring_position;
2483 int was_empty;
2484 int ret;
2485
2486 /*
2487 * Emit any outstanding flushes - execbuf can fail to emit the flush
2488 * after having emitted the batchbuffer command. Hence we need to fix
2489 * things up similar to emitting the lazy request. The difference here
2490 * is that the flush _must_ happen before the next request, no matter
2491 * what.
2492 */
2493 ret = intel_ring_flush_all_caches(ring);
2494 if (ret)
2495 return ret;
2496
2497 request = kmalloc(sizeof(*request), GFP_KERNEL);
2498 if (request == NULL)
2499 return -ENOMEM;
2500
2501
2502 /* Record the position of the start of the request so that
2503 * should we detect the updated seqno part-way through the
2504 * GPU processing the request, we never over-estimate the
2505 * position of the head.
2506 */
2507 request_ring_position = intel_ring_get_tail(ring);
2508
2509 ret = ring->add_request(ring);
2510 if (ret) {
2511 kfree(request);
2512 return ret;
2513 }
2514
2515 request->seqno = intel_ring_get_seqno(ring);
2516 request->ring = ring;
2517 request->tail = request_ring_position;
2518 request->emitted_jiffies = jiffies;
2519 was_empty = list_empty(&ring->request_list);
2520 list_add_tail(&request->list, &ring->request_list);
2521 request->file_priv = NULL;
2522
2523 if (file) {
2524 struct drm_i915_file_private *file_priv = file->driver_priv;
2525
2526 spin_lock(&file_priv->mm.lock);
2527 request->file_priv = file_priv;
2528 list_add_tail(&request->client_list,
2529 &file_priv->mm.request_list);
2530 spin_unlock(&file_priv->mm.lock);
2531 }
2532
2533 trace_i915_gem_request_add(ring, request->seqno);
2534 ring->outstanding_lazy_request = 0;
2535
2536 if (!dev_priv->mm.suspended) {
2537 if (i915_enable_hangcheck) {
2538 mod_timer(&dev_priv->hangcheck_timer,
2539 round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
2540 }
2541 if (was_empty) {
2542 queue_delayed_work(dev_priv->wq,
2543 &dev_priv->mm.retire_work,
2544 round_jiffies_up_relative(HZ));
2545 intel_mark_busy(dev_priv->dev);
2546 }
2547 }
2548
2549 if (out_seqno)
2550 *out_seqno = request->seqno;
2551 return 0;
2552 }
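
/*
 * Example (illustrative only): the usual caller pattern - emit a request,
 * keep the seqno handed back through out_seqno, and wait on it later, as
 * i915_gem_object_flush_fence() does below with i915_wait_seqno().
 * struct_mutex is assumed to be held; the helper name is hypothetical and
 * the block is not compiled.
 */
#if 0
static int
example_emit_and_wait(struct intel_ring_buffer *ring)
{
        u32 seqno;
        int ret;

        ret = i915_add_request(ring, NULL, &seqno);
        if (ret)
                return ret;

        /* Block until the GPU has completed the request just emitted. */
        return i915_wait_seqno(ring, seqno);
}
#endif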
2553
2554 static inline void
2555 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
2556 {
2557 struct drm_i915_file_private *file_priv = request->file_priv;
2558
2559 if (!file_priv)
2560 return;
2561
2562 spin_lock(&file_priv->mm.lock);
2563 if (request->file_priv) {
2564 list_del(&request->client_list);
2565 request->file_priv = NULL;
2566 }
2567 spin_unlock(&file_priv->mm.lock);
2568 }
2569
2570 static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
2571 struct intel_ring_buffer *ring)
2572 {
2573 while (!list_empty(&ring->request_list)) {
2574 struct drm_i915_gem_request *request;
2575
2576 request = list_first_entry(&ring->request_list,
2577 struct drm_i915_gem_request,
2578 list);
2579
2580 list_del(&request->list);
2581 i915_gem_request_remove_from_client(request);
2582 kfree(request);
2583 }
2584
2585 while (!list_empty(&ring->active_list)) {
2586 struct drm_i915_gem_object *obj;
2587
2588 obj = list_first_entry(&ring->active_list,
2589 struct drm_i915_gem_object,
2590 ring_list);
2591
2592 i915_gem_object_move_to_inactive(obj);
2593 }
2594 }
2595
2596 static void i915_gem_reset_fences(struct drm_device *dev)
2597 {
2598 struct drm_i915_private *dev_priv = dev->dev_private;
2599 int i;
2600
2601 for (i = 0; i < dev_priv->num_fence_regs; i++) {
2602 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
2603
2604 i915_gem_write_fence(dev, i, NULL);
2605
2606 if (reg->obj)
2607 i915_gem_object_fence_lost(reg->obj);
2608
2609 reg->pin_count = 0;
2610 reg->obj = NULL;
2611 INIT_LIST_HEAD(&reg->lru_list);
2612 }
2613
2614 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
2615 }
2616
2617 void i915_gem_reset(struct drm_device *dev)
2618 {
2619 struct drm_i915_private *dev_priv = dev->dev_private;
2620 struct drm_i915_gem_object *obj;
2621 struct intel_ring_buffer *ring;
2622 int i;
2623
2624 for_each_ring(ring, dev_priv, i)
2625 i915_gem_reset_ring_lists(dev_priv, ring);
2626
2627 /* Move everything out of the GPU domains to ensure we do any
2628 * necessary invalidation upon reuse.
2629 */
2630 list_for_each_entry(obj,
2631 &dev_priv->mm.inactive_list,
2632 mm_list)
2633 {
2634 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
2635 }
2636
2637 /* The fence registers are invalidated so clear them out */
2638 i915_gem_reset_fences(dev);
2639 }
2640
2641 /**
2642 * This function clears the request list as sequence numbers are passed.
2643 */
2644 void
2645 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
2646 {
2647 uint32_t seqno;
2648
2649 if (list_empty(&ring->request_list))
2650 return;
2651
2652 WARN_ON(i915_verify_lists(ring->dev));
2653
2654 seqno = ring->get_seqno(ring, true);
2655
2656 while (!list_empty(&ring->request_list)) {
2657 struct drm_i915_gem_request *request;
2658
2659 request = list_first_entry(&ring->request_list,
2660 struct drm_i915_gem_request,
2661 list);
2662
2663 if (!i915_seqno_passed(seqno, request->seqno))
2664 break;
2665
2666 trace_i915_gem_request_retire(ring, request->seqno);
2667 /* We know the GPU must have read the request to have
2668 * sent us the seqno + interrupt, so use the position
2669 * of the tail of the request to update the last known position
2670 * of the GPU head.
2671 */
2672 ring->last_retired_head = request->tail;
2673
2674 list_del(&request->list);
2675 i915_gem_request_remove_from_client(request);
2676 kfree(request);
2677 }
2678
2679 /* Move any buffers on the active list that are no longer referenced
2680 * by the ringbuffer to the flushing/inactive lists as appropriate.
2681 */
2682 while (!list_empty(&ring->active_list)) {
2683 struct drm_i915_gem_object *obj;
2684
2685 obj = list_first_entry(&ring->active_list,
2686 struct drm_i915_gem_object,
2687 ring_list);
2688
2689 if (!i915_seqno_passed(seqno, obj->last_read_seqno))
2690 break;
2691
2692 i915_gem_object_move_to_inactive(obj);
2693 }
2694
2695 if (unlikely(ring->trace_irq_seqno &&
2696 i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
2697 ring->irq_put(ring);
2698 ring->trace_irq_seqno = 0;
2699 }
2700
2701 WARN_ON(i915_verify_lists(ring->dev));
2702 }
2703
2704 void
2705 i915_gem_retire_requests(struct drm_device *dev)
2706 {
2707 drm_i915_private_t *dev_priv = dev->dev_private;
2708 struct intel_ring_buffer *ring;
2709 int i;
2710
2711 for_each_ring(ring, dev_priv, i)
2712 i915_gem_retire_requests_ring(ring);
2713 }
2714
2715 static void
2716 i915_gem_retire_work_handler(struct work_struct *work)
2717 {
2718 drm_i915_private_t *dev_priv;
2719 struct drm_device *dev;
2720 struct intel_ring_buffer *ring;
2721 bool idle;
2722 int i;
2723
2724 dev_priv = container_of(work, drm_i915_private_t,
2725 mm.retire_work.work);
2726 dev = dev_priv->dev;
2727
2728 /* Come back later if the device is busy... */
2729 if (!mutex_trylock(&dev->struct_mutex)) {
2730 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
2731 round_jiffies_up_relative(HZ));
2732 return;
2733 }
2734
2735 i915_gem_retire_requests(dev);
2736
2737 /* Send a periodic flush down the ring so we don't hold onto GEM
2738 * objects indefinitely.
2739 */
2740 idle = true;
2741 for_each_ring(ring, dev_priv, i) {
2742 if (ring->gpu_caches_dirty)
2743 i915_add_request(ring, NULL, NULL);
2744
2745 idle &= list_empty(&ring->request_list);
2746 }
2747
2748 if (!dev_priv->mm.suspended && !idle)
2749 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
2750 round_jiffies_up_relative(HZ));
2751 if (idle)
2752 intel_mark_idle(dev);
2753
2754 mutex_unlock(&dev->struct_mutex);
2755 }
2756
2757 /**
2758 * Ensures that an object will eventually get non-busy by flushing any required
2759 * write domains, emitting any outstanding lazy request and retiring all
2760 * completed requests.
2761 */
2762 static int
2763 i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
2764 {
2765 int ret;
2766
2767 if (obj->active) {
2768 ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
2769 if (ret)
2770 return ret;
2771
2772 i915_gem_retire_requests_ring(obj->ring);
2773 }
2774
2775 return 0;
2776 }
2777
2778 /**
2779 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
2780 * @DRM_IOCTL_ARGS: standard ioctl arguments
2781 *
2782 * Returns 0 if successful, else an error is returned with the remaining time in
2783 * the timeout parameter.
2784 * -ETIME: object is still busy after timeout
2785 * -ERESTARTSYS: signal interrupted the wait
2786 * -ENOENT: object doesn't exist
2787 * Also possible, but rare:
2788 * -EAGAIN: GPU wedged
2789 * -ENOMEM: damn
2790 * -ENODEV: Internal IRQ fail
2791 * -E?: The add request failed
2792 *
2793 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2794 * non-zero timeout parameter the wait ioctl will wait for the given number of
2795 * nanoseconds on an object becoming unbusy. Since the wait itself does so
2796 * without holding struct_mutex, the object may become re-busied before this
2797 * function completes. A similar but shorter race condition exists in the busy
2798 * ioctl.
2799 */
2800 int
2801 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2802 {
2803 struct drm_i915_gem_wait *args = data;
2804 struct drm_i915_gem_object *obj;
2805 struct intel_ring_buffer *ring = NULL;
2806 struct timespec timeout_stack, *timeout = NULL;
2807 u32 seqno = 0;
2808 int ret = 0;
2809
2810 if (args->timeout_ns >= 0) {
2811 timeout_stack = ns_to_timespec(args->timeout_ns);
2812 timeout = &timeout_stack;
2813 }
2814
2815 ret = i915_mutex_lock_interruptible(dev);
2816 if (ret)
2817 return ret;
2818
2819 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
2820 if (&obj->base == NULL) {
2821 mutex_unlock(&dev->struct_mutex);
2822 return -ENOENT;
2823 }
2824
2825 /* Need to make sure the object gets inactive eventually. */
2826 ret = i915_gem_object_flush_active(obj);
2827 if (ret)
2828 goto out;
2829
2830 if (obj->active) {
2831 seqno = obj->last_read_seqno;
2832 ring = obj->ring;
2833 }
2834
2835 if (seqno == 0)
2836 goto out;
2837
2838 /* Do this after OLR check to make sure we make forward progress polling
2839 * on this IOCTL with a 0 timeout (like busy ioctl)
2840 */
2841 if (!args->timeout_ns) {
2842 ret = -ETIME;
2843 goto out;
2844 }
2845
2846 drm_gem_object_unreference(&obj->base);
2847 mutex_unlock(&dev->struct_mutex);
2848
2849 ret = __wait_seqno(ring, seqno, true, timeout);
2850 if (timeout) {
2851 WARN_ON(!timespec_valid(timeout));
2852 args->timeout_ns = timespec_to_ns(timeout);
2853 }
2854 return ret;
2855
2856 out:
2857 drm_gem_object_unreference(&obj->base);
2858 mutex_unlock(&dev->struct_mutex);
2859 return ret;
2860 }
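
/*
 * Example (illustrative only): the userspace side of the wait ioctl.
 * This assumes the uapi names DRM_IOCTL_I915_GEM_WAIT and struct
 * drm_i915_gem_wait from <drm/i915_drm.h>; a timeout of 0 behaves like
 * the busy ioctl, and ETIME means the object is still busy.  Not
 * compiled here.
 */
#if 0
#include <sys/ioctl.h>
#include <errno.h>
#include <drm/i915_drm.h>

static int
example_wait_bo(int drm_fd, uint32_t handle, int64_t timeout_ns)
{
        struct drm_i915_gem_wait wait = {
                .bo_handle = handle,
                .timeout_ns = timeout_ns,
        };

        if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_WAIT, &wait) == 0)
                return 0;               /* object is idle */
        return -errno;                  /* -ETIME if still busy, etc. */
}
#endif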
2861
2862 /**
2863 * i915_gem_object_sync - sync an object to a ring.
2864 *
2865 * @obj: object which may be in use on another ring.
2866 * @to: ring we wish to use the object on. May be NULL.
2867 *
2868 * This code is meant to abstract object synchronization with the GPU.
2869 * Calling with NULL implies synchronizing the object with the CPU
2870 * rather than a particular GPU ring.
2871 *
2872 * Returns 0 if successful, else propagates up the lower layer error.
2873 */
2874 int
2875 i915_gem_object_sync(struct drm_i915_gem_object *obj,
2876 struct intel_ring_buffer *to)
2877 {
2878 struct intel_ring_buffer *from = obj->ring;
2879 u32 seqno;
2880 int ret, idx;
2881
2882 if (from == NULL || to == from)
2883 return 0;
2884
2885 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
2886 return i915_gem_object_wait_rendering(obj, false);
2887
2888 idx = intel_ring_sync_index(from, to);
2889
2890 seqno = obj->last_read_seqno;
2891 if (seqno <= from->sync_seqno[idx])
2892 return 0;
2893
2894 ret = i915_gem_check_olr(obj->ring, seqno);
2895 if (ret)
2896 return ret;
2897
2898 ret = to->sync_to(to, from, seqno);
2899 if (!ret)
2900 /* We use last_read_seqno because sync_to()
2901 * might have just caused seqno wrap under
2902 * the radar.
2903 */
2904 from->sync_seqno[idx] = obj->last_read_seqno;
2905
2906 return ret;
2907 }
2908
2909 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
2910 {
2911 u32 old_write_domain, old_read_domains;
2912
2913 /* Act as a barrier for all accesses through the GTT */
2914 mb();
2915
2916 /* Force a pagefault for domain tracking on next user access */
2917 i915_gem_release_mmap(obj);
2918
2919 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
2920 return;
2921
2922 old_read_domains = obj->base.read_domains;
2923 old_write_domain = obj->base.write_domain;
2924
2925 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
2926 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
2927
2928 trace_i915_gem_object_change_domain(obj,
2929 old_read_domains,
2930 old_write_domain);
2931 }
2932
2933 /**
2934 * Unbinds an object from the GTT aperture.
2935 */
2936 int
2937 i915_gem_object_unbind(struct drm_i915_gem_object *obj)
2938 {
2939 drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
2940 int ret = 0;
2941
2942 if (obj->gtt_space == NULL)
2943 return 0;
2944
2945 if (obj->pin_count)
2946 return -EBUSY;
2947
2948 BUG_ON(obj->pages == NULL);
2949
2950 ret = i915_gem_object_finish_gpu(obj);
2951 if (ret)
2952 return ret;
2953 /* Continue on if we fail due to EIO, the GPU is hung so we
2954 * should be safe and we need to cleanup or else we might
2955 * cause memory corruption through use-after-free.
2956 */
2957
2958 i915_gem_object_finish_gtt(obj);
2959
2960 /* release the fence reg _after_ flushing */
2961 ret = i915_gem_object_put_fence(obj);
2962 if (ret)
2963 return ret;
2964
2965 trace_i915_gem_object_unbind(obj);
2966
2967 if (obj->has_global_gtt_mapping)
2968 i915_gem_gtt_unbind_object(obj);
2969 if (obj->has_aliasing_ppgtt_mapping) {
2970 i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj);
2971 obj->has_aliasing_ppgtt_mapping = 0;
2972 }
2973 i915_gem_gtt_finish_object(obj);
2974
2975 list_del(&obj->mm_list);
2976 list_move_tail(&obj->gtt_list, &dev_priv->mm.unbound_list);
2977 /* Avoid an unnecessary call to unbind on rebind. */
2978 obj->map_and_fenceable = true;
2979
2980 drm_mm_put_block(obj->gtt_space);
2981 obj->gtt_space = NULL;
2982 obj->gtt_offset = 0;
2983
2984 return 0;
2985 }
2986
2987 int i915_gpu_idle(struct drm_device *dev)
2988 {
2989 drm_i915_private_t *dev_priv = dev->dev_private;
2990 struct intel_ring_buffer *ring;
2991 int ret, i;
2992
2993 /* Flush everything onto the inactive list. */
2994 for_each_ring(ring, dev_priv, i) {
2995 ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID);
2996 if (ret)
2997 return ret;
2998
2999 ret = intel_ring_idle(ring);
3000 if (ret)
3001 return ret;
3002 }
3003
3004 return 0;
3005 }
3006
3007 static void sandybridge_write_fence_reg(struct drm_device *dev, int reg,
3008 struct drm_i915_gem_object *obj)
3009 {
3010 drm_i915_private_t *dev_priv = dev->dev_private;
3011 uint64_t val;
3012
3013 if (obj) {
3014 u32 size = obj->gtt_space->size;
3015
3016 val = (uint64_t)((obj->gtt_offset + size - 4096) &
3017 0xfffff000) << 32;
3018 val |= obj->gtt_offset & 0xfffff000;
3019 val |= (uint64_t)((obj->stride / 128) - 1) <<
3020 SANDYBRIDGE_FENCE_PITCH_SHIFT;
3021
3022 if (obj->tiling_mode == I915_TILING_Y)
3023 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
3024 val |= I965_FENCE_REG_VALID;
3025 } else
3026 val = 0;
3027
3028 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + reg * 8, val);
3029 POSTING_READ(FENCE_REG_SANDYBRIDGE_0 + reg * 8);
3030 }
3031
3032 static void i965_write_fence_reg(struct drm_device *dev, int reg,
3033 struct drm_i915_gem_object *obj)
3034 {
3035 drm_i915_private_t *dev_priv = dev->dev_private;
3036 uint64_t val;
3037
3038 if (obj) {
3039 u32 size = obj->gtt_space->size;
3040
3041 val = (uint64_t)((obj->gtt_offset + size - 4096) &
3042 0xfffff000) << 32;
3043 val |= obj->gtt_offset & 0xfffff000;
3044 val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
3045 if (obj->tiling_mode == I915_TILING_Y)
3046 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
3047 val |= I965_FENCE_REG_VALID;
3048 } else
3049 val = 0;
3050
3051 I915_WRITE64(FENCE_REG_965_0 + reg * 8, val);
3052 POSTING_READ(FENCE_REG_965_0 + reg * 8);
3053 }
3054
3055 static void i915_write_fence_reg(struct drm_device *dev, int reg,
3056 struct drm_i915_gem_object *obj)
3057 {
3058 drm_i915_private_t *dev_priv = dev->dev_private;
3059 u32 val;
3060
3061 if (obj) {
3062 u32 size = obj->gtt_space->size;
3063 int pitch_val;
3064 int tile_width;
3065
3066 WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
3067 (size & -size) != size ||
3068 (obj->gtt_offset & (size - 1)),
3069 "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
3070 obj->gtt_offset, obj->map_and_fenceable, size);
3071
3072 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
3073 tile_width = 128;
3074 else
3075 tile_width = 512;
3076
3077 /* Note: pitch better be a power of two tile widths */
3078 pitch_val = obj->stride / tile_width;
3079 pitch_val = ffs(pitch_val) - 1;
3080
3081 val = obj->gtt_offset;
3082 if (obj->tiling_mode == I915_TILING_Y)
3083 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
3084 val |= I915_FENCE_SIZE_BITS(size);
3085 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
3086 val |= I830_FENCE_REG_VALID;
3087 } else
3088 val = 0;
3089
3090 if (reg < 8)
3091 reg = FENCE_REG_830_0 + reg * 4;
3092 else
3093 reg = FENCE_REG_945_8 + (reg - 8) * 4;
3094
3095 I915_WRITE(reg, val);
3096 POSTING_READ(reg);
3097 }
3098
3099 static void i830_write_fence_reg(struct drm_device *dev, int reg,
3100 struct drm_i915_gem_object *obj)
3101 {
3102 drm_i915_private_t *dev_priv = dev->dev_private;
3103 uint32_t val;
3104
3105 if (obj) {
3106 u32 size = obj->gtt_space->size;
3107 uint32_t pitch_val;
3108
3109 WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
3110 (size & -size) != size ||
3111 (obj->gtt_offset & (size - 1)),
3112 "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
3113 obj->gtt_offset, size);
3114
3115 pitch_val = obj->stride / 128;
3116 pitch_val = ffs(pitch_val) - 1;
3117
3118 val = obj->gtt_offset;
3119 if (obj->tiling_mode == I915_TILING_Y)
3120 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
3121 val |= I830_FENCE_SIZE_BITS(size);
3122 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
3123 val |= I830_FENCE_REG_VALID;
3124 } else
3125 val = 0;
3126
3127 I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
3128 POSTING_READ(FENCE_REG_830_0 + reg * 4);
3129 }
3130
3131 static void i915_gem_write_fence(struct drm_device *dev, int reg,
3132 struct drm_i915_gem_object *obj)
3133 {
3134 switch (INTEL_INFO(dev)->gen) {
3135 case 7:
3136 case 6: sandybridge_write_fence_reg(dev, reg, obj); break;
3137 case 5:
3138 case 4: i965_write_fence_reg(dev, reg, obj); break;
3139 case 3: i915_write_fence_reg(dev, reg, obj); break;
3140 case 2: i830_write_fence_reg(dev, reg, obj); break;
3141 default: break;
3142 }
3143 }
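
/*
 * Example (illustrative only): the gen2/3 pitch encoding used by
 * i915_write_fence_reg() and i830_write_fence_reg() above.  The stride
 * must be a power-of-two number of tile widths; e.g. an X-tiled gen3
 * surface with a 2048-byte stride and 512-byte tiles encodes as
 * ffs(2048 / 512) - 1 = 2.  The helper name is hypothetical and the
 * block is not compiled.
 */
#if 0
static inline int
example_fence_pitch_val(u32 stride, int tile_width)
{
        return ffs(stride / tile_width) - 1;
}
#endif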
3144
3145 static inline int fence_number(struct drm_i915_private *dev_priv,
3146 struct drm_i915_fence_reg *fence)
3147 {
3148 return fence - dev_priv->fence_regs;
3149 }
3150
3151 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
3152 struct drm_i915_fence_reg *fence,
3153 bool enable)
3154 {
3155 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3156 int reg = fence_number(dev_priv, fence);
3157
3158 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
3159
3160 if (enable) {
3161 obj->fence_reg = reg;
3162 fence->obj = obj;
3163 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
3164 } else {
3165 obj->fence_reg = I915_FENCE_REG_NONE;
3166 fence->obj = NULL;
3167 list_del_init(&fence->lru_list);
3168 }
3169 }
3170
3171 static int
3172 i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
3173 {
3174 if (obj->last_fenced_seqno) {
3175 int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
3176 if (ret)
3177 return ret;
3178
3179 obj->last_fenced_seqno = 0;
3180 }
3181
3182 /* Ensure that all CPU reads are completed before installing a fence
3183 * and all writes before removing the fence.
3184 */
3185 if (obj->base.read_domains & I915_GEM_DOMAIN_GTT)
3186 mb();
3187
3188 obj->fenced_gpu_access = false;
3189 return 0;
3190 }
3191
3192 int
3193 i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
3194 {
3195 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3196 int ret;
3197
3198 ret = i915_gem_object_flush_fence(obj);
3199 if (ret)
3200 return ret;
3201
3202 if (obj->fence_reg == I915_FENCE_REG_NONE)
3203 return 0;
3204
3205 i915_gem_object_update_fence(obj,
3206 &dev_priv->fence_regs[obj->fence_reg],
3207 false);
3208 i915_gem_object_fence_lost(obj);
3209
3210 return 0;
3211 }
3212
3213 static struct drm_i915_fence_reg *
3214 i915_find_fence_reg(struct drm_device *dev)
3215 {
3216 struct drm_i915_private *dev_priv = dev->dev_private;
3217 struct drm_i915_fence_reg *reg, *avail;
3218 int i;
3219
3220 /* First try to find a free reg */
3221 avail = NULL;
3222 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
3223 reg = &dev_priv->fence_regs[i];
3224 if (!reg->obj)
3225 return reg;
3226
3227 if (!reg->pin_count)
3228 avail = reg;
3229 }
3230
3231 if (avail == NULL)
3232 return NULL;
3233
3234 /* None available, try to steal one or wait for a user to finish */
3235 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
3236 if (reg->pin_count)
3237 continue;
3238
3239 return reg;
3240 }
3241
3242 return NULL;
3243 }
3244
3245 /**
3246 * i915_gem_object_get_fence - set up fencing for an object
3247 * @obj: object to map through a fence reg
3248 *
3249 * When mapping objects through the GTT, userspace wants to be able to write
3250 * to them without having to worry about swizzling if the object is tiled.
3251 * This function walks the fence regs looking for a free one for @obj,
3252 * stealing one if it can't find any.
3253 *
3254 * It then sets up the reg based on the object's properties: address, pitch
3255 * and tiling format.
3256 *
3257 * For an untiled surface, this removes any existing fence.
3258 */
3259 int
3260 i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
3261 {
3262 struct drm_device *dev = obj->base.dev;
3263 struct drm_i915_private *dev_priv = dev->dev_private;
3264 bool enable = obj->tiling_mode != I915_TILING_NONE;
3265 struct drm_i915_fence_reg *reg;
3266 int ret;
3267
3268 /* Have we updated the tiling parameters upon the object and so
3269 * will need to serialise the write to the associated fence register?
3270 */
3271 if (obj->fence_dirty) {
3272 ret = i915_gem_object_flush_fence(obj);
3273 if (ret)
3274 return ret;
3275 }
3276
3277 /* Just update our place in the LRU if our fence is getting reused. */
3278 if (obj->fence_reg != I915_FENCE_REG_NONE) {
3279 reg = &dev_priv->fence_regs[obj->fence_reg];
3280 if (!obj->fence_dirty) {
3281 list_move_tail(&reg->lru_list,
3282 &dev_priv->mm.fence_list);
3283 return 0;
3284 }
3285 } else if (enable) {
3286 reg = i915_find_fence_reg(dev);
3287 if (reg == NULL)
3288 return -EDEADLK;
3289
3290 if (reg->obj) {
3291 struct drm_i915_gem_object *old = reg->obj;
3292
3293 ret = i915_gem_object_flush_fence(old);
3294 if (ret)
3295 return ret;
3296
3297 i915_gem_object_fence_lost(old);
3298 }
3299 } else
3300 return 0;
3301
3302 i915_gem_object_update_fence(obj, reg, enable);
3303 obj->fence_dirty = false;
3304
3305 return 0;
3306 }
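
/*
 * Example (illustrative only): the usual pairing of a mappable pin with a
 * fence when a tiled object is about to be accessed by the CPU through
 * the GTT.  struct_mutex is assumed to be held; the helper name is
 * hypothetical and the block is not compiled.
 */
#if 0
static int
example_pin_and_fence(struct drm_i915_gem_object *obj)
{
        int ret;

        /* Bind into the mappable aperture with fence-compatible alignment. */
        ret = i915_gem_object_pin(obj, 0, true, false);
        if (ret)
                return ret;

        /* Tiled objects need a fence for detiled CPU access via the GTT. */
        ret = i915_gem_object_get_fence(obj);
        if (ret)
                i915_gem_object_unpin(obj);

        return ret;
}
#endif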
3307
3308 static bool i915_gem_valid_gtt_space(struct drm_device *dev,
3309 struct drm_mm_node *gtt_space,
3310 unsigned long cache_level)
3311 {
3312 struct drm_mm_node *other;
3313
3314 /* On non-LLC machines we have to be careful when putting differing
3315 * types of snoopable memory together to avoid the prefetcher
3316 * crossing memory domains and dying.
3317 */
3318 if (HAS_LLC(dev))
3319 return true;
3320
3321 if (gtt_space == NULL)
3322 return true;
3323
3324 if (list_empty(&gtt_space->node_list))
3325 return true;
3326
3327 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
3328 if (other->allocated && !other->hole_follows && other->color != cache_level)
3329 return false;
3330
3331 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
3332 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
3333 return false;
3334
3335 return true;
3336 }
3337
3338 static void i915_gem_verify_gtt(struct drm_device *dev)
3339 {
3340 #if WATCH_GTT
3341 struct drm_i915_private *dev_priv = dev->dev_private;
3342 struct drm_i915_gem_object *obj;
3343 int err = 0;
3344
3345 list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) {
3346 if (obj->gtt_space == NULL) {
3347 printk(KERN_ERR "object found on GTT list with no space reserved\n");
3348 err++;
3349 continue;
3350 }
3351
3352 if (obj->cache_level != obj->gtt_space->color) {
3353 printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n",
3354 obj->gtt_space->start,
3355 obj->gtt_space->start + obj->gtt_space->size,
3356 obj->cache_level,
3357 obj->gtt_space->color);
3358 err++;
3359 continue;
3360 }
3361
3362 if (!i915_gem_valid_gtt_space(dev,
3363 obj->gtt_space,
3364 obj->cache_level)) {
3365 printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n",
3366 obj->gtt_space->start,
3367 obj->gtt_space->start + obj->gtt_space->size,
3368 obj->cache_level);
3369 err++;
3370 continue;
3371 }
3372 }
3373
3374 WARN_ON(err);
3375 #endif
3376 }
3377
3378 /**
3379 * Finds free space in the GTT aperture and binds the object there.
3380 */
3381 static int
3382 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
3383 unsigned alignment,
3384 bool map_and_fenceable,
3385 bool nonblocking)
3386 {
3387 struct drm_device *dev = obj->base.dev;
3388 drm_i915_private_t *dev_priv = dev->dev_private;
3389 struct drm_mm_node *node;
3390 u32 size, fence_size, fence_alignment, unfenced_alignment;
3391 bool mappable, fenceable;
3392 int ret;
3393
3394 if (obj->madv != I915_MADV_WILLNEED) {
3395 DRM_ERROR("Attempting to bind a purgeable object\n");
3396 return -EINVAL;
3397 }
3398
3399 fence_size = i915_gem_get_gtt_size(dev,
3400 obj->base.size,
3401 obj->tiling_mode);
3402 fence_alignment = i915_gem_get_gtt_alignment(dev,
3403 obj->base.size,
3404 obj->tiling_mode);
3405 unfenced_alignment =
3406 i915_gem_get_unfenced_gtt_alignment(dev,
3407 obj->base.size,
3408 obj->tiling_mode);
3409
3410 if (alignment == 0)
3411 alignment = map_and_fenceable ? fence_alignment :
3412 unfenced_alignment;
3413 if (map_and_fenceable && alignment & (fence_alignment - 1)) {
3414 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
3415 return -EINVAL;
3416 }
3417
3418 size = map_and_fenceable ? fence_size : obj->base.size;
3419
3420 /* If the object is bigger than the entire aperture, reject it early
3421 * before evicting everything in a vain attempt to find space.
3422 */
3423 if (obj->base.size >
3424 (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) {
3425 DRM_ERROR("Attempting to bind an object larger than the aperture\n");
3426 return -E2BIG;
3427 }
3428
3429 ret = i915_gem_object_get_pages(obj);
3430 if (ret)
3431 return ret;
3432
3433 i915_gem_object_pin_pages(obj);
3434
3435 node = kzalloc(sizeof(*node), GFP_KERNEL);
3436 if (node == NULL) {
3437 i915_gem_object_unpin_pages(obj);
3438 return -ENOMEM;
3439 }
3440
3441 search_free:
3442 if (map_and_fenceable)
3443 ret = drm_mm_insert_node_in_range_generic(&dev_priv->mm.gtt_space, node,
3444 size, alignment, obj->cache_level,
3445 0, dev_priv->mm.gtt_mappable_end);
3446 else
3447 ret = drm_mm_insert_node_generic(&dev_priv->mm.gtt_space, node,
3448 size, alignment, obj->cache_level);
3449 if (ret) {
3450 ret = i915_gem_evict_something(dev, size, alignment,
3451 obj->cache_level,
3452 map_and_fenceable,
3453 nonblocking);
3454 if (ret == 0)
3455 goto search_free;
3456
3457 i915_gem_object_unpin_pages(obj);
3458 kfree(node);
3459 return ret;
3460 }
3461 if (WARN_ON(!i915_gem_valid_gtt_space(dev, node, obj->cache_level))) {
3462 i915_gem_object_unpin_pages(obj);
3463 drm_mm_put_block(node);
3464 return -EINVAL;
3465 }
3466
3467 ret = i915_gem_gtt_prepare_object(obj);
3468 if (ret) {
3469 i915_gem_object_unpin_pages(obj);
3470 drm_mm_put_block(node);
3471 return ret;
3472 }
3473
3474 list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list);
3475 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
3476
3477 obj->gtt_space = node;
3478 obj->gtt_offset = node->start;
3479
3480 fenceable =
3481 node->size == fence_size &&
3482 (node->start & (fence_alignment - 1)) == 0;
3483
3484 mappable =
3485 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
3486
3487 obj->map_and_fenceable = mappable && fenceable;
3488
3489 i915_gem_object_unpin_pages(obj);
3490 trace_i915_gem_object_bind(obj, map_and_fenceable);
3491 i915_gem_verify_gtt(dev);
3492 return 0;
3493 }
3494
3495 void
3496 i915_gem_clflush_object(struct drm_i915_gem_object *obj)
3497 {
3498 /* If we don't have a page list set up, then we're not pinned
3499 * to GPU, and we can ignore the cache flush because it'll happen
3500 * again at bind time.
3501 */
3502 if (obj->pages == NULL)
3503 return;
3504
3505 /* If the GPU is snooping the contents of the CPU cache,
3506 * we do not need to manually clear the CPU cache lines. However,
3507 * the caches are only snooped when the render cache is
3508 * flushed/invalidated. As we always have to emit invalidations
3509 * and flushes when moving into and out of the RENDER domain, correct
3510 * snooping behaviour occurs naturally as the result of our domain
3511 * tracking.
3512 */
3513 if (obj->cache_level != I915_CACHE_NONE)
3514 return;
3515
3516 trace_i915_gem_object_clflush(obj);
3517
3518 #ifdef __NetBSD__
3519 drm_clflush_pglist(&obj->igo_pageq);
3520 #else
3521 drm_clflush_sg(obj->pages);
3522 #endif
3523 }
3524
3525 /** Flushes the GTT write domain for the object if it's dirty. */
3526 static void
3527 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
3528 {
3529 uint32_t old_write_domain;
3530
3531 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
3532 return;
3533
3534 /* No actual flushing is required for the GTT write domain. Writes
3535 * to it immediately go to main memory as far as we know, so there's
3536 * no chipset flush. It also doesn't land in render cache.
3537 *
3538 * However, we do have to enforce the order so that all writes through
3539 * the GTT land before any writes to the device, such as updates to
3540 * the GATT itself.
3541 */
3542 wmb();
3543
3544 old_write_domain = obj->base.write_domain;
3545 obj->base.write_domain = 0;
3546
3547 trace_i915_gem_object_change_domain(obj,
3548 obj->base.read_domains,
3549 old_write_domain);
3550 }
3551
3552 /** Flushes the CPU write domain for the object if it's dirty. */
3553 static void
3554 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
3555 {
3556 uint32_t old_write_domain;
3557
3558 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
3559 return;
3560
3561 i915_gem_clflush_object(obj);
3562 i915_gem_chipset_flush(obj->base.dev);
3563 old_write_domain = obj->base.write_domain;
3564 obj->base.write_domain = 0;
3565
3566 trace_i915_gem_object_change_domain(obj,
3567 obj->base.read_domains,
3568 old_write_domain);
3569 }
3570
3571 /**
3572 * Moves a single object to the GTT read, and possibly write domain.
3573 *
3574 * This function returns when the move is complete, including waiting on
3575 * flushes to occur.
3576 */
3577 int
3578 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3579 {
3580 drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
3581 uint32_t old_write_domain, old_read_domains;
3582 int ret;
3583
3584 /* Not valid to be called on unbound objects. */
3585 if (obj->gtt_space == NULL)
3586 return -EINVAL;
3587
3588 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3589 return 0;
3590
3591 ret = i915_gem_object_wait_rendering(obj, !write);
3592 if (ret)
3593 return ret;
3594
3595 i915_gem_object_flush_cpu_write_domain(obj);
3596
3597 old_write_domain = obj->base.write_domain;
3598 old_read_domains = obj->base.read_domains;
3599
3600 /* It should now be out of any other write domains, and we can update
3601 * the domain values for our changes.
3602 */
3603 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3604 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3605 if (write) {
3606 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
3607 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
3608 obj->dirty = 1;
3609 }
3610
3611 trace_i915_gem_object_change_domain(obj,
3612 old_read_domains,
3613 old_write_domain);
3614
3615 /* And bump the LRU for this access */
3616 if (i915_gem_object_is_inactive(obj))
3617 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
3618
3619 return 0;
3620 }
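
/*
 * Example (illustrative only): a typical domain round trip - move the
 * object into the GTT domain before writing through a GTT mapping, then
 * back to the CPU domain before reading it with the CPU.  struct_mutex is
 * assumed to be held and the object bound; the helper name is
 * hypothetical and the block is not compiled.
 */
#if 0
static int
example_domain_round_trip(struct drm_i915_gem_object *obj)
{
        int ret;

        /* Flush any CPU writes and make the GTT the exclusive write domain. */
        ret = i915_gem_object_set_to_gtt_domain(obj, true);
        if (ret)
                return ret;

        /* ... write through the GTT mapping here ... */

        /* Flush the GTT writes and make the buffer CPU-coherent again. */
        return i915_gem_object_set_to_cpu_domain(obj, false);
}
#endif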
3621
3622 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3623 enum i915_cache_level cache_level)
3624 {
3625 struct drm_device *dev = obj->base.dev;
3626 drm_i915_private_t *dev_priv = dev->dev_private;
3627 int ret;
3628
3629 if (obj->cache_level == cache_level)
3630 return 0;
3631
3632 if (obj->pin_count) {
3633 DRM_DEBUG("can not change the cache level of pinned objects\n");
3634 return -EBUSY;
3635 }
3636
3637 if (!i915_gem_valid_gtt_space(dev, obj->gtt_space, cache_level)) {
3638 ret = i915_gem_object_unbind(obj);
3639 if (ret)
3640 return ret;
3641 }
3642
3643 if (obj->gtt_space) {
3644 ret = i915_gem_object_finish_gpu(obj);
3645 if (ret)
3646 return ret;
3647
3648 i915_gem_object_finish_gtt(obj);
3649
3650 /* Before SandyBridge, you could not use tiling or fence
3651 * registers with snooped memory, so relinquish any fences
3652 * currently pointing to our region in the aperture.
3653 */
3654 if (INTEL_INFO(dev)->gen < 6) {
3655 ret = i915_gem_object_put_fence(obj);
3656 if (ret)
3657 return ret;
3658 }
3659
3660 if (obj->has_global_gtt_mapping)
3661 i915_gem_gtt_bind_object(obj, cache_level);
3662 if (obj->has_aliasing_ppgtt_mapping)
3663 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
3664 obj, cache_level);
3665
3666 obj->gtt_space->color = cache_level;
3667 }
3668
3669 if (cache_level == I915_CACHE_NONE) {
3670 u32 old_read_domains, old_write_domain;
3671
3672 /* If we're coming from LLC cached, then we haven't
3673 * actually been tracking whether the data is in the
3674 * CPU cache or not, since we only allow one bit set
3675 * in obj->write_domain and have been skipping the clflushes.
3676 * Just set it to the CPU cache for now.
3677 */
3678 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
3679 WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU);
3680
3681 old_read_domains = obj->base.read_domains;
3682 old_write_domain = obj->base.write_domain;
3683
3684 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3685 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3686
3687 trace_i915_gem_object_change_domain(obj,
3688 old_read_domains,
3689 old_write_domain);
3690 }
3691
3692 obj->cache_level = cache_level;
3693 i915_gem_verify_gtt(dev);
3694 return 0;
3695 }
3696
3697 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3698 struct drm_file *file)
3699 {
3700 struct drm_i915_gem_caching *args = data;
3701 struct drm_i915_gem_object *obj;
3702 int ret;
3703
3704 ret = i915_mutex_lock_interruptible(dev);
3705 if (ret)
3706 return ret;
3707
3708 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3709 if (&obj->base == NULL) {
3710 ret = -ENOENT;
3711 goto unlock;
3712 }
3713
3714 args->caching = obj->cache_level != I915_CACHE_NONE;
3715
3716 drm_gem_object_unreference(&obj->base);
3717 unlock:
3718 mutex_unlock(&dev->struct_mutex);
3719 return ret;
3720 }
3721
3722 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3723 struct drm_file *file)
3724 {
3725 struct drm_i915_gem_caching *args = data;
3726 struct drm_i915_gem_object *obj;
3727 enum i915_cache_level level;
3728 int ret;
3729
3730 switch (args->caching) {
3731 case I915_CACHING_NONE:
3732 level = I915_CACHE_NONE;
3733 break;
3734 case I915_CACHING_CACHED:
3735 level = I915_CACHE_LLC;
3736 break;
3737 default:
3738 return -EINVAL;
3739 }
3740
3741 ret = i915_mutex_lock_interruptible(dev);
3742 if (ret)
3743 return ret;
3744
3745 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3746 if (&obj->base == NULL) {
3747 ret = -ENOENT;
3748 goto unlock;
3749 }
3750
3751 ret = i915_gem_object_set_cache_level(obj, level);
3752
3753 drm_gem_object_unreference(&obj->base);
3754 unlock:
3755 mutex_unlock(&dev->struct_mutex);
3756 return ret;
3757 }
3758
3759 /*
3760 * Prepare buffer for display plane (scanout, cursors, etc).
3761 * Can be called from an uninterruptible phase (modesetting) and allows
3762 * any flushes to be pipelined (for pageflips).
3763 */
3764 int
3765 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3766 u32 alignment,
3767 struct intel_ring_buffer *pipelined)
3768 {
3769 u32 old_read_domains, old_write_domain;
3770 int ret;
3771
3772 if (pipelined != obj->ring) {
3773 ret = i915_gem_object_sync(obj, pipelined);
3774 if (ret)
3775 return ret;
3776 }
3777
3778 /* The display engine is not coherent with the LLC cache on gen6. As
3779 * a result, we make sure that the pinning that is about to occur is
3780 * done with uncached PTEs. This is lowest common denominator for all
3781 * chipsets.
3782 *
3783 * However for gen6+, we could do better by using the GFDT bit instead
3784 * of uncaching, which would allow us to flush all the LLC-cached data
3785 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3786 */
3787 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
3788 if (ret)
3789 return ret;
3790
3791 /* As the user may map the buffer once pinned in the display plane
3792 * (e.g. libkms for the bootup splash), we have to ensure that we
3793 * always use map_and_fenceable for all scanout buffers.
3794 */
3795 ret = i915_gem_object_pin(obj, alignment, true, false);
3796 if (ret)
3797 return ret;
3798
3799 i915_gem_object_flush_cpu_write_domain(obj);
3800
3801 old_write_domain = obj->base.write_domain;
3802 old_read_domains = obj->base.read_domains;
3803
3804 /* It should now be out of any other write domains, and we can update
3805 * the domain values for our changes.
3806 */
3807 obj->base.write_domain = 0;
3808 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3809
3810 trace_i915_gem_object_change_domain(obj,
3811 old_read_domains,
3812 old_write_domain);
3813
3814 return 0;
3815 }
3816
3817 int
3818 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
3819 {
3820 int ret;
3821
3822 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
3823 return 0;
3824
3825 ret = i915_gem_object_wait_rendering(obj, false);
3826 if (ret)
3827 return ret;
3828
3829 /* Ensure that we invalidate the GPU's caches and TLBs. */
3830 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
3831 return 0;
3832 }
3833
3834 /**
3835 * Moves a single object to the CPU read, and possibly write domain.
3836 *
3837 * This function returns when the move is complete, including waiting on
3838 * flushes to occur.
3839 */
3840 int
3841 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3842 {
3843 uint32_t old_write_domain, old_read_domains;
3844 int ret;
3845
3846 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3847 return 0;
3848
3849 ret = i915_gem_object_wait_rendering(obj, !write);
3850 if (ret)
3851 return ret;
3852
3853 i915_gem_object_flush_gtt_write_domain(obj);
3854
3855 old_write_domain = obj->base.write_domain;
3856 old_read_domains = obj->base.read_domains;
3857
3858 /* Flush the CPU cache if it's still invalid. */
3859 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3860 i915_gem_clflush_object(obj);
3861
3862 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3863 }
3864
3865 /* It should now be out of any other write domains, and we can update
3866 * the domain values for our changes.
3867 */
3868 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3869
3870 /* If we're writing through the CPU, then the GPU read domains will
3871 * need to be invalidated at next use.
3872 */
3873 if (write) {
3874 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3875 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3876 }
3877
3878 trace_i915_gem_object_change_domain(obj,
3879 old_read_domains,
3880 old_write_domain);
3881
3882 return 0;
3883 }
3884
3885 /* Throttle our rendering by waiting until the ring has completed our requests
3886 * emitted over 20 msec ago.
3887 *
3888 * Note that if we were to use the current jiffies each time around the loop,
3889 * we wouldn't escape the function with any frames outstanding if the time to
3890 * render a frame was over 20ms.
3891 *
3892 * This should get us reasonable parallelism between CPU and GPU but also
3893 * relatively low latency when blocking on a particular request to finish.
3894 */
3895 static int
3896 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3897 {
3898 struct drm_i915_private *dev_priv = dev->dev_private;
3899 struct drm_i915_file_private *file_priv = file->driver_priv;
3900 unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
3901 struct drm_i915_gem_request *request;
3902 struct intel_ring_buffer *ring = NULL;
3903 u32 seqno = 0;
3904 int ret;
3905
3906 if (atomic_read(&dev_priv->mm.wedged))
3907 return -EIO;
3908
3909 spin_lock(&file_priv->mm.lock);
3910 list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3911 if (time_after_eq(request->emitted_jiffies, recent_enough))
3912 break;
3913
3914 ring = request->ring;
3915 seqno = request->seqno;
3916 }
3917 spin_unlock(&file_priv->mm.lock);
3918
3919 if (seqno == 0)
3920 return 0;
3921
3922 ret = __wait_seqno(ring, seqno, true, NULL);
3923 if (ret == 0)
3924 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
3925
3926 return ret;
3927 }
3928
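/*
 * i915_gem_object_pin:
 *
 * Pin an object into the GTT. If the object is already bound but at an
 * offset that violates the requested alignment, or outside the mappable
 * aperture when map_and_fenceable is set, it is unbound and rebound
 * first. Each successful call increments obj->pin_count and must be
 * balanced by i915_gem_object_unpin().
 *
 * A typical caller (sketch only, not taken from this file) might look
 * like:
 *
 *	ret = i915_gem_object_pin(obj, 4096, true, false);
 *	if (ret)
 *		return ret;
 *	... program the hardware with obj->gtt_offset ...
 *	i915_gem_object_unpin(obj);
 */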
3929 int
3930 i915_gem_object_pin(struct drm_i915_gem_object *obj,
3931 uint32_t alignment,
3932 bool map_and_fenceable,
3933 bool nonblocking)
3934 {
3935 int ret;
3936
3937 if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
3938 return -EBUSY;
3939
3940 if (obj->gtt_space != NULL) {
3941 if ((alignment && obj->gtt_offset & (alignment - 1)) ||
3942 (map_and_fenceable && !obj->map_and_fenceable)) {
3943 WARN(obj->pin_count,
3944 "bo is already pinned with incorrect alignment:"
3945 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d,"
3946 " obj->map_and_fenceable=%d\n",
3947 obj->gtt_offset, alignment,
3948 map_and_fenceable,
3949 obj->map_and_fenceable);
3950 ret = i915_gem_object_unbind(obj);
3951 if (ret)
3952 return ret;
3953 }
3954 }
3955
3956 if (obj->gtt_space == NULL) {
3957 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3958
3959 ret = i915_gem_object_bind_to_gtt(obj, alignment,
3960 map_and_fenceable,
3961 nonblocking);
3962 if (ret)
3963 return ret;
3964
3965 if (!dev_priv->mm.aliasing_ppgtt)
3966 i915_gem_gtt_bind_object(obj, obj->cache_level);
3967 }
3968
3969 if (!obj->has_global_gtt_mapping && map_and_fenceable)
3970 i915_gem_gtt_bind_object(obj, obj->cache_level);
3971
3972 obj->pin_count++;
3973 obj->pin_mappable |= map_and_fenceable;
3974
3975 return 0;
3976 }
3977
3978 void
3979 i915_gem_object_unpin(struct drm_i915_gem_object *obj)
3980 {
3981 BUG_ON(obj->pin_count == 0);
3982 BUG_ON(obj->gtt_space == NULL);
3983
3984 if (--obj->pin_count == 0)
3985 obj->pin_mappable = false;
3986 }
3987
3988 int
3989 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
3990 struct drm_file *file)
3991 {
3992 struct drm_i915_gem_pin *args = data;
3993 struct drm_i915_gem_object *obj;
3994 int ret;
3995
3996 ret = i915_mutex_lock_interruptible(dev);
3997 if (ret)
3998 return ret;
3999
4000 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4001 if (&obj->base == NULL) {
4002 ret = -ENOENT;
4003 goto unlock;
4004 }
4005
4006 if (obj->madv != I915_MADV_WILLNEED) {
4007 DRM_ERROR("Attempting to pin a purgeable buffer\n");
4008 ret = -EINVAL;
4009 goto out;
4010 }
4011
4012 if (obj->pin_filp != NULL && obj->pin_filp != file) {
4013 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
4014 args->handle);
4015 ret = -EINVAL;
4016 goto out;
4017 }
4018
4019 if (obj->user_pin_count == 0) {
4020 ret = i915_gem_object_pin(obj, args->alignment, true, false);
4021 if (ret)
4022 goto out;
4023 }
4024
4025 obj->user_pin_count++;
4026 obj->pin_filp = file;
4027
4028 /* XXX - flush the CPU caches for pinned objects
4029 * as the X server doesn't manage domains yet
4030 */
4031 i915_gem_object_flush_cpu_write_domain(obj);
4032 args->offset = obj->gtt_offset;
4033 out:
4034 drm_gem_object_unreference(&obj->base);
4035 unlock:
4036 mutex_unlock(&dev->struct_mutex);
4037 return ret;
4038 }
4039
4040 int
4041 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
4042 struct drm_file *file)
4043 {
4044 struct drm_i915_gem_pin *args = data;
4045 struct drm_i915_gem_object *obj;
4046 int ret;
4047
4048 ret = i915_mutex_lock_interruptible(dev);
4049 if (ret)
4050 return ret;
4051
4052 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4053 if (&obj->base == NULL) {
4054 ret = -ENOENT;
4055 goto unlock;
4056 }
4057
4058 if (obj->pin_filp != file) {
4059 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
4060 args->handle);
4061 ret = -EINVAL;
4062 goto out;
4063 }
4064 obj->user_pin_count--;
4065 if (obj->user_pin_count == 0) {
4066 obj->pin_filp = NULL;
4067 i915_gem_object_unpin(obj);
4068 }
4069
4070 out:
4071 drm_gem_object_unreference(&obj->base);
4072 unlock:
4073 mutex_unlock(&dev->struct_mutex);
4074 return ret;
4075 }
4076
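/*
 * i915_gem_busy_ioctl:
 *
 * Report whether an object is still in use by the GPU. Bit 0 of
 * args->busy is the active flag, and the ring flag of the last ring to
 * use the object is packed into the upper 16 bits. Userspace could
 * decode it roughly as follows (illustrative sketch only):
 *
 *	still_busy = args.busy & 1;
 *	ring_flag  = args.busy >> 16;
 */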
4077 int
4078 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4079 struct drm_file *file)
4080 {
4081 struct drm_i915_gem_busy *args = data;
4082 struct drm_i915_gem_object *obj;
4083 int ret;
4084
4085 ret = i915_mutex_lock_interruptible(dev);
4086 if (ret)
4087 return ret;
4088
4089 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4090 if (&obj->base == NULL) {
4091 ret = -ENOENT;
4092 goto unlock;
4093 }
4094
4095 /* Count all active objects as busy, even if they are not currently in use
4096 * by the GPU. Users of this interface expect objects to eventually
4097 * become non-busy without any further action, so we emit any
4098 * necessary flushes here.
4099 */
4100 ret = i915_gem_object_flush_active(obj);
4101
4102 args->busy = obj->active;
4103 if (obj->ring) {
4104 BUILD_BUG_ON(I915_NUM_RINGS > 16);
4105 args->busy |= intel_ring_flag(obj->ring) << 16;
4106 }
4107
4108 drm_gem_object_unreference(&obj->base);
4109 unlock:
4110 mutex_unlock(&dev->struct_mutex);
4111 return ret;
4112 }
4113
4114 int
4115 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4116 struct drm_file *file_priv)
4117 {
4118 return i915_gem_ring_throttle(dev, file_priv);
4119 }
4120
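/*
 * i915_gem_madvise_ioctl:
 *
 * Let userspace hint whether an object's backing storage is still
 * needed (I915_MADV_WILLNEED) or may be discarded under memory pressure
 * (I915_MADV_DONTNEED). Pinned objects are rejected, and an object
 * whose pages have already been purged stays purged; args->retained
 * tells the caller whether the backing store survived.
 */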
4121 int
4122 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4123 struct drm_file *file_priv)
4124 {
4125 struct drm_i915_gem_madvise *args = data;
4126 struct drm_i915_gem_object *obj;
4127 int ret;
4128
4129 switch (args->madv) {
4130 case I915_MADV_DONTNEED:
4131 case I915_MADV_WILLNEED:
4132 break;
4133 default:
4134 return -EINVAL;
4135 }
4136
4137 ret = i915_mutex_lock_interruptible(dev);
4138 if (ret)
4139 return ret;
4140
4141 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
4142 if (&obj->base == NULL) {
4143 ret = -ENOENT;
4144 goto unlock;
4145 }
4146
4147 if (obj->pin_count) {
4148 ret = -EINVAL;
4149 goto out;
4150 }
4151
4152 if (obj->madv != __I915_MADV_PURGED)
4153 obj->madv = args->madv;
4154
4155 /* if the object is no longer attached, discard its backing storage */
4156 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL)
4157 i915_gem_object_truncate(obj);
4158
4159 args->retained = obj->madv != __I915_MADV_PURGED;
4160
4161 out:
4162 drm_gem_object_unreference(&obj->base);
4163 unlock:
4164 mutex_unlock(&dev->struct_mutex);
4165 return ret;
4166 }
4167
4168 void i915_gem_object_init(struct drm_i915_gem_object *obj,
4169 const struct drm_i915_gem_object_ops *ops)
4170 {
4171 INIT_LIST_HEAD(&obj->mm_list);
4172 INIT_LIST_HEAD(&obj->gtt_list);
4173 INIT_LIST_HEAD(&obj->ring_list);
4174 INIT_LIST_HEAD(&obj->exec_list);
4175
4176 obj->ops = ops;
4177
4178 obj->fence_reg = I915_FENCE_REG_NONE;
4179 obj->madv = I915_MADV_WILLNEED;
4180 /* Avoid an unnecessary call to unbind on the first bind. */
4181 obj->map_and_fenceable = true;
4182
4183 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
4184 }
4185
4186 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
4187 .get_pages = i915_gem_object_get_pages_gtt,
4188 .put_pages = i915_gem_object_put_pages_gtt,
4189 };
4190
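/*
 * i915_gem_alloc_object:
 *
 * Allocate and initialize a new GEM object of the given size. The
 * backing pages are constrained to addresses the device can actually
 * reach (Broadwater/Crestline cannot relocate objects above 4GiB), and
 * the object starts out in the CPU read/write domain with a cache level
 * of LLC or none depending on the device.
 */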
4191 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
4192 size_t size)
4193 {
4194 struct drm_i915_gem_object *obj;
4195 #ifdef __NetBSD__
4196 uint64_t maxaddr;
4197 #else
4198 struct address_space *mapping;
4199 u32 mask;
4200 #endif
4201
4202 obj = kzalloc(sizeof(*obj), GFP_KERNEL);
4203 if (obj == NULL)
4204 return NULL;
4205
4206 if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4207 kfree(obj);
4208 return NULL;
4209 }
4210
4211 #ifdef __NetBSD__
4212 /*
4213 * 965G/965GM (Broadwater/Crestline) can't handle >32-bit paddrs;
4214 * all other models can't handle >40-bit paddrs.
4215 *
4216 * XXX I think this table is incomplete. It should be
4217 * synchronized with the other DMA address constraints
4218 * scattered throughout DRM.
4219 *
4220 * XXX DMA limits
4221 */
4222 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev))
4223 maxaddr = 0xffffffffULL;
4224 else
4225 maxaddr = 0xffffffffffULL;
4226 uao_set_pgfl(obj->base.gemo_shm_uao, x86_select_freelist(maxaddr));
4227 #else
4228 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
4229 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
4230 /* 965gm cannot relocate objects above 4GiB. */
4231 mask &= ~__GFP_HIGHMEM;
4232 mask |= __GFP_DMA32;
4233 }
4234
4235 mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
4236 mapping_set_gfp_mask(mapping, mask);
4237 #endif
4238
4239 i915_gem_object_init(obj, &i915_gem_object_ops);
4240
4241 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4242 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4243
4244 if (HAS_LLC(dev)) {
4245 /* On some devices, we can have the GPU use the LLC (the CPU
4246 * cache) for about a 10% performance improvement
4247 * compared to uncached. Graphics requests other than
4248 * display scanout are coherent with the CPU in
4249 * accessing this cache. This means in this mode we
4250 * don't need to clflush on the CPU side, and on the
4251 * GPU side we only need to flush internal caches to
4252 * get data visible to the CPU.
4253 *
4254 * However, we maintain the display planes as UC, and so
4255 * need to rebind when first used as such.
4256 */
4257 obj->cache_level = I915_CACHE_LLC;
4258 } else
4259 obj->cache_level = I915_CACHE_NONE;
4260
4261 return obj;
4262 }
4263
4264 int i915_gem_init_object(struct drm_gem_object *obj)
4265 {
4266 BUG();
4267
4268 return 0;
4269 }
4270
4271 void i915_gem_free_object(struct drm_gem_object *gem_obj)
4272 {
4273 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4274 struct drm_device *dev = obj->base.dev;
4275 drm_i915_private_t *dev_priv = dev->dev_private;
4276
4277 trace_i915_gem_object_destroy(obj);
4278
4279 if (obj->phys_obj)
4280 i915_gem_detach_phys_object(dev, obj);
4281
4282 obj->pin_count = 0;
4283 if (WARN_ON(i915_gem_object_unbind(obj) == -ERESTARTSYS)) {
4284 bool was_interruptible;
4285
4286 was_interruptible = dev_priv->mm.interruptible;
4287 dev_priv->mm.interruptible = false;
4288
4289 WARN_ON(i915_gem_object_unbind(obj));
4290
4291 dev_priv->mm.interruptible = was_interruptible;
4292 }
4293
4294 obj->pages_pin_count = 0;
4295 i915_gem_object_put_pages(obj);
4296 i915_gem_object_free_mmap_offset(obj);
4297
4298 BUG_ON(obj->pages);
4299
4300 #ifndef __NetBSD__ /* XXX drm prime */
4301 if (obj->base.import_attach)
4302 drm_prime_gem_destroy(&obj->base, NULL);
4303 #endif
4304
4305 drm_gem_object_release(&obj->base);
4306 i915_gem_info_remove_obj(dev_priv, obj->base.size);
4307
4308 kfree(obj->bit_17);
4309 kfree(obj);
4310 }
4311
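/*
 * i915_gem_idle:
 *
 * Quiesce the GPU for suspend or lastclose: wait for the GPU to go
 * idle, retire outstanding requests, evict everything under UMS, reset
 * the fence registers, mark the device suspended, tear down the rings,
 * and cancel the retire work handler.
 */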
4312 int
4313 i915_gem_idle(struct drm_device *dev)
4314 {
4315 drm_i915_private_t *dev_priv = dev->dev_private;
4316 int ret;
4317
4318 mutex_lock(&dev->struct_mutex);
4319
4320 if (dev_priv->mm.suspended) {
4321 mutex_unlock(&dev->struct_mutex);
4322 return 0;
4323 }
4324
4325 ret = i915_gpu_idle(dev);
4326 if (ret) {
4327 mutex_unlock(&dev->struct_mutex);
4328 return ret;
4329 }
4330 i915_gem_retire_requests(dev);
4331
4332 /* Under UMS, be paranoid and evict. */
4333 if (!drm_core_check_feature(dev, DRIVER_MODESET))
4334 i915_gem_evict_everything(dev);
4335
4336 i915_gem_reset_fences(dev);
4337
4338 /* Hack! Don't let anybody do execbuf while we don't control the chip.
4339 * We need to replace this with a semaphore, or something.
4340 * And not confound mm.suspended!
4341 */
4342 dev_priv->mm.suspended = 1;
4343 del_timer_sync(&dev_priv->hangcheck_timer);
4344
4345 i915_kernel_lost_context(dev);
4346 i915_gem_cleanup_ringbuffer(dev);
4347
4348 mutex_unlock(&dev->struct_mutex);
4349
4350 /* Cancel the retire work handler, which should be idle now. */
4351 cancel_delayed_work_sync(&dev_priv->mm.retire_work);
4352
4353 return 0;
4354 }
4355
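/*
 * i915_gem_l3_remap:
 *
 * On Ivybridge, replay the saved L3 remapping information into the
 * GEN7_L3LOG registers, with DOP clock gating temporarily disabled
 * while the registers are written.
 */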
4356 void i915_gem_l3_remap(struct drm_device *dev)
4357 {
4358 drm_i915_private_t *dev_priv = dev->dev_private;
4359 u32 misccpctl;
4360 int i;
4361
4362 if (!IS_IVYBRIDGE(dev))
4363 return;
4364
4365 if (!dev_priv->l3_parity.remap_info)
4366 return;
4367
4368 misccpctl = I915_READ(GEN7_MISCCPCTL);
4369 I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
4370 POSTING_READ(GEN7_MISCCPCTL);
4371
4372 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
4373 u32 remap = I915_READ(GEN7_L3LOG_BASE + i);
4374 if (remap && remap != dev_priv->l3_parity.remap_info[i/4])
4375 DRM_DEBUG("0x%x was already programmed to %x\n",
4376 GEN7_L3LOG_BASE + i, remap);
4377 if (remap && !dev_priv->l3_parity.remap_info[i/4])
4378 DRM_DEBUG_DRIVER("Clearing remapped register\n");
4379 I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]);
4380 }
4381
4382 /* Make sure all the writes land before disabling dop clock gating */
4383 POSTING_READ(GEN7_L3LOG_BASE);
4384
4385 I915_WRITE(GEN7_MISCCPCTL, misccpctl);
4386 }
4387
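/*
 * i915_gem_init_swizzling:
 *
 * On gen5+ parts that use bit-6 swizzling, enable swizzling for tiled
 * display surfaces and, on gen6/gen7, program TILECTL and ARB_MODE to
 * match.
 */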
4388 void i915_gem_init_swizzling(struct drm_device *dev)
4389 {
4390 drm_i915_private_t *dev_priv = dev->dev_private;
4391
4392 if (INTEL_INFO(dev)->gen < 5 ||
4393 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4394 return;
4395
4396 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4397 DISP_TILE_SURFACE_SWIZZLING);
4398
4399 if (IS_GEN5(dev))
4400 return;
4401
4402 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4403 if (IS_GEN6(dev))
4404 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
4405 else
4406 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
4407 }
4408
4409 static bool
4410 intel_enable_blt(struct drm_device *dev)
4411 {
4412 if (!HAS_BLT(dev))
4413 return false;
4414
4415 /* The blitter was dysfunctional on early prototypes */
4416 if (IS_GEN6(dev) && dev->pdev->revision < 8) {
4417 DRM_INFO("BLT not supported on this pre-production hardware;"
4418 " graphics performance will be degraded.\n");
4419 return false;
4420 }
4421
4422 return true;
4423 }
4424
4425 int
4426 i915_gem_init_hw(struct drm_device *dev)
4427 {
4428 drm_i915_private_t *dev_priv = dev->dev_private;
4429 int ret;
4430
4431 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
4432 return -EIO;
4433
4434 if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1))
4435 I915_WRITE(0x9008, I915_READ(0x9008) | 0xf0000);
4436
4437 i915_gem_l3_remap(dev);
4438
4439 i915_gem_init_swizzling(dev);
4440
4441 ret = intel_init_render_ring_buffer(dev);
4442 if (ret)
4443 return ret;
4444
4445 if (HAS_BSD(dev)) {
4446 ret = intel_init_bsd_ring_buffer(dev);
4447 if (ret)
4448 goto cleanup_render_ring;
4449 }
4450
4451 if (intel_enable_blt(dev)) {
4452 ret = intel_init_blt_ring_buffer(dev);
4453 if (ret)
4454 goto cleanup_bsd_ring;
4455 }
4456
4457 dev_priv->next_seqno = 1;
4458
4459 /*
4460 * XXX: There was a workaround (w/a) described somewhere that suggested
4461 * loading contexts before enabling PPGTT.
4462 */
4463 i915_gem_context_init(dev);
4464 i915_gem_init_ppgtt(dev);
4465
4466 return 0;
4467
4468 cleanup_bsd_ring:
4469 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
4470 cleanup_render_ring:
4471 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
4472 return ret;
4473 }
4474
4475 static bool
4476 intel_enable_ppgtt(struct drm_device *dev)
4477 {
4478 #ifdef __NetBSD__ /* XXX ppgtt */
4479 return false;
4480 #else
4481 if (i915_enable_ppgtt >= 0)
4482 return i915_enable_ppgtt;
4483
4484 #ifdef CONFIG_INTEL_IOMMU
4485 /* Disable ppgtt on SNB if VT-d is on. */
4486 if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped)
4487 return false;
4488 #endif
4489
4490 return true;
4491 #endif
4492 }
4493
4494 int i915_gem_init(struct drm_device *dev)
4495 {
4496 struct drm_i915_private *dev_priv = dev->dev_private;
4497 unsigned long gtt_size, mappable_size;
4498 int ret;
4499
4500 gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT;
4501 mappable_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;
4502
4503 mutex_lock(&dev->struct_mutex);
4504 if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
4505 /* PPGTT pdes are stolen from global gtt ptes, so shrink the
4506 * aperture accordingly when using aliasing ppgtt. */
4507 gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE;
4508
4509 i915_gem_init_global_gtt(dev, 0, mappable_size, gtt_size);
4510
4511 ret = i915_gem_init_aliasing_ppgtt(dev);
4512 if (ret) {
4513 i915_gem_fini_global_gtt(dev);
4514 mutex_unlock(&dev->struct_mutex);
4515 return ret;
4516 }
4517 } else {
4518 /* Let GEM manage all of the aperture.
4519 *
4520 * However, leave one page at the end still bound to the scratch
4521 * page. There are a number of places where the hardware
4522 * apparently prefetches past the end of the object, and we've
4523 * seen multiple hangs with the GPU head pointer stuck in a
4524 * batchbuffer bound at the last page of the aperture. One page
4525 * should be enough to keep any prefetching inside of the
4526 * aperture.
4527 */
4528 i915_gem_init_global_gtt(dev, 0, mappable_size,
4529 gtt_size);
4530 }
4531
4532 ret = i915_gem_init_hw(dev);
4533 #ifdef __NetBSD__ /* XXX fini global gtt */
4534 if (ret)
4535 i915_gem_fini_global_gtt(dev);
4536 #endif
4537 mutex_unlock(&dev->struct_mutex);
4538 if (ret) {
4539 i915_gem_cleanup_aliasing_ppgtt(dev);
4540 return ret;
4541 }
4542
4543 /* Allow hardware batchbuffers unless told otherwise, but not for KMS. */
4544 if (!drm_core_check_feature(dev, DRIVER_MODESET))
4545 dev_priv->dri1.allow_batchbuffer = 1;
4546 return 0;
4547 }
4548
4549 void
4550 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
4551 {
4552 drm_i915_private_t *dev_priv = dev->dev_private;
4553 struct intel_ring_buffer *ring;
4554 int i;
4555
4556 for_each_ring(ring, dev_priv, i)
4557 intel_cleanup_ring_buffer(ring);
4558 }
4559
4560 int
4561 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
4562 struct drm_file *file_priv)
4563 {
4564 drm_i915_private_t *dev_priv = dev->dev_private;
4565 int ret;
4566
4567 if (drm_core_check_feature(dev, DRIVER_MODESET))
4568 return 0;
4569
4570 if (atomic_read(&dev_priv->mm.wedged)) {
4571 DRM_ERROR("Reenabling wedged hardware, good luck\n");
4572 atomic_set(&dev_priv->mm.wedged, 0);
4573 }
4574
4575 mutex_lock(&dev->struct_mutex);
4576 dev_priv->mm.suspended = 0;
4577
4578 ret = i915_gem_init_hw(dev);
4579 if (ret != 0) {
4580 mutex_unlock(&dev->struct_mutex);
4581 return ret;
4582 }
4583
4584 BUG_ON(!list_empty(&dev_priv->mm.active_list));
4585 mutex_unlock(&dev->struct_mutex);
4586
4587 ret = drm_irq_install(dev);
4588 if (ret)
4589 goto cleanup_ringbuffer;
4590
4591 return 0;
4592
4593 cleanup_ringbuffer:
4594 mutex_lock(&dev->struct_mutex);
4595 i915_gem_cleanup_ringbuffer(dev);
4596 dev_priv->mm.suspended = 1;
4597 mutex_unlock(&dev->struct_mutex);
4598
4599 return ret;
4600 }
4601
4602 int
4603 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
4604 struct drm_file *file_priv)
4605 {
4606 if (drm_core_check_feature(dev, DRIVER_MODESET))
4607 return 0;
4608
4609 drm_irq_uninstall(dev);
4610 return i915_gem_idle(dev);
4611 }
4612
4613 void
4614 i915_gem_lastclose(struct drm_device *dev)
4615 {
4616 int ret;
4617
4618 if (drm_core_check_feature(dev, DRIVER_MODESET))
4619 return;
4620
4621 ret = i915_gem_idle(dev);
4622 if (ret)
4623 DRM_ERROR("failed to idle hardware: %d\n", ret);
4624 }
4625
4626 static void
4627 init_ring_lists(struct intel_ring_buffer *ring)
4628 {
4629 INIT_LIST_HEAD(&ring->active_list);
4630 INIT_LIST_HEAD(&ring->request_list);
4631 }
4632
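/*
 * i915_gem_load:
 *
 * One-time GEM setup at driver load: initialize the object and request
 * lists, the retire work handler, fence register bookkeeping, swizzle
 * detection, the pending-flip wait queue and the inactive-list
 * shrinker.
 */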
4633 void
4634 i915_gem_load(struct drm_device *dev)
4635 {
4636 int i;
4637 drm_i915_private_t *dev_priv = dev->dev_private;
4638
4639 INIT_LIST_HEAD(&dev_priv->mm.active_list);
4640 INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
4641 INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
4642 INIT_LIST_HEAD(&dev_priv->mm.bound_list);
4643 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4644 for (i = 0; i < I915_NUM_RINGS; i++)
4645 init_ring_lists(&dev_priv->ring[i]);
4646 for (i = 0; i < I915_MAX_NUM_FENCES; i++)
4647 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
4648 INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
4649 i915_gem_retire_work_handler);
4650 init_completion(&dev_priv->error_completion);
4651
4652 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
4653 if (IS_GEN3(dev)) {
4654 I915_WRITE(MI_ARB_STATE,
4655 _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
4656 }
4657
4658 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
4659
4660 /* Old X drivers will take 0-2 for front, back, depth buffers */
4661 if (!drm_core_check_feature(dev, DRIVER_MODESET))
4662 dev_priv->fence_reg_start = 3;
4663
4664 if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4665 dev_priv->num_fence_regs = 16;
4666 else
4667 dev_priv->num_fence_regs = 8;
4668
4669 /* Initialize fence registers to zero */
4670 i915_gem_reset_fences(dev);
4671
4672 i915_gem_detect_bit_6_swizzle(dev);
4673 #ifdef __NetBSD__
4674 DRM_INIT_WAITQUEUE(&dev_priv->pending_flip_queue, "i915flip");
4675 spin_lock_init(&dev_priv->pending_flip_lock);
4676 #else
4677 init_waitqueue_head(&dev_priv->pending_flip_queue);
4678 #endif
4679
4680 dev_priv->mm.interruptible = true;
4681
4682 dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink;
4683 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
4684 register_shrinker(&dev_priv->mm.inactive_shrinker);
4685 }
4686
4687 /*
4688 * Create a physically contiguous memory object for this object,
4689 * e.g. for cursor and overlay registers.
4690 */
4691 static int i915_gem_init_phys_object(struct drm_device *dev,
4692 int id, int size, int align)
4693 {
4694 drm_i915_private_t *dev_priv = dev->dev_private;
4695 struct drm_i915_gem_phys_object *phys_obj;
4696 int ret;
4697
4698 if (dev_priv->mm.phys_objs[id - 1] || !size)
4699 return 0;
4700
4701 phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
4702 if (!phys_obj)
4703 return -ENOMEM;
4704
4705 phys_obj->id = id;
4706
4707 phys_obj->handle = drm_pci_alloc(dev, size, align);
4708 if (!phys_obj->handle) {
4709 ret = -ENOMEM;
4710 goto kfree_obj;
4711 }
4712 #ifndef __NetBSD__ /* XXX x86 wc? */
4713 #ifdef CONFIG_X86
4714 set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4715 #endif
4716 #endif
4717
4718 dev_priv->mm.phys_objs[id - 1] = phys_obj;
4719
4720 return 0;
4721 kfree_obj:
4722 kfree(phys_obj);
4723 return ret;
4724 }
4725
4726 static void i915_gem_free_phys_object(struct drm_device *dev, int id)
4727 {
4728 drm_i915_private_t *dev_priv = dev->dev_private;
4729 struct drm_i915_gem_phys_object *phys_obj;
4730
4731 if (!dev_priv->mm.phys_objs[id - 1])
4732 return;
4733
4734 phys_obj = dev_priv->mm.phys_objs[id - 1];
4735 if (phys_obj->cur_obj) {
4736 i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
4737 }
4738
4739 #ifndef __NetBSD__ /* XXX x86 wb? */
4740 #ifdef CONFIG_X86
4741 set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4742 #endif
4743 #endif
4744 drm_pci_free(dev, phys_obj->handle);
4745 kfree(phys_obj);
4746 dev_priv->mm.phys_objs[id - 1] = NULL;
4747 }
4748
4749 void i915_gem_free_all_phys_object(struct drm_device *dev)
4750 {
4751 int i;
4752
4753 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
4754 i915_gem_free_phys_object(dev, i);
4755 }
4756
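/*
 * i915_gem_detach_phys_object:
 *
 * Copy the contents of an object's physically contiguous backing store
 * back into its normal shmem/uao pages, flush the CPU caches, and
 * release the phys object binding.
 */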
4757 void i915_gem_detach_phys_object(struct drm_device *dev,
4758 struct drm_i915_gem_object *obj)
4759 {
4760 #ifndef __NetBSD__
4761 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
4762 #endif
4763 char *vaddr;
4764 int i;
4765 int page_count;
4766
4767 if (!obj->phys_obj)
4768 return;
4769 vaddr = obj->phys_obj->handle->vaddr;
4770
4771 page_count = obj->base.size / PAGE_SIZE;
4772 for (i = 0; i < page_count; i++) {
4773 #ifdef __NetBSD__
4774 /* XXX Just use ubc_uiomove? */
4775 struct pglist pages;
4776 int error;
4777
4778 TAILQ_INIT(&pages);
4779 error = uvm_obj_wirepages(obj->base.gemo_shm_uao, i*PAGE_SIZE,
4780 (i+1)*PAGE_SIZE, &pages);
4781 if (error) {
4782 printf("unable to map page %d of i915 gem obj: %d\n",
4783 i, error);
4784 continue;
4785 }
4786
4787 KASSERT(!TAILQ_EMPTY(&pages));
4788 struct vm_page *const page = TAILQ_FIRST(&pages);
4789 TAILQ_REMOVE(&pages, page, pageq.queue);
4790 KASSERT(TAILQ_EMPTY(&pages));
4791
4792 char *const dst = kmap_atomic(container_of(page, struct page,
4793 p_vmp));
4794 (void)memcpy(dst, vaddr + (i*PAGE_SIZE), PAGE_SIZE);
4795 kunmap_atomic(dst);
4796
4797 drm_clflush_page(container_of(page, struct page, p_vmp));
4798 page->flags &= ~PG_CLEAN;
4799 /* XXX mark page accessed */
4800 uvm_obj_unwirepages(obj->base.gemo_shm_uao, i*PAGE_SIZE,
4801 (i+1)*PAGE_SIZE);
4802 #else
4803 struct page *page = shmem_read_mapping_page(mapping, i);
4804 if (!IS_ERR(page)) {
4805 char *dst = kmap_atomic(page);
4806 memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE);
4807 kunmap_atomic(dst);
4808
4809 drm_clflush_pages(&page, 1);
4810
4811 set_page_dirty(page);
4812 mark_page_accessed(page);
4813 page_cache_release(page);
4814 }
4815 #endif
4816 }
4817 i915_gem_chipset_flush(dev);
4818
4819 obj->phys_obj->cur_obj = NULL;
4820 obj->phys_obj = NULL;
4821 }
4822
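/*
 * i915_gem_attach_phys_object:
 *
 * Bind an object to one of the driver's physically contiguous memory
 * slots (e.g. for cursor or overlay registers), allocating the slot on
 * first use and copying the object's current page contents into it.
 */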
4823 int
4824 i915_gem_attach_phys_object(struct drm_device *dev,
4825 struct drm_i915_gem_object *obj,
4826 int id,
4827 int align)
4828 {
4829 #ifndef __NetBSD__
4830 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
4831 #endif
4832 drm_i915_private_t *dev_priv = dev->dev_private;
4833 int ret = 0;
4834 int page_count;
4835 int i;
4836
4837 if (id > I915_MAX_PHYS_OBJECT)
4838 return -EINVAL;
4839
4840 if (obj->phys_obj) {
4841 if (obj->phys_obj->id == id)
4842 return 0;
4843 i915_gem_detach_phys_object(dev, obj);
4844 }
4845
4846 /* create a new object */
4847 if (!dev_priv->mm.phys_objs[id - 1]) {
4848 ret = i915_gem_init_phys_object(dev, id,
4849 obj->base.size, align);
4850 if (ret) {
4851 DRM_ERROR("failed to init phys object %d size: %zu\n",
4852 id, obj->base.size);
4853 return ret;
4854 }
4855 }
4856
4857 /* bind to the object */
4858 obj->phys_obj = dev_priv->mm.phys_objs[id - 1];
4859 obj->phys_obj->cur_obj = obj;
4860
4861 page_count = obj->base.size / PAGE_SIZE;
4862
4863 for (i = 0; i < page_count; i++) {
4864 #ifdef __NetBSD__
4865 char *const vaddr = obj->phys_obj->handle->vaddr;
4866 struct pglist pages;
4867 int error;
4868
4869 TAILQ_INIT(&pages);
4870 error = uvm_obj_wirepages(obj->base.gemo_shm_uao, i*PAGE_SIZE,
4871 (i+1)*PAGE_SIZE, &pages);
4872 if (error)
4873 /* XXX errno NetBSD->Linux */
4874 return -error;
4875
4876 KASSERT(!TAILQ_EMPTY(&pages));
4877 struct vm_page *const page = TAILQ_FIRST(&pages);
4878 TAILQ_REMOVE(&pages, page, pageq.queue);
4879 KASSERT(TAILQ_EMPTY(&pages));
4880
4881 char *const src = kmap_atomic(container_of(page, struct page,
4882 p_vmp));
4883 (void)memcpy(vaddr + (i*PAGE_SIZE), src, PAGE_SIZE);
4884 kunmap_atomic(src);
4885
4886 /* XXX mark page accessed */
4887 uvm_obj_unwirepages(obj->base.gemo_shm_uao, i*PAGE_SIZE,
4888 (i+1)*PAGE_SIZE);
4889 #else
4890 struct page *page;
4891 char *dst, *src;
4892
4893 page = shmem_read_mapping_page(mapping, i);
4894 if (IS_ERR(page))
4895 return PTR_ERR(page);
4896
4897 src = kmap_atomic(page);
4898 dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4899 memcpy(dst, src, PAGE_SIZE);
4900 kunmap_atomic(src);
4901
4902 mark_page_accessed(page);
4903 page_cache_release(page);
4904 #endif
4905 }
4906
4907 return 0;
4908 }
4909
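/*
 * i915_gem_phys_pwrite:
 *
 * Handle pwrite for an object backed by a phys object: copy user data
 * straight into the phys object's kernel mapping, dropping struct_mutex
 * for a blocking copy_from_user() if the fast non-caching copy faults,
 * then flush the chipset caches.
 */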
4910 static int
4911 i915_gem_phys_pwrite(struct drm_device *dev,
4912 struct drm_i915_gem_object *obj,
4913 struct drm_i915_gem_pwrite *args,
4914 struct drm_file *file_priv)
4915 {
4916 void *vaddr = (char *)obj->phys_obj->handle->vaddr + args->offset;
4917 char __user *user_data = (char __user *) (uintptr_t) args->data_ptr;
4918
4919 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
4920 unsigned long unwritten;
4921
4922 /* The physical object once assigned is fixed for the lifetime
4923 * of the obj, so we can safely drop the lock and continue
4924 * to access vaddr.
4925 */
4926 mutex_unlock(&dev->struct_mutex);
4927 unwritten = copy_from_user(vaddr, user_data, args->size);
4928 mutex_lock(&dev->struct_mutex);
4929 if (unwritten)
4930 return -EFAULT;
4931 }
4932
4933 i915_gem_chipset_flush(dev);
4934 return 0;
4935 }
4936
4937 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
4938 {
4939 struct drm_i915_file_private *file_priv = file->driver_priv;
4940
4941 /* Clean up our request list when the client is going away, so that
4942 * later retire_requests won't dereference our soon-to-be-gone
4943 * file_priv.
4944 */
4945 spin_lock(&file_priv->mm.lock);
4946 while (!list_empty(&file_priv->mm.request_list)) {
4947 struct drm_i915_gem_request *request;
4948
4949 request = list_first_entry(&file_priv->mm.request_list,
4950 struct drm_i915_gem_request,
4951 client_list);
4952 list_del(&request->client_list);
4953 request->file_priv = NULL;
4954 }
4955 spin_unlock(&file_priv->mm.lock);
4956 }
4957
4958 #ifndef __NetBSD__ /* XXX */
4959 static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
4960 {
4961 if (!mutex_is_locked(mutex))
4962 return false;
4963
4964 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)
4965 return mutex->owner == task;
4966 #else
4967 /* Since UP may be pre-empted, we cannot assume that we own the lock */
4968 return false;
4969 #endif
4970 }
4971 #endif
4972
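/*
 * i915_gem_inactive_shrink:
 *
 * Memory-pressure callback (a no-op on NetBSD for now). When asked to
 * scan, purge and shrink object backing stores; in all cases return an
 * estimate of how many pages could still be reclaimed from unbound and
 * unpinned inactive objects.
 */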
4973 static int
4974 i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
4975 {
4976 #ifdef __NetBSD__ /* XXX shrinkers */
4977 return 0;
4978 #else
4979 struct drm_i915_private *dev_priv =
4980 container_of(shrinker,
4981 struct drm_i915_private,
4982 mm.inactive_shrinker);
4983 struct drm_device *dev = dev_priv->dev;
4984 struct drm_i915_gem_object *obj;
4985 int nr_to_scan = sc->nr_to_scan;
4986 bool unlock = true;
4987 int cnt;
4988
4989 if (!mutex_trylock(&dev->struct_mutex)) {
4990 if (!mutex_is_locked_by(&dev->struct_mutex, current))
4991 return 0;
4992
4993 if (dev_priv->mm.shrinker_no_lock_stealing)
4994 return 0;
4995
4996 unlock = false;
4997 }
4998
4999 if (nr_to_scan) {
5000 nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan);
5001 if (nr_to_scan > 0)
5002 nr_to_scan -= __i915_gem_shrink(dev_priv, nr_to_scan,
5003 false);
5004 if (nr_to_scan > 0)
5005 i915_gem_shrink_all(dev_priv);
5006 }
5007
5008 cnt = 0;
5009 list_for_each_entry(obj, &dev_priv->mm.unbound_list, gtt_list)
5010 if (obj->pages_pin_count == 0)
5011 cnt += obj->base.size >> PAGE_SHIFT;
5012 list_for_each_entry(obj, &dev_priv->mm.inactive_list, gtt_list)
5013 if (obj->pin_count == 0 && obj->pages_pin_count == 0)
5014 cnt += obj->base.size >> PAGE_SHIFT;
5015
5016 if (unlock)
5017 mutex_unlock(&dev->struct_mutex);
5018 return cnt;
5019 #endif
5020 }
5021