1/*
2 * Copyright (c) 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Chris Wilson <chris@chris-wilson.co.uk>
25 *
26 */
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include "sna.h"
33#include "sna_reg.h"
34
35#include <unistd.h>
36#include <sys/ioctl.h>
37#include <sys/mman.h>
38#include <sys/stat.h>
39#include <time.h>
40#include <sched.h>
41#include <errno.h>
42#include <fcntl.h>
43
44#include <xf86drm.h>
45
46#ifdef HAVE_VALGRIND
47#include <valgrind.h>
48#include <memcheck.h>
49#endif
50
51#ifdef HAVE_STRUCT_SYSINFO_TOTALRAM
52#include <sys/sysinfo.h>
53#endif
54
55#include "sna_cpuid.h"
56
57static struct kgem_bo *
58search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
59
60static struct kgem_bo *
61search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
62
63#define DBG_NO_HW 0
64#define DBG_NO_EXEC 0
65#define DBG_NO_TILING 0
66#define DBG_NO_CACHE 0
67#define DBG_NO_SNOOP_CACHE 0
68#define DBG_NO_CACHE_LEVEL 0
69#define DBG_NO_CPU 0
70#define DBG_NO_CREATE2 0
71#define DBG_NO_USERPTR 0
72#define DBG_NO_UNSYNCHRONIZED_USERPTR 0
73#define DBG_NO_LLC 0
74#define DBG_NO_SEMAPHORES 0
75#define DBG_NO_MADV 0
76#define DBG_NO_UPLOAD_CACHE 0
77#define DBG_NO_UPLOAD_ACTIVE 0
78#define DBG_NO_MAP_UPLOAD 0
79#define DBG_NO_RELAXED_FENCING 0
80#define DBG_NO_SECURE_BATCHES 0
81#define DBG_NO_PINNED_BATCHES 0
82#define DBG_NO_SHRINK_BATCHES 0
83#define DBG_NO_FAST_RELOC 0
84#define DBG_NO_HANDLE_LUT 0
85#define DBG_NO_WT 0
86#define DBG_NO_WC_MMAP 0
87#define DBG_DUMP 0
88#define DBG_NO_MALLOC_CACHE 0
89
90#define FORCE_MMAP_SYNC 0 /* ((1 << DOMAIN_CPU) | (1 << DOMAIN_GTT)) */
91
92#ifndef DEBUG_SYNC
93#define DEBUG_SYNC 0
94#endif
95
96#define SHOW_BATCH_BEFORE 0
97#define SHOW_BATCH_AFTER 0
98
99#if !USE_WC_MMAP
100#undef DBG_NO_WC_MMAP
101#define DBG_NO_WC_MMAP 1
102#endif
103
104#if 0
105#define ASSERT_IDLE(kgem__, handle__) assert(!__kgem_busy(kgem__, handle__))
106#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__) assert(!(expect__) || !__kgem_busy(kgem__, handle__))
107#else
108#define ASSERT_IDLE(kgem__, handle__)
109#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__)
110#endif
111
112/* Worst case seems to be 965gm where we cannot write within a cacheline that
113 * is being simultaneously being read by the GPU, or within the sampler
114 * prefetch. In general, the chipsets seem to have a requirement that sampler
115 * offsets be aligned to a cacheline (64 bytes).
116 *
117 * Actually, it turns out the BLT color pattern (BR15) has the most severe
118 * alignment restrictions, 64 bytes for 8-bpp, 128 bytes for 16-bpp and 256
119 * bytes for 32-bpp.
120 */
121#define UPLOAD_ALIGNMENT 256
122
123#define PAGE_ALIGN(x) ALIGN(x, PAGE_SIZE)
124#define NUM_PAGES(x) (((x) + PAGE_SIZE-1) / PAGE_SIZE)
125
126#define MAX_GTT_VMA_CACHE 512
127#define MAX_CPU_VMA_CACHE INT16_MAX
128#define MAP_PRESERVE_TIME 10
129
130#define MAKE_USER_MAP(ptr) ((void*)((uintptr_t)(ptr) | 1))
131#define IS_USER_MAP(ptr) ((uintptr_t)(ptr) & 1)
132
133#define LOCAL_I915_PARAM_HAS_BLT		11
134#define LOCAL_I915_PARAM_HAS_RELAXED_FENCING	12
135#define LOCAL_I915_PARAM_HAS_RELAXED_DELTA	15
136#define LOCAL_I915_PARAM_HAS_LLC		17
137#define LOCAL_I915_PARAM_HAS_SEMAPHORES		20
138#define LOCAL_I915_PARAM_HAS_SECURE_BATCHES	23
139#define LOCAL_I915_PARAM_HAS_PINNED_BATCHES	24
140#define LOCAL_I915_PARAM_HAS_NO_RELOC		25
141#define LOCAL_I915_PARAM_HAS_HANDLE_LUT		26
142#define LOCAL_I915_PARAM_HAS_WT			27
143#define LOCAL_I915_PARAM_MMAP_VERSION		30
144
145#define LOCAL_I915_EXEC_IS_PINNED		(1<<10)
146#define LOCAL_I915_EXEC_NO_RELOC		(1<<11)
147#define LOCAL_I915_EXEC_HANDLE_LUT		(1<<12)
148
149#define LOCAL_I915_GEM_CREATE2       0x34
150#define LOCAL_IOCTL_I915_GEM_CREATE2 DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_CREATE2, struct local_i915_gem_create2)
151struct local_i915_gem_create2 {
152	uint64_t size;
153	uint32_t placement;
154#define LOCAL_I915_CREATE_PLACEMENT_SYSTEM 0
155#define LOCAL_I915_CREATE_PLACEMENT_STOLEN 1 /* Cannot use CPU mmaps or pread/pwrite */
156	uint32_t domain;
157	uint32_t caching;
158	uint32_t tiling_mode;
159	uint32_t stride;
160	uint32_t flags;
161	uint32_t pad;
162	uint32_t handle;
163};
164
165#define LOCAL_I915_GEM_USERPTR       0x33
166#define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr)
167struct local_i915_gem_userptr {
168	uint64_t user_ptr;
169	uint64_t user_size;
170	uint32_t flags;
171#define I915_USERPTR_READ_ONLY		0x1
172#define I915_USERPTR_UNSYNCHRONIZED	0x80000000
173	uint32_t handle;
174};
175
176#define UNCACHED	0
177#define SNOOPED		1
178#define DISPLAY		2
179
180struct local_i915_gem_caching {
181	uint32_t handle;
182	uint32_t caching;
183};
184
185#define LOCAL_I915_GEM_SET_CACHING	0x2f
186#define LOCAL_I915_GEM_GET_CACHING	0x30
187#define LOCAL_IOCTL_I915_GEM_SET_CACHING DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_SET_CACHING, struct local_i915_gem_caching)
188#define LOCAL_IOCTL_I915_GEM_GET_CACHING DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_GET_CACHING, struct local_i915_gem_caching)
189
190struct local_i915_gem_mmap2 {
191	uint32_t handle;
192	uint32_t pad;
193	uint64_t offset;
194	uint64_t size;
195	uint64_t addr_ptr;
196	uint64_t flags;
197#define I915_MMAP_WC 0x1
198};
199#define LOCAL_IOCTL_I915_GEM_MMAP_v2 DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct local_i915_gem_mmap2)
200
/* An upload buffer: a bo plus a CPU-visible pointer into its backing
 * storage, used to coalesce many small uploads into one object. */
struct kgem_buffer {
	struct kgem_bo base;
	void *mem;		/* CPU pointer used to fill the buffer */
	uint32_t used;		/* bytes consumed so far */
	uint32_t need_io : 1;	/* contents must be flushed with pwrite */
	uint32_t write : 2;
	uint32_t mmapped : 2;	/* one of the MMAPPED_* values below */
};
/* How kgem_buffer.mem was obtained. */
enum {
	MMAPPED_NONE,
	MMAPPED_GTT,
	MMAPPED_CPU
};
214
215static struct kgem_bo *__kgem_freed_bo;
216static struct kgem_request *__kgem_freed_request;
217static struct drm_i915_gem_exec_object2 _kgem_dummy_exec;
218
/* Return the size of the bo in bytes. */
static inline int bytes(struct kgem_bo *bo)
{
	return __kgem_bo_size(bo);
}
223
224#define bucket(B) (B)->size.pages.bucket
225#define num_pages(B) (B)->size.pages.count
226
/* Issue an ioctl, transparently retrying when interrupted by a signal
 * (EINTR) or told to back off (EAGAIN, after yielding the CPU).
 * Returns 0 on success or the negated errno on failure. */
static int do_ioctl(int fd, unsigned long req, void *arg)
{
	for (;;) {
		if (ioctl(fd, req, arg) == 0)
			return 0;

		switch (errno) {
		case EINTR:
			continue;
		case EAGAIN:
			sched_yield();
			continue;
		default:
			return -errno;
		}
	}
}
247
#ifdef DEBUG_MEMORY
/* Account one buffer allocation of @size bytes in the debug counters. */
static void debug_alloc(struct kgem *kgem, size_t size)
{
	kgem->debug_memory.bo_allocs++;
	kgem->debug_memory.bo_bytes += size;
}
/* Convenience wrapper: account an allocation using the bo's own size. */
static void debug_alloc__bo(struct kgem *kgem, struct kgem_bo *bo)
{
	debug_alloc(kgem, bytes(bo));
}
#else
#define debug_alloc__bo(k, b)
#endif
261
#ifndef NDEBUG
/* Cross-check our cached tiling state against the kernel's. */
static void assert_tiling(struct kgem *kgem, struct kgem_bo *bo)
{
	struct drm_i915_gem_get_tiling tiling;

	assert(bo);

	VG_CLEAR(tiling);
	tiling.handle = bo->handle;
	tiling.tiling_mode = bo->tiling;
	(void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling);
	assert(tiling.tiling_mode == bo->tiling);
}

/* Cross-check the kernel caching level for a CPU bo: we expect
 * SNOOPED on LLC machines and UNCACHED otherwise. */
static void assert_cacheing(struct kgem *kgem, struct kgem_bo *bo)
{
	struct local_i915_gem_caching arg;
	int expect = kgem->has_llc ? SNOOPED : UNCACHED;

	VG_CLEAR(arg);
	arg.handle = bo->handle;
	arg.caching = expect;

	(void)do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_GET_CACHING, &arg);

	assert(arg.caching == expect);
}

/* Assert that the bo has been fully retired: no outstanding request,
 * not queued for execution, and off the request list. */
static void assert_bo_retired(struct kgem_bo *bo)
{
	DBG(("%s: handle=%d, domain: %d exec? %d, rq? %d\n", __FUNCTION__,
	     bo->handle, bo->domain, bo->exec != NULL, bo->rq != NULL));
	assert(bo->refcnt);
	assert(bo->rq == NULL);
	assert(bo->exec == NULL);
	assert(list_is_empty(&bo->request));
}
#else
#define assert_tiling(kgem, bo)
#define assert_cacheing(kgem, bo)
#define assert_bo_retired(bo)
#endif
304
/* Mark the GPU as hung/unusable and notify the render backend so it
 * can switch over to its software fallback paths. */
static void
__kgem_set_wedged(struct kgem *kgem)
{
	kgem->wedged = true;
	sna_render_mark_wedged(container_of(kgem, struct sna, kgem));
}
311
/* Reset the render state after the batch has been discarded or lost. */
static void kgem_sna_reset(struct kgem *kgem)
{
	struct sna *sna = container_of(kgem, struct sna, kgem);

	sna->render.reset(sna);
	sna->blt_state.fill_bo = 0; /* forget the cached BLT fill target */
}
319
/* Flush render state ahead of batch submission, writing back the
 * solid-colour cache if it has been modified. */
static void kgem_sna_flush(struct kgem *kgem)
{
	struct sna *sna = container_of(kgem, struct sna, kgem);

	sna->render.flush(sna);

	if (sna->render.solid_cache.dirty)
		sna_render_flush_solid(sna);
}
329
/* Ask the kernel to set the tiling mode and stride of a GEM object.
 * Retries on EINTR/EAGAIN; note the arguments are rewritten on every
 * attempt (the restart label is after VG_CLEAR but before the field
 * assignments) because the SET_TILING ioctl updates the struct in
 * place. Returns true iff the kernel accepted the request. */
static bool gem_set_tiling(int fd, uint32_t handle, int tiling, int stride)
{
	struct drm_i915_gem_set_tiling set_tiling;
	int err;

	if (DBG_NO_TILING)
		return false;

	VG_CLEAR(set_tiling);
restart:
	set_tiling.handle = handle;
	set_tiling.tiling_mode = tiling;
	set_tiling.stride = stride;

	if (ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling) == 0)
		return true;

	err = errno;
	if (err == EINTR)
		goto restart;

	if (err == EAGAIN) {
		sched_yield();
		goto restart;
	}

	return false;
}
358
/* Set the kernel caching level (UNCACHED/SNOOPED/DISPLAY) of a GEM
 * object. Returns true on success. */
static bool gem_set_caching(int fd, uint32_t handle, int caching)
{
	struct local_i915_gem_caching arg;

	VG_CLEAR(arg);
	arg.handle = handle;
	arg.caching = caching;
	return do_ioctl(fd, LOCAL_IOCTL_I915_GEM_SET_CACHING, &arg) == 0;
}
368
/* Wrap an existing (page-aligned) user allocation in a GEM object via
 * the userptr ioctl. An unsynchronized mapping is attempted first and,
 * if the kernel refuses it (or it is disabled for debugging), we fall
 * back to a synchronized mapping. Returns the new handle, or 0 on
 * failure. */
static uint32_t gem_userptr(int fd, void *ptr, int size, int read_only)
{
	struct local_i915_gem_userptr arg;

	VG_CLEAR(arg);
	arg.user_ptr = (uintptr_t)ptr;
	arg.user_size = size;
	arg.flags = I915_USERPTR_UNSYNCHRONIZED;
	if (read_only)
		arg.flags |= I915_USERPTR_READ_ONLY;

	if (DBG_NO_UNSYNCHRONIZED_USERPTR ||
	    do_ioctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg)) {
		arg.flags &= ~I915_USERPTR_UNSYNCHRONIZED;
		if (do_ioctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg)) {
			DBG(("%s: failed to map %p + %d bytes: %d\n",
			     __FUNCTION__, ptr, size, errno));
			return 0;
		}
	}

	return arg.handle;
}
392
393static bool __kgem_throttle(struct kgem *kgem, bool harder)
394{
395	/* Let this be woken up by sigtimer so that we don't block here
396	 * too much and completely starve X. We will sleep again shortly,
397	 * and so catch up or detect the hang.
398	 */
399	do {
400		if (ioctl(kgem->fd, DRM_IOCTL_I915_GEM_THROTTLE) == 0) {
401			kgem->need_throttle = 0;
402			return false;
403		}
404
405		if (errno == EIO)
406			return true;
407	} while (harder);
408
409	return false;
410}
411
/* Try to free up resources by retiring completed requests, throttling
 * the GPU first if plain retirement finds nothing. The CREATE_NO_RETIRE
 * and CREATE_NO_THROTTLE flags let callers skip either stage.
 * Returns true if anything was retired. */
static bool __kgem_throttle_retire(struct kgem *kgem, unsigned flags)
{
	if (flags & CREATE_NO_RETIRE || !kgem->need_retire) {
		DBG(("%s: not retiring\n", __FUNCTION__));
		return false;
	}

	if (kgem_retire(kgem))
		return true;

	if (flags & CREATE_NO_THROTTLE || !kgem->need_throttle) {
		DBG(("%s: not throttling\n", __FUNCTION__));
		return false;
	}

	__kgem_throttle(kgem, false);
	return kgem_retire(kgem);
}
430
431static void *__kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
432{
433	struct drm_i915_gem_mmap_gtt gtt;
434	void *ptr;
435	int err;
436
437	DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__,
438	     bo->handle, bytes(bo)));
439
440	VG_CLEAR(gtt);
441retry_gtt:
442	gtt.handle = bo->handle;
443	if ((err = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &gtt))) {
444		assert(err != EINVAL);
445
446		(void)__kgem_throttle_retire(kgem, 0);
447		if (kgem_expire_cache(kgem))
448			goto retry_gtt;
449
450		if (kgem_cleanup_cache(kgem))
451			goto retry_gtt;
452
453		ERR(("%s: failed to retrieve GTT offset for handle=%d: %d\n",
454		     __FUNCTION__, bo->handle, -err));
455		return NULL;
456	}
457
458retry_mmap:
459	ptr = mmap(0, bytes(bo), PROT_READ | PROT_WRITE, MAP_SHARED,
460		   kgem->fd, gtt.offset);
461	if (ptr == MAP_FAILED) {
462		err = errno;
463		assert(err != EINVAL);
464
465		if (__kgem_throttle_retire(kgem, 0))
466			goto retry_mmap;
467
468		if (kgem_cleanup_cache(kgem))
469			goto retry_mmap;
470
471		ERR(("%s: failed to mmap handle=%d, %d bytes, into GTT domain: %d\n",
472		     __FUNCTION__, bo->handle, bytes(bo), err));
473		ptr = NULL;
474	}
475
476	/* Cache this mapping to avoid the overhead of an
477	 * excruciatingly slow GTT pagefault. This is more an
478	 * issue with compositing managers which need to
479	 * frequently flush CPU damage to their GPU bo.
480	 */
481	return bo->map__gtt = ptr;
482}
483
484static void *__kgem_bo_map__wc(struct kgem *kgem, struct kgem_bo *bo)
485{
486	struct local_i915_gem_mmap2 wc;
487	int err;
488
489	DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__,
490	     bo->handle, bytes(bo)));
491	assert(kgem->has_wc_mmap);
492
493	VG_CLEAR(wc);
494
495retry_wc:
496	wc.handle = bo->handle;
497	wc.offset = 0;
498	wc.size = bytes(bo);
499	wc.flags = I915_MMAP_WC;
500	if ((err = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_MMAP_v2, &wc))) {
501		assert(err != EINVAL);
502
503		if (__kgem_throttle_retire(kgem, 0))
504			goto retry_wc;
505
506		if (kgem_cleanup_cache(kgem))
507			goto retry_wc;
508
509		ERR(("%s: failed to mmap handle=%d, %d bytes, into CPU(wc) domain: %d\n",
510		     __FUNCTION__, bo->handle, bytes(bo), -err));
511		return NULL;
512	}
513
514	VG(VALGRIND_MAKE_MEM_DEFINED(wc.addr_ptr, bytes(bo)));
515
516	DBG(("%s: caching CPU(wc) vma for %d\n", __FUNCTION__, bo->handle));
517	return bo->map__wc = (void *)(uintptr_t)wc.addr_ptr;
518}
519
520static void *__kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
521{
522	struct drm_i915_gem_mmap mmap_arg;
523	int err;
524
525retry:
526	VG_CLEAR(mmap_arg);
527	mmap_arg.handle = bo->handle;
528	mmap_arg.offset = 0;
529	mmap_arg.size = bytes(bo);
530	mmap_arg.flags = 0;
531	if ((err = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg))) {
532		assert(err != EINVAL);
533
534		if (__kgem_throttle_retire(kgem, 0))
535			goto retry;
536
537		if (kgem_cleanup_cache(kgem))
538			goto retry;
539
540		ERR(("%s: failed to mmap handle=%d, %d bytes, into CPU domain: %d\n",
541		     __FUNCTION__, bo->handle, bytes(bo), -err));
542		return NULL;
543	}
544
545	VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bytes(bo)));
546
547	DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle));
548	return bo->map__cpu = (void *)(uintptr_t)mmap_arg.addr_ptr;
549}
550
551static int gem_write(int fd, uint32_t handle,
552		     int offset, int length,
553		     const void *src)
554{
555	struct drm_i915_gem_pwrite pwrite;
556
557	DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__,
558	     handle, offset, length));
559
560	VG_CLEAR(pwrite);
561	pwrite.handle = handle;
562	pwrite.offset = offset;
563	pwrite.size = length;
564	pwrite.data_ptr = (uintptr_t)src;
565	return do_ioctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
566}
567
/* As gem_write(), but widen an unaligned transfer so that both the
 * start offset and size are whole 64-byte cachelines. The data pointer
 * is rewound by the same amount, so the extra bytes rewritten on either
 * side are simply a copy of what is already there in @src's buffer.
 * Needed for chipsets that cannot pwrite within a cacheline the GPU is
 * concurrently reading (see the note above UPLOAD_ALIGNMENT). */
static int gem_write__cachealigned(int fd, uint32_t handle,
				   int offset, int length,
				   const void *src)
{
	struct drm_i915_gem_pwrite pwrite;

	DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__,
	     handle, offset, length));

	VG_CLEAR(pwrite);
	pwrite.handle = handle;
	/* align the transfer to cachelines; fortuitously this is safe! */
	if ((offset | length) & 63) {
		pwrite.offset = offset & ~63;
		pwrite.size = ALIGN(offset+length, 64) - pwrite.offset;
		pwrite.data_ptr = (uintptr_t)src + pwrite.offset - offset;
	} else {
		pwrite.offset = offset;
		pwrite.size = length;
		pwrite.data_ptr = (uintptr_t)src;
	}
	return do_ioctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
}
591
592static int gem_read(int fd, uint32_t handle, const void *dst,
593		    int offset, int length)
594{
595	struct drm_i915_gem_pread pread;
596	int ret;
597
598	DBG(("%s(handle=%d, len=%d)\n", __FUNCTION__,
599	     handle, length));
600
601	VG_CLEAR(pread);
602	pread.handle = handle;
603	pread.offset = offset;
604	pread.size = length;
605	pread.data_ptr = (uintptr_t)dst;
606	ret = do_ioctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
607	if (ret) {
608		DBG(("%s: failed, errno=%d\n", __FUNCTION__, -ret));
609		return ret;
610	}
611
612	VG(VALGRIND_MAKE_MEM_DEFINED(dst, length));
613	return 0;
614}
615
/* Ask the kernel whether the bo is still busy (pending GPU access).
 * busy.busy is preseeded with !wedged so that if the ioctl fails we
 * report idle on a wedged GPU and busy otherwise. */
bool __kgem_busy(struct kgem *kgem, int handle)
{
	struct drm_i915_gem_busy busy;

	VG_CLEAR(busy);
	busy.handle = handle;
	busy.busy = !kgem->wedged;
	(void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
	DBG(("%s: handle=%d, busy=%d, wedged=%d\n",
	     __FUNCTION__, handle, busy.busy, kgem->wedged));

	return busy.busy;
}
629
/* Force-retire a bo that the caller knows has completed: clear its busy
 * state and run a retire pass, asserting afterwards that it really is
 * idle. Must not be called while the bo is in the current batch. */
static void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo)
{
	DBG(("%s: retiring bo handle=%d (needed flush? %d), rq? %d [busy?=%d]\n",
	     __FUNCTION__, bo->handle, bo->needs_flush, bo->rq != NULL,
	     __kgem_busy(kgem, bo->handle)));
	assert(bo->exec == NULL);
	assert(list_is_empty(&bo->vma));

	if (bo->rq) {
		__kgem_bo_clear_busy(bo);
		kgem_retire(kgem);
		assert_bo_retired(bo);
	} else {
		assert(bo->exec == NULL);
		assert(list_is_empty(&bo->request));
		assert(!bo->needs_flush);
		ASSERT_IDLE(kgem, bo->handle);
	}
}
649
/* As kgem_bo_retire(), but first query the kernel: only clear the busy
 * state and retire if the bo has actually completed. */
static void kgem_bo_maybe_retire(struct kgem *kgem, struct kgem_bo *bo)
{
	DBG(("%s: retiring bo handle=%d (needed flush? %d), rq? %d [busy?=%d]\n",
	     __FUNCTION__, bo->handle, bo->needs_flush, bo->rq != NULL,
	     __kgem_busy(kgem, bo->handle)));
	assert(bo->exec == NULL);
	assert(list_is_empty(&bo->vma));

	if (bo->rq) {
		if (!__kgem_busy(kgem, bo->handle)) {
			__kgem_bo_clear_busy(bo);
			kgem_retire(kgem);
		}
	} else {
		assert(!bo->needs_flush);
		ASSERT_IDLE(kgem, bo->handle);
	}
}
668
669bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
670		   const void *data, int length)
671{
672	void *ptr;
673	int err;
674
675	assert(bo->refcnt);
676	assert(bo->proxy == NULL);
677	ASSERT_IDLE(kgem, bo->handle);
678
679	assert(length <= bytes(bo));
680retry:
681	ptr = NULL;
682	if (bo->domain == DOMAIN_CPU || (kgem->has_llc && !bo->scanout)) {
683		ptr = bo->map__cpu;
684		if (ptr == NULL)
685			ptr = __kgem_bo_map__cpu(kgem, bo);
686	} else if (kgem->has_wc_mmap) {
687		ptr = bo->map__wc;
688		if (ptr == NULL)
689			ptr = __kgem_bo_map__wc(kgem, bo);
690	}
691	if (ptr) {
692		/* XXX unsynchronized? */
693		memcpy(ptr, data, length);
694		return true;
695	}
696
697	if ((err = gem_write(kgem->fd, bo->handle, 0, length, data))) {
698		assert(err != EINVAL);
699
700		(void)__kgem_throttle_retire(kgem, 0);
701		if (kgem_expire_cache(kgem))
702			goto retry;
703
704		if (kgem_cleanup_cache(kgem))
705			goto retry;
706
707		ERR(("%s: failed to write %d bytes into BO handle=%d: %d\n",
708		     __FUNCTION__, length, bo->handle, -err));
709		return false;
710	}
711
712	DBG(("%s: flush=%d, domain=%d\n", __FUNCTION__, bo->flush, bo->domain));
713	if (bo->exec == NULL)
714		kgem_bo_maybe_retire(kgem, bo);
715	bo->domain = DOMAIN_NONE;
716	bo->gtt_dirty = true;
717	return true;
718}
719
720static uint32_t gem_create(int fd, int num_pages)
721{
722	struct drm_i915_gem_create create;
723
724	VG_CLEAR(create);
725	create.handle = 0;
726	create.size = PAGE_SIZE * num_pages;
727	(void)do_ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
728
729	return create.handle;
730}
731
/* Tell the kernel it may reclaim the bo's pages under memory pressure
 * (I915_MADV_DONTNEED). Returns true if the pages are still resident;
 * if they were already discarded while on the GPU, flag need_purge so
 * the cache is scrubbed. */
static bool
kgem_bo_set_purgeable(struct kgem *kgem, struct kgem_bo *bo)
{
#if DBG_NO_MADV
	return true;
#else
	struct drm_i915_gem_madvise madv;

	assert(bo->exec == NULL);
	assert(!bo->purged);

	VG_CLEAR(madv);
	madv.handle = bo->handle;
	madv.madv = I915_MADV_DONTNEED;
	if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) {
		bo->purged = 1;
		kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU;
		return madv.retained;
	}

	/* ioctl failed: treat the pages as still present */
	return true;
#endif
}
755
/* Query (without changing the madvise state) whether a previously
 * purgeable bo still has its pages. Non-purged bos are trivially
 * retained. */
static bool
kgem_bo_is_retained(struct kgem *kgem, struct kgem_bo *bo)
{
#if DBG_NO_MADV
	return true;
#else
	struct drm_i915_gem_madvise madv;

	if (!bo->purged)
		return true;

	VG_CLEAR(madv);
	madv.handle = bo->handle;
	madv.madv = I915_MADV_DONTNEED;
	if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0)
		return madv.retained;

	return false;
#endif
}
776
/* Reclaim a purgeable bo for active use (I915_MADV_WILLNEED). Returns
 * true if the kernel still holds its pages; on loss, flag need_purge
 * if the stale contents were GPU-domain. */
static bool
kgem_bo_clear_purgeable(struct kgem *kgem, struct kgem_bo *bo)
{
#if DBG_NO_MADV
	return true;
#else
	struct drm_i915_gem_madvise madv;

	assert(bo->purged);

	VG_CLEAR(madv);
	madv.handle = bo->handle;
	madv.madv = I915_MADV_WILLNEED;
	if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) {
		bo->purged = !madv.retained;
		kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU;
		return madv.retained;
	}

	return false;
#endif
}
799
800static void gem_close(int fd, uint32_t handle)
801{
802	struct drm_gem_close close;
803
804	VG_CLEAR(close);
805	close.handle = handle;
806	(void)do_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close);
807}
808
/* Find-last-set: return the index of the most significant set bit,
 * i.e. floor(log2(word)). The result for word == 0 is undefined
 * (mirroring the x86 bsr instruction used on the fast path). */
constant inline static unsigned long __fls(unsigned long word)
{
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86__) || defined(__x86_64__))
	asm("bsr %1,%0"
	    : "=r" (word)
	    : "rm" (word));
	return word;
#else
	/* Portable fallback: count how many times word can be halved. */
	unsigned int v = 0;

	while (word >>= 1)
		v++;

	return v;
#endif
}
825
/* Map a size in pages to its power-of-two cache bucket index. */
constant inline static int cache_bucket(int num_pages)
{
	return __fls(num_pages);
}
830
/* (Re)initialise a bo structure for a freshly created GEM handle:
 * zeroed state, one reference, sized/bucketed by @num_pages, reusable,
 * and starting life in the CPU domain. Returns @bo for chaining. */
static struct kgem_bo *__kgem_bo_init(struct kgem_bo *bo,
				      int handle, int num_pages)
{
	DBG(("%s(handle=%d, num_pages=%d)\n", __FUNCTION__, handle, num_pages));

	assert(num_pages);
	memset(bo, 0, sizeof(*bo));

	bo->refcnt = 1;
	bo->handle = handle;
	bo->target_handle = -1; /* no execbuf slot assigned yet */
	num_pages(bo) = num_pages;
	bucket(bo) = cache_bucket(num_pages);
	bo->reusable = true;
	bo->domain = DOMAIN_CPU;
	list_init(&bo->request);
	list_init(&bo->list);
	list_init(&bo->vma);

	return bo;
}
852
853static struct kgem_bo *__kgem_bo_alloc(int handle, int num_pages)
854{
855	struct kgem_bo *bo;
856
857	if (__kgem_freed_bo) {
858		bo = __kgem_freed_bo;
859		__kgem_freed_bo = *(struct kgem_bo **)bo;
860	} else {
861		bo = malloc(sizeof(*bo));
862		if (bo == NULL)
863			return NULL;
864	}
865
866	return __kgem_bo_init(bo, handle, num_pages);
867}
868
869static struct kgem_request *__kgem_request_alloc(struct kgem *kgem)
870{
871	struct kgem_request *rq;
872
873	rq = __kgem_freed_request;
874	if (rq) {
875		__kgem_freed_request = *(struct kgem_request **)rq;
876	} else {
877		rq = malloc(sizeof(*rq));
878		if (rq == NULL)
879			rq = &kgem->static_request;
880	}
881
882	list_init(&rq->buffers);
883	rq->bo = NULL;
884	rq->ring = 0;
885
886	return rq;
887}
888
/* Unlink a request and return it to the freelist (or free it outright
 * when the malloc cache is disabled for debugging). */
static void __kgem_request_free(struct kgem_request *rq)
{
	_list_del(&rq->list);
	if (DBG_NO_MALLOC_CACHE) {
		free(rq);
	} else {
		/* push onto the freelist: first word links to the old head */
		*(struct kgem_request **)rq = __kgem_freed_request;
		__kgem_freed_request = rq;
	}
}
899
/* Return the inactive-cache list for buffers of @num_pages pages. */
static struct list *inactive(struct kgem *kgem, int num_pages)
{
	assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE);
	assert(cache_bucket(num_pages) < NUM_CACHE_BUCKETS);
	return &kgem->inactive[cache_bucket(num_pages)];
}
906
/* Return the active-cache list for buffers of @num_pages pages with
 * the given tiling mode. */
static struct list *active(struct kgem *kgem, int num_pages, int tiling)
{
	assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE);
	assert(cache_bucket(num_pages) < NUM_CACHE_BUCKETS);
	return &kgem->active[cache_bucket(num_pages)][tiling];
}
913
/* Size of the mappable GTT aperture: PCI BAR 0 on gen2 (030 is octal,
 * i.e. the driver's encoding of gen 3.0), BAR 2 on everything later. */
static size_t
agp_aperture_size(struct pci_device *dev, unsigned gen)
{
	/* XXX assume that only future chipsets are unknown and follow
	 * the post gen2 PCI layout.
	 */
	return dev->regions[gen < 030 ? 0 : 2].size;
}
922
/* Total physical RAM in bytes, via sysinfo() where available with a
 * sysconf() fallback. Returns 0 when it cannot be determined. */
static size_t
total_ram_size(void)
{
#ifdef HAVE_STRUCT_SYSINFO_TOTALRAM
	struct sysinfo info;
	if (sysinfo(&info) == 0)
		return (size_t)info.totalram * info.mem_unit;
#endif

#ifdef _SC_PHYS_PAGES
	{
		/* sysconf() returns -1 on error or when the limit is
		 * indeterminate; the old unchecked multiply would then
		 * yield a nonsense (huge) size_t. */
		long pages = sysconf(_SC_PHYS_PAGES);
		long page_size = sysconf(_SC_PAGE_SIZE);
		if (pages > 0 && page_size > 0)
			return (size_t)pages * (size_t)page_size;
	}
#endif

	return 0;
}
938
/* Determine the size in bytes of the largest (last enumerated) CPU
 * cache using CPUID leaf 4. Returns 0 if the leaf is unsupported. */
static unsigned
cpu_cache_size__cpuid4(void)
{
	/* Deterministic Cache Parameters (Function 04h)":
	 *    When EAX is initialized to a value of 4, the CPUID instruction
	 *    returns deterministic cache information in the EAX, EBX, ECX
	 *    and EDX registers.  This function requires ECX be initialized
	 *    with an index which indicates which cache to return information
	 *    about. The OS is expected to call this function (CPUID.4) with
	 *    ECX = 0, 1, 2, until EAX[4:0] == 0, indicating no more caches.
	 *    The order in which the caches are returned is not specified
	 *    and may change at Intel's discretion.
	 *
	 * Calculating the Cache Size in bytes:
	 *          = (Ways +1) * (Partitions +1) * (Line Size +1) * (Sets +1)
	 */

	 unsigned int eax, ebx, ecx, edx;
	 unsigned int llc_size = 0;
	 int cnt;

	 if (__get_cpuid_max(BASIC_CPUID, NULL) < 4)
		 return 0;

	 cnt = 0;
	 do {
		 unsigned associativity, line_partitions, line_size, sets;

		 __cpuid_count(4, cnt++, eax, ebx, ecx, edx);

		 /* EAX[4:0] == 0 means no more caches to enumerate */
		 if ((eax & 0x1f) == 0)
			 break;

		 /* decode EBX fields per the formula quoted above */
		 associativity = ((ebx >> 22) & 0x3ff) + 1;
		 line_partitions = ((ebx >> 12) & 0x3ff) + 1;
		 line_size = (ebx & 0xfff) + 1;
		 sets = ecx + 1;

		 llc_size = associativity * line_partitions * line_size * sets;
	 } while (1);

	 return llc_size;
}
982
/* Best-effort size of the CPU's last-level cache in bytes: try CPUID
 * first, then parse /proc/cpuinfo, finally default to 64KiB. */
static unsigned
cpu_cache_size(void)
{
	unsigned size;
	FILE *file;

	size = cpu_cache_size__cpuid4();
	if (size)
		return size;

	file = fopen("/proc/cpuinfo", "r");
	if (file) {
		size_t len = 0;
		char *line = NULL;
		while (getline(&line, &len, file) != -1) {
			int kb;
			if (sscanf(line, "cache size : %d KB", &kb) == 1) {
				/* Paranoid check against gargantuan caches */
				if (kb <= 1<<20)
					size = kb * 1024;
				break;
			}
		}
		free(line);
		fclose(file);
	}

	if (size == 0)
		size = 64 * 1024; /* conservative default */

	return size;
}
1015
/* Query an i915 GETPARAM value. Returns -1 on failure; since no
 * parameter uses the sign bit, -1 unambiguously signals an error. */
static int gem_param(struct kgem *kgem, int name)
{
	drm_i915_getparam_t gp;
	int v = -1; /* No param uses the sign bit, reserve it for errors */

	VG_CLEAR(gp);
	gp.param = name;
	gp.value = &v;
	if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GETPARAM, &gp))
		return -1;

	VG(VALGRIND_MAKE_MEM_DEFINED(&v, sizeof(v)));
	return v;
}
1030
/* Probe for the execbuffer2 ioctl by submitting a request with one
 * buffer but a NULL buffer list: a kernel that implements the ioctl
 * faults on the NULL pointer (-EFAULT), whereas an unsupported ioctl
 * fails differently. */
static bool test_has_execbuffer2(struct kgem *kgem)
{
	struct drm_i915_gem_execbuffer2 execbuf;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffer_count = 1;

	return do_ioctl(kgem->fd,
			 DRM_IOCTL_I915_GEM_EXECBUFFER2,
			 &execbuf) == -EFAULT;
}
1042
/* Does the kernel support execbuf with NO_RELOC (skipping relocation
 * processing)? */
static bool test_has_no_reloc(struct kgem *kgem)
{
	if (DBG_NO_FAST_RELOC)
		return false;

	return gem_param(kgem, LOCAL_I915_PARAM_HAS_NO_RELOC) > 0;
}
1050
/* Does the kernel support execbuf HANDLE_LUT (buffer indices instead
 * of handles in relocations)? */
static bool test_has_handle_lut(struct kgem *kgem)
{
	if (DBG_NO_HANDLE_LUT)
		return false;

	return gem_param(kgem, LOCAL_I915_PARAM_HAS_HANDLE_LUT) > 0;
}
1058
/* Does the kernel support write-through caching? */
static bool test_has_wt(struct kgem *kgem)
{
	if (DBG_NO_WT)
		return false;

	return gem_param(kgem, LOCAL_I915_PARAM_HAS_WT) > 0;
}
1066
1067static bool test_has_semaphores_enabled(struct kgem *kgem)
1068{
1069	FILE *file;
1070	bool detected = false;
1071	int ret;
1072
1073	if (DBG_NO_SEMAPHORES)
1074		return false;
1075
1076	ret = gem_param(kgem, LOCAL_I915_PARAM_HAS_SEMAPHORES);
1077	if (ret != -1)
1078		return ret > 0;
1079
1080	file = fopen("/sys/module/i915/parameters/semaphores", "r");
1081	if (file) {
1082		int value;
1083		if (fscanf(file, "%d", &value) == 1)
1084			detected = value != 0;
1085		fclose(file);
1086	}
1087
1088	return detected;
1089}
1090
/* Decide whether we trust hardware acceleration on this chipset/kernel
 * combination. */
static bool is_hw_supported(struct kgem *kgem,
			    struct pci_device *dev)
{
	if (DBG_NO_HW)
		return false;

	if (!test_has_execbuffer2(kgem))
		return false;

	if (kgem->gen == (unsigned)-1) /* unknown chipset, assume future gen */
		return kgem->has_blt;

	/* Although pre-855gm the GMCH is fubar, it works mostly. So
	 * let the user decide through "NoAccel" whether or not to risk
	 * hw acceleration.
	 */

	if (kgem->gen == 060 && dev && dev->revision < 8) {
		/* pre-production SNB with dysfunctional BLT */
		return false;
	}

	if (kgem->gen >= 060) /* Only if the kernel supports the BLT ring */
		return kgem->has_blt;

	return true;
}
1118
1119static bool test_has_relaxed_fencing(struct kgem *kgem)
1120{
1121	if (kgem->gen < 040) {
1122		if (DBG_NO_RELAXED_FENCING)
1123			return false;
1124
1125		return gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_FENCING) > 0;
1126	} else
1127		return true;
1128}
1129
/* Does the CPU/GPU share a last-level cache? Ask the kernel; if the
 * parameter is unknown (old kernel), infer from the GPU generation
 * (gen6+ are LLC designs). */
static bool test_has_llc(struct kgem *kgem)
{
	int has_llc = -1;

	if (DBG_NO_LLC)
		return false;

	has_llc = gem_param(kgem, LOCAL_I915_PARAM_HAS_LLC);
	if (has_llc == -1) {
		DBG(("%s: no kernel/drm support for HAS_LLC, assuming support for LLC based on GPU generation\n", __FUNCTION__));
		has_llc = kgem->gen >= 060;
	}

	return has_llc;
}
1145
/* Probe for the direct write-combining CPU mmap (MMAP_VERSION >= 1 and
 * a successful trial mapping of a scratch bo). */
static bool test_has_wc_mmap(struct kgem *kgem)
{
	struct local_i915_gem_mmap2 wc;
	bool ret;

	if (DBG_NO_WC_MMAP)
		return false;

	if (gem_param(kgem, LOCAL_I915_PARAM_MMAP_VERSION) < 1)
		return false;

	VG_CLEAR(wc);
	wc.handle = gem_create(kgem->fd, 1); /* single-page scratch bo */
	wc.offset = 0;
	wc.size = 4096;
	wc.flags = I915_MMAP_WC;
	ret = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_MMAP_v2, &wc) == 0;
	gem_close(kgem->fd, wc.handle);

	return ret;
}
1167
/* Probe for the SET_CACHING ioctl by trying it on a scratch bo. */
static bool test_has_caching(struct kgem *kgem)
{
	uint32_t handle;
	bool ret;

	if (DBG_NO_CACHE_LEVEL)
		return false;

	/* Incoherent blt and sampler hangs the GPU */
	if (kgem->gen == 040)
		return false;

	handle = gem_create(kgem->fd, 1);
	if (handle == 0)
		return false;

	ret = gem_set_caching(kgem->fd, handle, UNCACHED);
	gem_close(kgem->fd, handle);
	return ret;
}
1188
/* Probe for the userptr ioctl by wrapping a page-aligned scratch
 * allocation in a GEM object. */
static bool test_has_userptr(struct kgem *kgem)
{
	uint32_t handle;
	void *ptr;

	if (DBG_NO_USERPTR)
		return false;

	/* Incoherent blt and sampler hangs the GPU */
	if (kgem->gen == 040)
		return false;

	if (posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE))
		return false;

	handle = gem_userptr(kgem->fd, ptr, PAGE_SIZE, false);
	gem_close(kgem->fd, handle);
	free(ptr);

	return handle != 0;
}
1210
/* Probe for the (experimental) CREATE2 ioctl by creating a scratch bo
 * with a DISPLAY caching placement; args.handle remains 0 on failure
 * because of the initial memset. */
static bool test_has_create2(struct kgem *kgem)
{
#if defined(USE_CREATE2)
	struct local_i915_gem_create2 args;

	if (DBG_NO_CREATE2)
		return false;

	memset(&args, 0, sizeof(args));
	args.size = PAGE_SIZE;
	args.caching = DISPLAY;
	if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_CREATE2, &args) == 0)
		gem_close(kgem->fd, args.handle);

	return args.handle != 0;
#else
	return false;
#endif
}
1230
1231static bool test_has_secure_batches(struct kgem *kgem)
1232{
1233	if (DBG_NO_SECURE_BATCHES)
1234		return false;
1235
1236	return gem_param(kgem, LOCAL_I915_PARAM_HAS_SECURE_BATCHES) > 0;
1237}
1238
1239static bool test_has_pinned_batches(struct kgem *kgem)
1240{
1241	if (DBG_NO_PINNED_BATCHES)
1242		return false;
1243
1244	return gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES) > 0;
1245}
1246
1247static int kgem_get_screen_index(struct kgem *kgem)
1248{
1249	struct sna *sna = container_of(kgem, struct sna, kgem);
1250	return sna->scrn->scrnIndex;
1251}
1252
1253static int __find_debugfs(struct kgem *kgem)
1254{
1255	int i;
1256
1257	for (i = 0; i < DRM_MAX_MINOR; i++) {
1258		char path[80];
1259
1260		sprintf(path, "/sys/kernel/debug/dri/%d/i915_wedged", i);
1261		if (access(path, R_OK) == 0)
1262			return i;
1263
1264		sprintf(path, "/debug/dri/%d/i915_wedged", i);
1265		if (access(path, R_OK) == 0)
1266			return i;
1267	}
1268
1269	return -1;
1270}
1271
1272static int kgem_get_minor(struct kgem *kgem)
1273{
1274	struct stat st;
1275
1276	if (fstat(kgem->fd, &st))
1277		return __find_debugfs(kgem);
1278
1279	if (!S_ISCHR(st.st_mode))
1280		return __find_debugfs(kgem);
1281
1282	return st.st_rdev & 0x63;
1283}
1284
/* Pre-allocate and pin two pools of small batch buffers (16 one-page and
 * 4 four-page bos), recording each bo's pinned GTT offset.  Used by
 * hardware that requires batches at fixed addresses (CS workaround).
 * Returns false if pinning failed; in that case each list is populated
 * with a single unpinned bo as a best-effort fallback.
 */
static bool kgem_init_pinned_batches(struct kgem *kgem)
{
	int count[2] = { 16, 4 };
	int size[2] = { 1, 4 };
	int n, i;

	if (kgem->wedged)
		return true;

	for (n = 0; n < ARRAY_SIZE(count); n++) {
		for (i = 0; i < count[n]; i++) {
			struct drm_i915_gem_pin pin;
			struct kgem_bo *bo;

			VG_CLEAR(pin);

			pin.handle = gem_create(kgem->fd, size[n]);
			if (pin.handle == 0)
				goto err;

			DBG(("%s: new handle=%d, num_pages=%d\n",
			     __FUNCTION__, pin.handle, size[n]));

			bo = __kgem_bo_alloc(pin.handle, size[n]);
			if (bo == NULL) {
				gem_close(kgem->fd, pin.handle);
				goto err;
			}

			pin.alignment = 0;
			if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_PIN, &pin)) {
				gem_close(kgem->fd, pin.handle);
				/* bo was never linked into a list: plain free */
				free(bo);
				goto err;
			}
			bo->presumed_offset = pin.offset;
			debug_alloc__bo(kgem, bo);
			list_add(&bo->list, &kgem->pinned_batches[n]);
		}
	}

	return true;

err:
	/* Unwind: destroy every bo that was successfully pinned above. */
	for (n = 0; n < ARRAY_SIZE(kgem->pinned_batches); n++) {
		while (!list_is_empty(&kgem->pinned_batches[n])) {
			kgem_bo_destroy(kgem,
					list_first_entry(&kgem->pinned_batches[n],
							 struct kgem_bo, list));
		}
	}

	/* For simplicity populate the lists with a single unpinned bo */
	for (n = 0; n < ARRAY_SIZE(count); n++) {
		struct kgem_bo *bo;
		uint32_t handle;

		handle = gem_create(kgem->fd, size[n]);
		if (handle == 0)
			break;

		bo = __kgem_bo_alloc(handle, size[n]);
		if (bo == NULL) {
			gem_close(kgem->fd, handle);
			break;
		}

		debug_alloc__bo(kgem, bo);
		list_add(&bo->list, &kgem->pinned_batches[n]);
	}
	return false;
}
1357
/* Probe the swizzle mode the kernel reports for an X-tiled scratch bo
 * and, when the CPU-visible (physical) swizzling matches, select the
 * matching tiled-x memcpy implementation.
 */
static void kgem_init_swizzling(struct kgem *kgem)
{
	struct local_i915_gem_get_tiling_v2 {
		uint32_t handle;
		uint32_t tiling_mode;
		uint32_t swizzle_mode;
		uint32_t phys_swizzle_mode;
	} tiling;
#define LOCAL_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct local_i915_gem_get_tiling_v2)

	VG_CLEAR(tiling);
	tiling.handle = gem_create(kgem->fd, 1);
	if (!tiling.handle)
		return;

	if (!gem_set_tiling(kgem->fd, tiling.handle, I915_TILING_X, 512))
		goto out;

	if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_GET_TILING, &tiling))
		goto out;

	/* NOTE(review): 50 is decimal here while gen is written in octal
	 * (040, 060, ...) elsewhere in this file; since gen6 == 060 == 48,
	 * this guard also covers gen6 — confirm whether 050 (gen5) was
	 * the intended threshold. */
	if (kgem->gen < 50 && tiling.phys_swizzle_mode != tiling.swizzle_mode)
		goto out;

	choose_memcpy_tiled_x(kgem, tiling.swizzle_mode);
out:
	gem_close(kgem->fd, tiling.handle);
}
1386
/* Patch the batch's self-relocations (entries pointing back into the
 * batch itself, recorded with target_handle == ~0U) now that the actual
 * batch bo is known.  If the batch was shrunk by @shrink bytes,
 * instruction-domain deltas and relocation offsets beyond the current
 * batch length are slid down accordingly.
 */
static void kgem_fixup_relocs(struct kgem *kgem, struct kgem_bo *bo, int shrink)
{
	int n;

	bo->target_handle = kgem->has_handle_lut ? kgem->nexec : bo->handle;

	assert(kgem->nreloc__self <= 256);
	if (kgem->nreloc__self == 0)
		return;

	DBG(("%s: fixing up %d%s self-relocations to handle=%p, presumed-offset=%llx\n",
	     __FUNCTION__, kgem->nreloc__self,
	     kgem->nreloc__self == 256 ? "+" : "",
	     bo->handle, (long long)bo->presumed_offset));
	for (n = 0; n < kgem->nreloc__self; n++) {
		int i = kgem->reloc__self[n];

		assert(kgem->reloc[i].target_handle == ~0U);
		kgem->reloc[i].target_handle = bo->target_handle;
		kgem->reloc[i].presumed_offset = bo->presumed_offset;

		if (kgem->reloc[i].read_domains == I915_GEM_DOMAIN_INSTRUCTION) {
			DBG(("%s: moving base of self-reloc[%d:%d] %d -> %d\n",
			     __FUNCTION__, n, i,
			     kgem->reloc[i].delta,
			     kgem->reloc[i].delta - shrink));

			kgem->reloc[i].delta -= shrink;
		}
		/* Rewrite the batch dword to the final absolute address. */
		kgem->batch[kgem->reloc[i].offset/sizeof(uint32_t)] =
			kgem->reloc[i].delta + bo->presumed_offset;
	}

	/* reloc__self[] tracks at most 256 entries; if it overflowed, scan
	 * the remaining relocations for the ~0U sentinel directly. */
	if (n == 256) {
		for (n = kgem->reloc__self[255]; n < kgem->nreloc; n++) {
			if (kgem->reloc[n].target_handle == ~0U) {
				kgem->reloc[n].target_handle = bo->target_handle;
				kgem->reloc[n].presumed_offset = bo->presumed_offset;

				if (kgem->reloc[n].read_domains == I915_GEM_DOMAIN_INSTRUCTION) {
					DBG(("%s: moving base of reloc[%d] %d -> %d\n",
					     __FUNCTION__, n,
					     kgem->reloc[n].delta,
					     kgem->reloc[n].delta - shrink));
					kgem->reloc[n].delta -= shrink;
				}
				kgem->batch[kgem->reloc[n].offset/sizeof(uint32_t)] =
					kgem->reloc[n].delta + bo->presumed_offset;
			}
		}
	}

	if (shrink) {
		DBG(("%s: shrinking by %d\n", __FUNCTION__, shrink));
		/* Relocations recorded past the (moved) end of the batch
		 * must have their offsets shifted down as well. */
		for (n = 0; n < kgem->nreloc; n++) {
			if (kgem->reloc[n].offset >= sizeof(uint32_t)*kgem->nbatch)
				kgem->reloc[n].offset -= shrink;
		}
	}
}
1447
1448static struct kgem_bo *kgem_new_batch(struct kgem *kgem)
1449{
1450	struct kgem_bo *last;
1451	unsigned flags;
1452
1453	last = kgem->batch_bo;
1454	if (last) {
1455		kgem_fixup_relocs(kgem, last, 0);
1456		kgem->batch = NULL;
1457	}
1458
1459	if (kgem->batch) {
1460		assert(last == NULL);
1461		return NULL;
1462	}
1463
1464	flags = CREATE_CPU_MAP | CREATE_NO_THROTTLE;
1465	if (!kgem->has_llc)
1466		flags |= CREATE_UNCACHED;
1467
1468	kgem->batch_bo = kgem_create_linear(kgem,
1469					    sizeof(uint32_t)*kgem->batch_size,
1470					    flags);
1471	if (kgem->batch_bo)
1472		kgem->batch = kgem_bo_map__cpu(kgem, kgem->batch_bo);
1473	if (kgem->batch == NULL) {
1474		DBG(("%s: unable to map batch bo, mallocing(size=%d)\n",
1475		     __FUNCTION__,
1476		     sizeof(uint32_t)*kgem->batch_size));
1477		if (kgem->batch_bo) {
1478			kgem_bo_destroy(kgem, kgem->batch_bo);
1479			kgem->batch_bo = NULL;
1480		}
1481
1482		if (posix_memalign((void **)&kgem->batch, PAGE_SIZE,
1483				   ALIGN(sizeof(uint32_t) * kgem->batch_size, PAGE_SIZE))) {
1484			ERR(("%s: batch allocation failed, disabling acceleration\n", __FUNCTION__));
1485			__kgem_set_wedged(kgem);
1486		}
1487	} else {
1488		DBG(("%s: allocated and mapped batch handle=%d [size=%d]\n",
1489		     __FUNCTION__, kgem->batch_bo->handle,
1490		     sizeof(uint32_t)*kgem->batch_size));
1491		kgem_bo_sync__cpu(kgem, kgem->batch_bo);
1492	}
1493
1494	DBG(("%s: using last batch handle=%d\n",
1495	     __FUNCTION__, last ? last->handle : 0));
1496	return last;
1497}
1498
/* One-time initialisation of the GEM wrapper: reset all cache lists,
 * probe kernel/hardware capabilities, size the batch buffer, and derive
 * the aperture watermarks and object-size limits used by the allocator.
 * Note: gen is encoded as (generation << 3 | step) written in octal
 * (e.g. 060 == gen6) throughout.
 */
void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
{
	struct drm_i915_gem_get_aperture aperture;
	size_t totalram;
	unsigned half_gpu_max;
	unsigned int i, j;

	DBG(("%s: fd=%d, gen=%d\n", __FUNCTION__, fd, gen));

	kgem->fd = fd;
	kgem->gen = gen;

	/* Initialise every bo cache/tracking list to empty. */
	list_init(&kgem->requests[0]);
	list_init(&kgem->requests[1]);
	list_init(&kgem->batch_buffers);
	list_init(&kgem->active_buffers);
	list_init(&kgem->flushing);
	list_init(&kgem->large);
	list_init(&kgem->large_inactive);
	list_init(&kgem->snoop);
	list_init(&kgem->scanout);
	for (i = 0; i < ARRAY_SIZE(kgem->pinned_batches); i++)
		list_init(&kgem->pinned_batches[i]);
	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
		list_init(&kgem->inactive[i]);
	for (i = 0; i < ARRAY_SIZE(kgem->active); i++) {
		for (j = 0; j < ARRAY_SIZE(kgem->active[i]); j++)
			list_init(&kgem->active[i][j]);
	}
	for (i = 0; i < ARRAY_SIZE(kgem->vma); i++) {
		for (j = 0; j < ARRAY_SIZE(kgem->vma[i].inactive); j++)
			list_init(&kgem->vma[i].inactive[j]);
	}
	/* Negative counts let the vma caches fill before eviction starts. */
	kgem->vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE;
	kgem->vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE;

	/* Probe optional kernel features and hardware capabilities. */
	kgem->has_blt = gem_param(kgem, LOCAL_I915_PARAM_HAS_BLT) > 0;
	DBG(("%s: has BLT ring? %d\n", __FUNCTION__,
	     kgem->has_blt));

	kgem->has_relaxed_delta =
		gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_DELTA) > 0;
	DBG(("%s: has relaxed delta? %d\n", __FUNCTION__,
	     kgem->has_relaxed_delta));

	kgem->has_relaxed_fencing = test_has_relaxed_fencing(kgem);
	DBG(("%s: has relaxed fencing? %d\n", __FUNCTION__,
	     kgem->has_relaxed_fencing));

	kgem->has_llc = test_has_llc(kgem);
	DBG(("%s: has shared last-level-cache? %d\n", __FUNCTION__,
	     kgem->has_llc));

	kgem->has_wt = test_has_wt(kgem);
	DBG(("%s: has write-through caching for scanouts? %d\n", __FUNCTION__,
	     kgem->has_wt));

	kgem->has_wc_mmap = test_has_wc_mmap(kgem);
	DBG(("%s: has wc-mmapping? %d\n", __FUNCTION__,
	     kgem->has_wc_mmap));

	kgem->has_caching = test_has_caching(kgem);
	DBG(("%s: has set-cache-level? %d\n", __FUNCTION__,
	     kgem->has_caching));

	kgem->has_userptr = test_has_userptr(kgem);
	DBG(("%s: has userptr? %d\n", __FUNCTION__,
	     kgem->has_userptr));

	kgem->has_create2 = test_has_create2(kgem);
	DBG(("%s: has create2? %d\n", __FUNCTION__,
	     kgem->has_create2));

	kgem->has_no_reloc = test_has_no_reloc(kgem);
	DBG(("%s: has no-reloc? %d\n", __FUNCTION__,
	     kgem->has_no_reloc));

	kgem->has_handle_lut = test_has_handle_lut(kgem);
	DBG(("%s: has handle-lut? %d\n", __FUNCTION__,
	     kgem->has_handle_lut));

	/* Semaphores are only useful for inter-ring sync (needs BLT). */
	kgem->has_semaphores = false;
	if (kgem->has_blt && test_has_semaphores_enabled(kgem))
		kgem->has_semaphores = true;
	DBG(("%s: semaphores enabled? %d\n", __FUNCTION__,
	     kgem->has_semaphores));

	kgem->can_blt_cpu = gen >= 030;
	DBG(("%s: can blt to cpu? %d\n", __FUNCTION__,
	     kgem->can_blt_cpu));

	kgem->can_render_y = gen != 021 && (gen >> 3) != 4;
	DBG(("%s: can render to Y-tiled surfaces? %d\n", __FUNCTION__,
	     kgem->can_render_y));

	kgem->has_secure_batches = test_has_secure_batches(kgem);
	DBG(("%s: can use privileged batchbuffers? %d\n", __FUNCTION__,
	     kgem->has_secure_batches));

	kgem->has_pinned_batches = test_has_pinned_batches(kgem);
	DBG(("%s: can use pinned batchbuffers (to avoid CS w/a)? %d\n", __FUNCTION__,
	     kgem->has_pinned_batches));

	/* Disable acceleration outright on broken or hung hardware. */
	if (!is_hw_supported(kgem, dev)) {
		xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING,
			   "Detected unsupported/dysfunctional hardware, disabling acceleration.\n");
		__kgem_set_wedged(kgem);
	} else if (__kgem_throttle(kgem, false)) {
		xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING,
			   "Detected a hung GPU, disabling acceleration.\n");
		__kgem_set_wedged(kgem);
	}

	/* Choose the batch size (in dwords) subject to per-gen limits. */
	kgem->batch_size = UINT16_MAX & ~7;
	if (gen == 020 && !kgem->has_pinned_batches)
		/* Limited to what we can pin */
		kgem->batch_size = 4*1024;
	if (gen == 022)
		/* 865g cannot handle a batch spanning multiple pages */
		kgem->batch_size = PAGE_SIZE / sizeof(uint32_t);
	if (gen >= 070)
		kgem->batch_size = 16*1024;
	if (!kgem->has_relaxed_delta && kgem->batch_size > 4*1024)
		kgem->batch_size = 4*1024;

	if (!kgem_init_pinned_batches(kgem) && gen == 020) {
		xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING,
			   "Unable to reserve memory for GPU, disabling acceleration.\n");
		__kgem_set_wedged(kgem);
	}

	DBG(("%s: maximum batch size? %d\n", __FUNCTION__,
	     kgem->batch_size));
	kgem_new_batch(kgem);

	kgem->half_cpu_cache_pages = cpu_cache_size() >> 13;
	DBG(("%s: last-level cache size: %d bytes, threshold in pages: %d\n",
	     __FUNCTION__, cpu_cache_size(), kgem->half_cpu_cache_pages));

	kgem->next_request = __kgem_request_alloc(kgem);

	/* Bitwise | is deliberate: no short-circuit needed on plain flags. */
	DBG(("%s: cpu bo enabled %d: llc? %d, set-cache-level? %d, userptr? %d\n", __FUNCTION__,
	     !DBG_NO_CPU && (kgem->has_llc | kgem->has_userptr | kgem->has_caching),
	     kgem->has_llc, kgem->has_caching, kgem->has_userptr));

	/* Query the GTT aperture; fall back to a 64MiB guess on failure. */
	VG_CLEAR(aperture);
	aperture.aper_size = 0;
	(void)do_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
	if (aperture.aper_size == 0)
		aperture.aper_size = 64*1024*1024;

	DBG(("%s: aperture size %lld, available now %lld\n",
	     __FUNCTION__,
	     (long long)aperture.aper_size,
	     (long long)aperture.aper_available_size));

	/* Derive high/low watermarks for batch aperture accounting. */
	kgem->aperture_total = aperture.aper_size;
	kgem->aperture_high = aperture.aper_size * 3/4;
	kgem->aperture_low = aperture.aper_size * 1/3;
	if (gen < 033) {
		/* Severe alignment penalties */
		kgem->aperture_high /= 2;
		kgem->aperture_low /= 2;
	}
	DBG(("%s: aperture low=%d [%d], high=%d [%d]\n", __FUNCTION__,
	     kgem->aperture_low, kgem->aperture_low / (1024*1024),
	     kgem->aperture_high, kgem->aperture_high / (1024*1024)));

	kgem->aperture_mappable = 256 * 1024 * 1024;
	if (dev != NULL)
		kgem->aperture_mappable = agp_aperture_size(dev, gen);
	if (kgem->aperture_mappable == 0 ||
	    kgem->aperture_mappable > aperture.aper_size)
		kgem->aperture_mappable = aperture.aper_size;
	DBG(("%s: aperture mappable=%d [%d MiB]\n", __FUNCTION__,
	     kgem->aperture_mappable, kgem->aperture_mappable / (1024*1024)));

	kgem->aperture_fenceable = MIN(256*1024*1024, kgem->aperture_mappable);
	DBG(("%s: aperture fenceable=%d [%d MiB]\n", __FUNCTION__,
	     kgem->aperture_fenceable, kgem->aperture_fenceable / (1024*1024)));

	/* Upload buffer size: a power of two scaled to the aperture but
	 * capped at half the CPU cache so uploads stay cache-resident. */
	kgem->buffer_size = 64 * 1024;
	while (kgem->buffer_size < kgem->aperture_mappable >> 10)
		kgem->buffer_size *= 2;
	if (kgem->buffer_size >> 12 > kgem->half_cpu_cache_pages)
		kgem->buffer_size = kgem->half_cpu_cache_pages << 12;
	kgem->buffer_size = 1 << __fls(kgem->buffer_size);
	DBG(("%s: buffer size=%d [%d KiB]\n", __FUNCTION__,
	     kgem->buffer_size, kgem->buffer_size / 1024));
	assert(kgem->buffer_size);

	/* Object size ceilings, clamped against total system RAM below. */
	kgem->max_object_size = 3 * (kgem->aperture_high >> 12) << 10;
	kgem->max_gpu_size = kgem->max_object_size;
	if (!kgem->has_llc && kgem->max_gpu_size > MAX_CACHE_SIZE)
		kgem->max_gpu_size = MAX_CACHE_SIZE;

	totalram = total_ram_size();
	if (totalram == 0) {
		DBG(("%s: total ram size unknown, assuming maximum of total aperture\n",
		     __FUNCTION__));
		totalram = kgem->aperture_total;
	}
	DBG(("%s: total ram=%ld\n", __FUNCTION__, (long)totalram));
	if (kgem->max_object_size > totalram / 2)
		kgem->max_object_size = totalram / 2;
	if (kgem->max_gpu_size > totalram / 4)
		kgem->max_gpu_size = totalram / 4;

	if (kgem->aperture_high > totalram / 2) {
		kgem->aperture_high = totalram / 2;
		kgem->aperture_low = kgem->aperture_high / 4;
		DBG(("%s: reduced aperture watermaks to fit into ram; low=%d [%d], high=%d [%d]\n", __FUNCTION__,
		     kgem->aperture_low, kgem->aperture_low / (1024*1024),
		     kgem->aperture_high, kgem->aperture_high / (1024*1024)));
	}

	kgem->max_cpu_size = kgem->max_object_size;

	/* Tiled upload/copy staging limits, bounded by aperture pressure. */
	half_gpu_max = kgem->max_gpu_size / 2;
	kgem->max_copy_tile_size = (MAX_CACHE_SIZE + 1)/2;
	if (kgem->max_copy_tile_size > half_gpu_max)
		kgem->max_copy_tile_size = half_gpu_max;

	if (kgem->has_llc)
		kgem->max_upload_tile_size = kgem->max_copy_tile_size;
	else
		kgem->max_upload_tile_size = kgem->aperture_fenceable / 4;
	if (kgem->max_upload_tile_size > half_gpu_max)
		kgem->max_upload_tile_size = half_gpu_max;
	if (kgem->max_upload_tile_size > kgem->aperture_high/2)
		kgem->max_upload_tile_size = kgem->aperture_high/2;
	if (kgem->max_upload_tile_size > kgem->aperture_low)
		kgem->max_upload_tile_size = kgem->aperture_low;
	if (kgem->max_upload_tile_size < 16*PAGE_SIZE)
		kgem->max_upload_tile_size = 16*PAGE_SIZE;

	kgem->large_object_size = MAX_CACHE_SIZE;
	if (kgem->large_object_size > half_gpu_max)
		kgem->large_object_size = half_gpu_max;
	if (kgem->max_copy_tile_size > kgem->aperture_high/2)
		kgem->max_copy_tile_size = kgem->aperture_high/2;
	if (kgem->max_copy_tile_size > kgem->aperture_low)
		kgem->max_copy_tile_size = kgem->aperture_low;
	if (kgem->max_copy_tile_size < 16*PAGE_SIZE)
		kgem->max_copy_tile_size = 16*PAGE_SIZE;

	/* CPU bos require some way of obtaining coherent CPU access. */
	if (kgem->has_llc | kgem->has_caching | kgem->has_userptr) {
		if (kgem->large_object_size > kgem->max_cpu_size)
			kgem->large_object_size = kgem->max_cpu_size;
	} else
		kgem->max_cpu_size = 0;
	if (DBG_NO_CPU)
		kgem->max_cpu_size = 0;

	DBG(("%s: maximum object size=%d\n",
	     __FUNCTION__, kgem->max_object_size));
	DBG(("%s: large object thresold=%d\n",
	     __FUNCTION__, kgem->large_object_size));
	DBG(("%s: max object sizes (gpu=%d, cpu=%d, tile upload=%d, copy=%d)\n",
	     __FUNCTION__,
	     kgem->max_gpu_size, kgem->max_cpu_size,
	     kgem->max_upload_tile_size, kgem->max_copy_tile_size));

	/* Convert the aperture thresholds to pages */
	kgem->aperture_mappable /= PAGE_SIZE;
	kgem->aperture_fenceable /= PAGE_SIZE;
	kgem->aperture_low /= PAGE_SIZE;
	kgem->aperture_high /= PAGE_SIZE;
	kgem->aperture_total /= PAGE_SIZE;

	/* Keep a couple of fences spare for the kernel/other users. */
	kgem->fence_max = gem_param(kgem, I915_PARAM_NUM_FENCES_AVAIL) - 2;
	if ((int)kgem->fence_max < 0)
		kgem->fence_max = 5; /* minimum safe value for all hw */
	DBG(("%s: max fences=%d\n", __FUNCTION__, kgem->fence_max));

	/* Base execbuffer flags applied to every batch submission. */
	kgem->batch_flags_base = 0;
	if (kgem->has_no_reloc)
		kgem->batch_flags_base |= LOCAL_I915_EXEC_NO_RELOC;
	if (kgem->has_handle_lut)
		kgem->batch_flags_base |= LOCAL_I915_EXEC_HANDLE_LUT;
	if (kgem->has_pinned_batches)
		kgem->batch_flags_base |= LOCAL_I915_EXEC_IS_PINNED;

	kgem_init_swizzling(kgem);
}
1784
1785/* XXX hopefully a good approximation */
1786static uint32_t kgem_get_unique_id(struct kgem *kgem)
1787{
1788	uint32_t id;
1789	id = ++kgem->unique_id;
1790	if (id == 0)
1791		id = ++kgem->unique_id;
1792	return id;
1793}
1794
1795inline static uint32_t kgem_pitch_alignment(struct kgem *kgem, unsigned flags)
1796{
1797	if (flags & CREATE_PRIME)
1798		return 256;
1799	if (flags & CREATE_SCANOUT)
1800		return 64;
1801	if (kgem->gen >= 0100)
1802		return 32;
1803	return 8;
1804}
1805
1806void kgem_get_tile_size(struct kgem *kgem, int tiling, int pitch,
1807			int *tile_width, int *tile_height, int *tile_size)
1808{
1809	if (kgem->gen <= 030) {
1810		if (tiling) {
1811			if (kgem->gen < 030) {
1812				*tile_width = 128;
1813				*tile_height = 16;
1814				*tile_size = 2048;
1815			} else {
1816				*tile_width = 512;
1817				*tile_height = 8;
1818				*tile_size = 4096;
1819			}
1820		} else {
1821			*tile_width = 1;
1822			*tile_height = 1;
1823			*tile_size = 1;
1824		}
1825	} else switch (tiling) {
1826	default:
1827	case I915_TILING_NONE:
1828		*tile_width = 1;
1829		*tile_height = 1;
1830		*tile_size = 1;
1831		break;
1832	case I915_TILING_X:
1833		*tile_width = 512;
1834		*tile_height = 8;
1835		*tile_size = 4096;
1836		break;
1837	case I915_TILING_Y:
1838		*tile_width = 128;
1839		*tile_height = 32;
1840		*tile_size = 4096;
1841		break;
1842	}
1843
1844	/* Force offset alignment to tile-row */
1845	if (tiling && kgem->gen < 033)
1846		*tile_width = pitch;
1847}
1848
/* Compute the allocation size (bytes) and aligned pitch for a 2D surface
 * of the given dimensions/bpp/tiling.  Returns 0 if the surface exceeds
 * the blitter's pitch limits on pre-gen4 hardware.
 */
static uint32_t kgem_surface_size(struct kgem *kgem,
				  bool relaxed_fencing,
				  unsigned flags,
				  uint32_t width,
				  uint32_t height,
				  uint32_t bpp,
				  uint32_t tiling,
				  uint32_t *pitch)
{
	uint32_t tile_width, tile_height;
	uint32_t size;

	assert(width <= MAXSHORT);
	assert(height <= MAXSHORT);
	assert(bpp >= 8);

	if (kgem->gen <= 030) {
		if (tiling) {
			if (kgem->gen < 030) {
				tile_width = 128;
				tile_height = 16;
			} else {
				tile_width = 512;
				tile_height = 8;
			}
		} else {
			/* Linear: align pitch to at least two pixels and the
			 * flag-dependent minimum (PRIME/scanout/gen). */
			tile_width = 2 * bpp >> 3;
			tile_width = ALIGN(tile_width,
					   kgem_pitch_alignment(kgem, flags));
			tile_height = 1;
		}
	} else switch (tiling) {
	default:
	case I915_TILING_NONE:
		tile_width = 2 * bpp >> 3;
		tile_width = ALIGN(tile_width,
				   kgem_pitch_alignment(kgem, flags));
		tile_height = 1;
		break;

	case I915_TILING_X:
		tile_width = 512;
		tile_height = 8;
		break;
	case I915_TILING_Y:
		tile_width = 128;
		tile_height = 32;
		break;
	}
	/* XXX align to an even tile row */
	/* NOTE(review): this consults kgem->has_relaxed_fencing rather than
	 * the relaxed_fencing parameter used below — confirm intentional. */
	if (!kgem->has_relaxed_fencing)
		tile_height *= 2;

	*pitch = ALIGN(width * bpp / 8, tile_width);
	height = ALIGN(height, tile_height);
	DBG(("%s: tile_width=%d, tile_height=%d => aligned pitch=%d, height=%d\n",
	     __FUNCTION__, tile_width, tile_height, *pitch, height));

	if (kgem->gen >= 040)
		return PAGE_ALIGN(*pitch * height);

	/* If it is too wide for the blitter, don't even bother.  */
	if (tiling != I915_TILING_NONE) {
		if (*pitch > 8192) {
			DBG(("%s: too wide for tiled surface (pitch=%d, limit=%d)\n",
			     __FUNCTION__, *pitch, 8192));
			return 0;
		}

		/* Pre-gen4 tiled pitches must be a power of two. */
		for (size = tile_width; size < *pitch; size <<= 1)
			;
		*pitch = size;
	} else {
		if (*pitch >= 32768) {
			DBG(("%s: too wide for linear surface (pitch=%d, limit=%d)\n",
			     __FUNCTION__, *pitch, 32767));
			return 0;
		}
	}

	size = *pitch * height;
	if (relaxed_fencing || tiling == I915_TILING_NONE)
		return PAGE_ALIGN(size);

	/* We need to allocate a pot fence region for a tiled buffer. */
	if (kgem->gen < 030)
		tile_width = 512 * 1024;
	else
		tile_width = 1024 * 1024;
	while (tile_width < size)
		tile_width *= 2;
	return tile_width;
}
1942
1943bool kgem_check_surface_size(struct kgem *kgem,
1944			     uint32_t width,
1945			     uint32_t height,
1946			     uint32_t bpp,
1947			     uint32_t tiling,
1948			     uint32_t pitch,
1949			     uint32_t size)
1950{
1951	uint32_t min_size, min_pitch;
1952	int tile_width, tile_height, tile_size;
1953
1954	DBG(("%s(width=%d, height=%d, bpp=%d, tiling=%d, pitch=%d, size=%d)\n",
1955	     __FUNCTION__, width, height, bpp, tiling, pitch, size));
1956
1957	if (pitch & 3)
1958		return false;
1959
1960	min_size = kgem_surface_size(kgem, kgem->has_relaxed_fencing, 0,
1961				     width, height, bpp, tiling,
1962				     &min_pitch);
1963
1964	DBG(("%s: min_pitch=%d, min_size=%d\n", __FUNCTION__, min_pitch, min_size));
1965
1966	if (size < min_size)
1967		return false;
1968
1969	if (pitch < min_pitch)
1970		return false;
1971
1972	kgem_get_tile_size(kgem, tiling, min_pitch,
1973			   &tile_width, &tile_height, &tile_size);
1974
1975	DBG(("%s: tile_width=%d, tile_size=%d\n", __FUNCTION__, tile_width, tile_size));
1976	if (pitch & (tile_width - 1))
1977		return false;
1978	if (size & (tile_size - 1))
1979		return false;
1980
1981	return true;
1982}
1983
1984static uint32_t kgem_aligned_height(struct kgem *kgem,
1985				    uint32_t height, uint32_t tiling)
1986{
1987	uint32_t tile_height;
1988
1989	if (kgem->gen <= 030) {
1990		tile_height = tiling ? kgem->gen < 030 ? 16 : 8 : 1;
1991	} else switch (tiling) {
1992		/* XXX align to an even tile row */
1993	default:
1994	case I915_TILING_NONE:
1995		tile_height = 1;
1996		break;
1997	case I915_TILING_X:
1998		tile_height = 8;
1999		break;
2000	case I915_TILING_Y:
2001		tile_height = 32;
2002		break;
2003	}
2004
2005	/* XXX align to an even tile row */
2006	if (!kgem->has_relaxed_fencing)
2007		tile_height *= 2;
2008
2009	return ALIGN(height, tile_height);
2010}
2011
2012static struct drm_i915_gem_exec_object2 *
2013kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo)
2014{
2015	struct drm_i915_gem_exec_object2 *exec;
2016
2017	DBG(("%s: handle=%d, index=%d\n",
2018	     __FUNCTION__, bo->handle, kgem->nexec));
2019
2020	assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
2021	bo->target_handle = kgem->has_handle_lut ? kgem->nexec : bo->handle;
2022	exec = memset(&kgem->exec[kgem->nexec++], 0, sizeof(*exec));
2023	exec->handle = bo->handle;
2024	exec->offset = bo->presumed_offset;
2025
2026	kgem->aperture += num_pages(bo);
2027
2028	return exec;
2029}
2030
/* Track @bo as part of the batch under construction: reserve its exec
 * slot, tie it to the next request/ring and queue it on the request's
 * buffer list so it is retired when the batch completes.
 */
static void kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo)
{
	assert(bo->refcnt);
	assert(bo->proxy == NULL);

	bo->exec = kgem_add_handle(kgem, bo);
	bo->rq = MAKE_REQUEST(kgem->next_request, kgem->ring);

	list_move_tail(&bo->request, &kgem->next_request->buffers);
	/* io (upload) buffers already on a list migrate to batch_buffers */
	if (bo->io && !list_is_empty(&bo->list))
		list_move(&bo->list, &kgem->batch_buffers);

	/* XXX is it worth working around gcc here? */
	kgem->flush |= bo->flush;
}
2046
2047static uint32_t kgem_end_batch(struct kgem *kgem)
2048{
2049	kgem->batch[kgem->nbatch++] = MI_BATCH_BUFFER_END;
2050	if (kgem->nbatch & 1)
2051		kgem->batch[kgem->nbatch++] = MI_NOOP;
2052
2053	return kgem->nbatch;
2054}
2055
2056static void kgem_bo_binding_free(struct kgem *kgem, struct kgem_bo *bo)
2057{
2058	struct kgem_bo_binding *b;
2059
2060	b = bo->binding.next;
2061	while (b) {
2062		struct kgem_bo_binding *next = b->next;
2063		free(b);
2064		b = next;
2065	}
2066}
2067
2068static void kgem_bo_rmfb(struct kgem *kgem, struct kgem_bo *bo)
2069{
2070	if (bo->scanout && bo->delta) {
2071		DBG(("%s: releasing fb=%d for handle=%d\n",
2072		     __FUNCTION__, bo->delta, bo->handle));
2073		/* XXX will leak if we are not DRM_MASTER. *shrug* */
2074		do_ioctl(kgem->fd, DRM_IOCTL_MODE_RMFB, &bo->delta);
2075		bo->delta = 0;
2076	}
2077}
2078
/* Final destruction of a bo: release bindings and fb, tear down every
 * CPU mapping (user, GTT, WC and CPU), unlink it from all lists, close
 * the GEM handle, and recycle the struct onto the malloc freelist when
 * possible.
 */
static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
{
	DBG(("%s: handle=%d, size=%d\n", __FUNCTION__, bo->handle, bytes(bo)));
	assert(bo->refcnt == 0);
	assert(bo->proxy == NULL);
	assert(bo->exec == NULL);
	assert(!bo->snoop || bo->rq == NULL);

#ifdef DEBUG_MEMORY
	kgem->debug_memory.bo_allocs--;
	kgem->debug_memory.bo_bytes -= bytes(bo);
#endif

	kgem_bo_binding_free(kgem, bo);
	kgem_bo_rmfb(kgem, bo);

	/* A userptr mapping: the backing pages are ours to free (unless
	 * the bo is an io/flush wrapper around caller-owned memory). */
	if (IS_USER_MAP(bo->map__cpu)) {
		assert(bo->rq == NULL);
		assert(!__kgem_busy(kgem, bo->handle));
		assert(MAP(bo->map__cpu) != bo || bo->io || bo->flush);
		if (!(bo->io || bo->flush)) {
			DBG(("%s: freeing snooped base\n", __FUNCTION__));
			assert(bo != MAP(bo->map__cpu));
			free(MAP(bo->map__cpu));
		}
		bo->map__cpu = NULL;
	}

	DBG(("%s: releasing %p:%p vma for handle=%d, count=%d\n",
	     __FUNCTION__, bo->map__gtt, bo->map__cpu,
	     bo->handle, list_is_empty(&bo->vma) ? 0 : kgem->vma[bo->map__gtt == NULL && bo->map__wc == NULL].count));

	/* Remove from the vma cache (index 0 = GTT/WC, 1 = CPU). */
	if (!list_is_empty(&bo->vma)) {
		_list_del(&bo->vma);
		kgem->vma[bo->map__gtt == NULL && bo->map__wc == NULL].count--;
	}

	if (bo->map__gtt)
		munmap(bo->map__gtt, bytes(bo));
	if (bo->map__wc) {
		VG(VALGRIND_MAKE_MEM_NOACCESS(bo->map__wc, bytes(bo)));
		munmap(bo->map__wc, bytes(bo));
	}
	if (bo->map__cpu) {
		VG(VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map__cpu), bytes(bo)));
		munmap(MAP(bo->map__cpu), bytes(bo));
	}

	_list_del(&bo->list);
	_list_del(&bo->request);
	gem_close(kgem->fd, bo->handle);

	/* Recycle the struct itself via the freelist; io buffers embed
	 * extra state and must be truly freed. */
	if (!bo->io && !DBG_NO_MALLOC_CACHE) {
		*(struct kgem_bo **)bo = __kgem_freed_bo;
		__kgem_freed_bo = bo;
	} else
		free(bo);
}
2137
/* Retire an idle, reusable bo into the inactive cache for its size
 * bucket, keeping any still-useful mappings registered in the vma cache
 * (index 0 = GTT/WC, 1 = CPU) so they can be evicted under pressure.
 */
inline static void kgem_bo_move_to_inactive(struct kgem *kgem,
					    struct kgem_bo *bo)
{
	DBG(("%s: moving handle=%d to inactive\n", __FUNCTION__, bo->handle));

	assert(bo->refcnt == 0);
	assert(bo->reusable);
	assert(bo->rq == NULL);
	assert(bo->exec == NULL);
	assert(bo->domain != DOMAIN_GPU);
	assert(!bo->proxy);
	assert(!bo->io);
	assert(!bo->scanout);
	assert(!bo->snoop);
	assert(!bo->flush);
	assert(!bo->needs_flush);
	assert(list_is_empty(&bo->vma));
	assert_tiling(kgem, bo);
	assert_cacheing(kgem, bo);
	ASSERT_IDLE(kgem, bo->handle);

	/* Oversized bos bypass the bucketed caches entirely. */
	if (bucket(bo) >= NUM_CACHE_BUCKETS) {
		if (bo->map__gtt) {
			munmap(bo->map__gtt, bytes(bo));
			bo->map__gtt = NULL;
		}

		list_move(&bo->list, &kgem->large_inactive);
	} else {
		assert(bo->flush == false);
		assert(list_is_empty(&bo->vma));
		list_move(&bo->list, &kgem->inactive[bucket(bo)]);
		/* Drop a GTT mapping that can no longer be used directly. */
		if (bo->map__gtt && !kgem_bo_can_map(kgem, bo)) {
			munmap(bo->map__gtt, bytes(bo));
			bo->map__gtt = NULL;
		}
		if (bo->map__gtt || (bo->map__wc && !bo->tiling)) {
			list_add(&bo->vma, &kgem->vma[0].inactive[bucket(bo)]);
			kgem->vma[0].count++;
		}
		/* Only one vma-cache membership per bo: CPU map is second
		 * choice after the GTT/WC map. */
		if (bo->map__cpu && list_is_empty(&bo->vma)) {
			list_add(&bo->vma, &kgem->vma[1].inactive[bucket(bo)]);
			kgem->vma[1].count++;
		}
	}

	kgem->need_expire = true;
}
2186
/* An io bo is the head of a larger struct kgem_buffer; before it can be
 * cached as a plain bo its state must be copied into a minimal struct
 * kgem_bo (reusing the freelist when possible).  If no memory is
 * available the bo is simply marked non-reusable.  Returns the bo to use
 * in place of the argument.
 */
static struct kgem_bo *kgem_bo_replace_io(struct kgem_bo *bo)
{
	struct kgem_bo *base;

	if (!bo->io)
		return bo;

	assert(!bo->snoop);
	if (__kgem_freed_bo) {
		base = __kgem_freed_bo;
		/* Pop: the first word of a freed bo links to the next. */
		__kgem_freed_bo = *(struct kgem_bo **)base;
	} else
		base = malloc(sizeof(*base));
	if (base) {
		DBG(("%s: transferring io handle=%d to bo\n",
		     __FUNCTION__, bo->handle));
		/* transfer the handle to a minimum bo */
		memcpy(base, bo, sizeof(*base));
		base->io = false;
		list_init(&base->list);
		/* Fix up the intrusive list links to point at the copy. */
		list_replace(&bo->request, &base->request);
		list_replace(&bo->vma, &base->vma);
		free(bo);
		bo = base;
	} else
		bo->reusable = false;

	return bo;
}
2216
/* Unlink a bo from the inactive cache, including its vma-cache entry
 * (index 0 = GTT/WC, 1 = CPU) if it has one.
 */
inline static void kgem_bo_remove_from_inactive(struct kgem *kgem,
						struct kgem_bo *bo)
{
	DBG(("%s: removing handle=%d from inactive\n", __FUNCTION__, bo->handle));

	list_del(&bo->list);
	assert(bo->rq == NULL);
	assert(bo->exec == NULL);
	if (!list_is_empty(&bo->vma)) {
		assert(bo->map__gtt || bo->map__wc || bo->map__cpu);
		list_del(&bo->vma);
		kgem->vma[bo->map__gtt == NULL && bo->map__wc == NULL].count--;
	}
}
2231
/* Unlink a bo from the active cache.  If its request is the static
 * already-retired marker (RQ == kgem itself), also drop it from that
 * request's buffer list.
 */
inline static void kgem_bo_remove_from_active(struct kgem *kgem,
					      struct kgem_bo *bo)
{
	DBG(("%s: removing handle=%d from active\n", __FUNCTION__, bo->handle));

	list_del(&bo->list);
	assert(bo->rq != NULL);
	if (RQ(bo->rq) == (void *)kgem) {
		assert(bo->exec == NULL);
		list_del(&bo->request);
	}
	assert(list_is_empty(&bo->vma));
}
2245
/* When a proxy bo is destroyed, return its bytes to the parent upload
 * buffer — but only if the proxy was the most recent allocation from
 * that buffer (simple bump-allocator rollback). */
static void _kgem_bo_delete_buffer(struct kgem *kgem, struct kgem_bo *bo)
{
	struct kgem_buffer *io = (struct kgem_buffer *)bo->proxy;

	DBG(("%s: size=%d, offset=%d, parent used=%d\n",
	     __FUNCTION__, bo->size.bytes, bo->delta, io->used));

	if (ALIGN(bo->delta + bo->size.bytes, UPLOAD_ALIGNMENT) == io->used)
		io->used = bo->delta;
}
2256
/* Verify that the framebuffer attached to a cached scanout bo still
 * matches the requested dimensions; returns false (do not reuse) if
 * the fb is gone or differs in size. */
static bool check_scanout_size(struct kgem *kgem,
			       struct kgem_bo *bo,
			       int width, int height)
{
	struct drm_mode_fb_cmd info;

	assert(bo->scanout);

	VG_CLEAR(info);
	info.fb_id = bo->delta; /* scanouts stash their fb id in bo->delta */

	if (do_ioctl(kgem->fd, DRM_IOCTL_MODE_GETFB, &info))
		return false;

	/* GETFB returned a fresh handle reference; release it at once */
	gem_close(kgem->fd, info.handle);

	if (width != info.width || height != info.height) {
		DBG(("%s: not using scanout %d (%dx%d), want (%dx%d)\n",
		     __FUNCTION__,
		     info.fb_id, info.width, info.height,
		     width, height));
		return false;
	}

	return true;
}
2283
/* Retire an unreferenced scanout bo into the dedicated scanout cache
 * (scanouts keep their fb attached and never mix with other caches).
 * Purged (stolen-memory) scanouts are freed instead. */
static void kgem_bo_move_to_scanout(struct kgem *kgem, struct kgem_bo *bo)
{
	assert(bo->refcnt == 0);
	assert(bo->scanout);
	assert(!bo->flush);
	assert(!bo->snoop);
	assert(!bo->io);

	if (bo->purged) { /* for stolen fb */
		if (!bo->exec) {
			DBG(("%s: discarding purged scanout - stolen?\n",
			     __FUNCTION__));
			kgem_bo_free(kgem, bo);
		}
		return;
	}

	DBG(("%s: moving %d [fb %d] to scanout cache, active? %d\n",
	     __FUNCTION__, bo->handle, bo->delta, bo->rq != NULL));
	/* Keep still-busy scanouts at the tail so idle ones are found first */
	if (bo->rq)
		list_move_tail(&bo->list, &kgem->scanout);
	else
		list_move(&bo->list, &kgem->scanout);

	kgem->need_expire = true;

}
2311
2312static void kgem_bo_move_to_snoop(struct kgem *kgem, struct kgem_bo *bo)
2313{
2314	assert(bo->reusable);
2315	assert(!bo->scanout);
2316	assert(!bo->flush);
2317	assert(!bo->needs_flush);
2318	assert(bo->refcnt == 0);
2319	assert(bo->exec == NULL);
2320
2321	if (DBG_NO_SNOOP_CACHE) {
2322		kgem_bo_free(kgem, bo);
2323		return;
2324	}
2325
2326	if (num_pages(bo) > kgem->max_cpu_size >> 13) {
2327		DBG(("%s handle=%d discarding large CPU buffer (%d >%d pages)\n",
2328		     __FUNCTION__, bo->handle, num_pages(bo), kgem->max_cpu_size >> 13));
2329		kgem_bo_free(kgem, bo);
2330		return;
2331	}
2332
2333	assert(bo->tiling == I915_TILING_NONE);
2334	assert(bo->rq == NULL);
2335
2336	DBG(("%s: moving %d to snoop cachee\n", __FUNCTION__, bo->handle));
2337	list_add(&bo->list, &kgem->snoop);
2338	kgem->need_expire = true;
2339}
2340
/* Dispatch a newly idle, unreferenced bo to the appropriate cache
 * (snoop, scanout or inactive) or free it.  Returns true if the bo
 * landed in the inactive cache ("retired"). */
static bool kgem_bo_move_to_cache(struct kgem *kgem, struct kgem_bo *bo)
{
	bool retired = false;

	DBG(("%s: release handle=%d\n", __FUNCTION__, bo->handle));

	if (bo->prime) {
		/* Imported buffers are owned elsewhere; never cache them */
		DBG(("%s: discarding imported prime handle=%d\n",
		     __FUNCTION__, bo->handle));
		kgem_bo_free(kgem, bo);
	} else if (bo->snoop) {
		kgem_bo_move_to_snoop(kgem, bo);
	} else if (bo->scanout) {
		kgem_bo_move_to_scanout(kgem, bo);
	} else if ((bo = kgem_bo_replace_io(bo))->reusable &&
		   kgem_bo_set_purgeable(kgem, bo)) {
		kgem_bo_move_to_inactive(kgem, bo);
		retired = true;
	} else
		kgem_bo_free(kgem, bo);

	return retired;
}
2364
/* Look up a bo of at least num_pages in the snoop cache.  Prefers a
 * close fit (no more than twice the requested size); otherwise falls
 * back to the first oversized candidate encountered.  Returns NULL if
 * nothing suitable exists. */
static struct kgem_bo *
search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
{
	struct kgem_bo *bo, *first = NULL;

	DBG(("%s: num_pages=%d, flags=%x\n", __FUNCTION__, num_pages, flags));

	/* Snooped bo require either cacheing control or userptr support */
	if ((kgem->has_caching | kgem->has_userptr) == 0)
		return NULL;

	if (list_is_empty(&kgem->snoop)) {
		DBG(("%s: inactive and cache empty\n", __FUNCTION__));
		/* Try to refill the cache by retiring completed work */
		if (!__kgem_throttle_retire(kgem, flags)) {
			DBG(("%s: nothing retired\n", __FUNCTION__));
			return NULL;
		}
	}

	list_for_each_entry(bo, &kgem->snoop, list) {
		assert(bo->refcnt == 0);
		assert(bo->snoop);
		assert(!bo->scanout);
		assert(!bo->purged);
		assert(bo->proxy == NULL);
		assert(bo->tiling == I915_TILING_NONE);
		assert(bo->rq == NULL);
		assert(bo->exec == NULL);

		if (num_pages > num_pages(bo))
			continue;

		/* Too big for a good fit; remember the first as fallback */
		if (num_pages(bo) > 2*num_pages) {
			if (first == NULL)
				first = bo;
			continue;
		}

		list_del(&bo->list);
		bo->pitch = 0;
		bo->delta = 0;

		DBG(("  %s: found handle=%d (num_pages=%d) in snoop cache\n",
		     __FUNCTION__, bo->handle, num_pages(bo)));
		return bo;
	}

	if (first) {
		list_del(&first->list);
		first->pitch = 0;
		first->delta = 0;

		DBG(("  %s: found handle=%d (num_pages=%d) in snoop cache\n",
		     __FUNCTION__, first->handle, num_pages(first)));
		return first;
	}

	return NULL;
}
2423
2424void kgem_bo_undo(struct kgem *kgem, struct kgem_bo *bo)
2425{
2426	if (kgem->nexec != 1 || bo->exec == NULL)
2427		return;
2428
2429	assert(bo);
2430	DBG(("%s: only handle in batch, discarding last operations for handle=%d\n",
2431	     __FUNCTION__, bo->handle));
2432
2433	assert(bo->exec == &kgem->exec[0]);
2434	assert(kgem->exec[0].handle == bo->handle);
2435	assert(RQ(bo->rq) == kgem->next_request);
2436
2437	bo->refcnt++;
2438	kgem_reset(kgem);
2439	bo->refcnt--;
2440
2441	assert(kgem->nreloc == 0);
2442	assert(kgem->nexec == 0);
2443	assert(bo->exec == NULL);
2444}
2445
/* Like kgem_bo_undo() but for a pair of bo: if a and b are the only
 * two entries in the current batch, discard the whole batch.  With a
 * single exec slot in use, fall back to the single-bo undo for each. */
void kgem_bo_pair_undo(struct kgem *kgem, struct kgem_bo *a, struct kgem_bo *b)
{
	if (kgem->nexec > 2)
		return;

	if (kgem->nexec == 1) {
		if (a)
			kgem_bo_undo(kgem, a);
		if (b)
			kgem_bo_undo(kgem, b);
		return;
	}

	if (a == NULL || b == NULL)
		return;
	if (a->exec == NULL || b->exec == NULL)
		return;

	DBG(("%s: only handles in batch, discarding last operations for handle=%d and handle=%d\n",
	     __FUNCTION__, a->handle, b->handle));

	assert(a->exec == &kgem->exec[0] || a->exec == &kgem->exec[1]);
	assert(a->handle == kgem->exec[0].handle || a->handle == kgem->exec[1].handle);
	assert(RQ(a->rq) == kgem->next_request);
	assert(b->exec == &kgem->exec[0] || b->exec == &kgem->exec[1]);
	assert(b->handle == kgem->exec[0].handle || b->handle == kgem->exec[1].handle);
	assert(RQ(b->rq) == kgem->next_request);

	/* Hold temporary references so kgem_reset() cannot free them */
	a->refcnt++;
	b->refcnt++;
	kgem_reset(kgem);
	b->refcnt--;
	a->refcnt--;

	assert(kgem->nreloc == 0);
	assert(kgem->nexec == 0);
	assert(a->exec == NULL);
	assert(b->exec == NULL);
}
2485
/* Final-release path for a bo whose refcnt reached zero: try to place
 * it into one of the reuse caches (snoop, scanout, active, inactive);
 * free it when caching is disabled or the bo is not reusable. */
static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
{
	DBG(("%s: handle=%d, size=%d\n", __FUNCTION__, bo->handle, bytes(bo)));

	assert(list_is_empty(&bo->list));
	assert(bo->refcnt == 0);
	assert(bo->proxy == NULL);
	assert(bo->active_scanout == 0);
	assert_tiling(kgem, bo);

	bo->binding.offset = 0;

	if (DBG_NO_CACHE)
		goto destroy;

	if (bo->prime)
		goto destroy;

	if (bo->snoop && !bo->flush) {
		DBG(("%s: handle=%d is snooped\n", __FUNCTION__, bo->handle));
		assert(bo->reusable);
		assert(list_is_empty(&bo->list));
		/* Refresh busy state before deciding whether it may be cached */
		if (bo->exec == NULL && bo->rq && !__kgem_busy(kgem, bo->handle))
			__kgem_bo_clear_busy(bo);
		if (bo->rq == NULL)
			kgem_bo_move_to_snoop(kgem, bo);
		return;
	}
	if (!IS_USER_MAP(bo->map__cpu))
		bo->flush = false;

	if (bo->scanout) {
		kgem_bo_move_to_scanout(kgem, bo);
		return;
	}

	/* Shed any io wrapper before caching the raw handle */
	if (bo->io)
		bo = kgem_bo_replace_io(bo);
	if (!bo->reusable) {
		DBG(("%s: handle=%d, not reusable\n",
		     __FUNCTION__, bo->handle));
		goto destroy;
	}

	assert(list_is_empty(&bo->vma));
	assert(list_is_empty(&bo->list));
	assert(bo->flush == false);
	assert(bo->snoop == false);
	assert(bo->io == false);
	assert(bo->scanout == false);
	assert_cacheing(kgem, bo);

	/* If this bo was the only work in the batch, unwind the batch too */
	kgem_bo_undo(kgem, bo);
	assert(bo->refcnt == 0);

	if (bo->rq && bo->exec == NULL && !__kgem_busy(kgem, bo->handle))
		__kgem_bo_clear_busy(bo);

	if (bo->rq) {
		/* Still busy on the GPU: park it in the active cache */
		struct list *cache;

		DBG(("%s: handle=%d -> active\n", __FUNCTION__, bo->handle));
		if (bucket(bo) < NUM_CACHE_BUCKETS)
			cache = &kgem->active[bucket(bo)][bo->tiling];
		else
			cache = &kgem->large;
		list_add(&bo->list, cache);
		return;
	}

	assert(bo->exec == NULL);
	assert(list_is_empty(&bo->request));

	if (bo->map__cpu == NULL || bucket(bo) >= NUM_CACHE_BUCKETS) {
		if (!kgem_bo_set_purgeable(kgem, bo))
			goto destroy;

		if (!kgem->has_llc && bo->domain == DOMAIN_CPU)
			goto destroy;

		DBG(("%s: handle=%d, purged\n",
		     __FUNCTION__, bo->handle));
	}

	kgem_bo_move_to_inactive(kgem, bo);
	return;

destroy:
	if (!bo->exec)
		kgem_bo_free(kgem, bo);
}
2577
2578static void kgem_bo_unref(struct kgem *kgem, struct kgem_bo *bo)
2579{
2580	assert(bo->refcnt);
2581	if (--bo->refcnt == 0)
2582		__kgem_bo_destroy(kgem, bo);
2583}
2584
/* Destroy every cached sub-allocation (proxy) still attached to an
 * upload buffer.  Each cached proxy stores, via map__gtt, the address
 * of the external pointer caching it; that pointer is cleared here. */
static void kgem_buffer_release(struct kgem *kgem, struct kgem_buffer *bo)
{
	assert(bo->base.io);
	while (!list_is_empty(&bo->base.vma)) {
		struct kgem_bo *cached;

		cached = list_first_entry(&bo->base.vma, struct kgem_bo, vma);
		assert(cached->proxy == &bo->base);
		assert(cached != &bo->base);
		list_del(&cached->vma);

		/* map__gtt doubles as a back-pointer slot for cached uploads */
		assert(*(struct kgem_bo **)cached->map__gtt == cached);
		*(struct kgem_bo **)cached->map__gtt = NULL;
		cached->map__gtt = NULL;

		kgem_bo_destroy(kgem, cached);
	}
}
2603
/* Reap completed upload buffers: scan the active list from its tail
 * (oldest first) and release each buffer whose GPU work finished,
 * stopping at the first still-busy one. */
void kgem_retire__buffers(struct kgem *kgem)
{
	while (!list_is_empty(&kgem->active_buffers)) {
		struct kgem_buffer *bo =
			list_last_entry(&kgem->active_buffers,
					struct kgem_buffer,
					base.list);

		DBG(("%s: handle=%d, busy? %d [%d]\n",
		     __FUNCTION__, bo->base.handle, bo->base.rq != NULL, bo->base.exec != NULL));

		assert(bo->base.exec == NULL || RQ(bo->base.rq) == kgem->next_request);
		/* List is ordered: the first busy buffer ends the scan */
		if (bo->base.rq)
			break;

		DBG(("%s: releasing upload cache for handle=%d? %d\n",
		     __FUNCTION__, bo->base.handle, !list_is_empty(&bo->base.vma)));
		list_del(&bo->base.list);
		kgem_buffer_release(kgem, bo);
		kgem_bo_unref(kgem, &bo->base);
	}
}
2626
/* Poll the flushing list (bo awaiting outstanding GPU flushes) and
 * return any that have gone idle to the caches.  Returns true if at
 * least one bo was retired to the inactive cache. */
static bool kgem_retire__flushing(struct kgem *kgem)
{
	struct kgem_bo *bo, *next;
	bool retired = false;

	list_for_each_entry_safe(bo, next, &kgem->flushing, request) {
		assert(RQ(bo->rq) == (void *)kgem);
		assert(bo->exec == NULL);

		/* List is in submission order; first busy bo ends the scan */
		if (__kgem_busy(kgem, bo->handle))
			break;

		__kgem_bo_clear_busy(bo);

		if (bo->refcnt)
			continue;

		retired |= kgem_bo_move_to_cache(kgem, bo);
	}
#if HAS_DEBUG_FULL
	{
		int count = 0;
		list_for_each_entry(bo, &kgem->flushing, request)
			count++;
		DBG(("%s: %d bo on flushing list\n", __FUNCTION__, count));
	}
#endif

	kgem->need_retire |= !list_is_empty(&kgem->flushing);

	return retired;
}
2659
/* Process a completed request: detach every bo it references — still
 * dirty bo migrate to the flushing list, idle ones return to the
 * caches — then release the request and its batch bo.  Returns true
 * if anything was retired to the inactive cache. */
static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq)
{
	bool retired = false;

	DBG(("%s: request %d complete\n",
	     __FUNCTION__, rq->bo->handle));
	assert(RQ(rq->bo->rq) == rq);

	/* This request can no longer serve as the ring's fence marker */
	if (rq == kgem->fence[rq->ring])
		kgem->fence[rq->ring] = NULL;

	while (!list_is_empty(&rq->buffers)) {
		struct kgem_bo *bo;

		bo = list_first_entry(&rq->buffers,
				      struct kgem_bo,
				      request);

		assert(RQ(bo->rq) == rq);
		assert(bo->exec == NULL);
		assert(bo->domain == DOMAIN_GPU || bo->domain == DOMAIN_NONE);

		list_del(&bo->request);

		/* Re-sample busyness; the flush may already have completed */
		if (bo->needs_flush)
			bo->needs_flush = __kgem_busy(kgem, bo->handle);
		if (bo->needs_flush) {
			DBG(("%s: moving %d to flushing\n",
			     __FUNCTION__, bo->handle));
			list_add(&bo->request, &kgem->flushing);
			/* Track via the flushing pseudo-request marker */
			bo->rq = MAKE_REQUEST(kgem, RQ_RING(bo->rq));
			kgem->need_retire = true;
			continue;
		}

		bo->domain = DOMAIN_NONE;
		bo->gtt_dirty = false;
		bo->rq = NULL;
		if (bo->refcnt)
			continue;

		retired |= kgem_bo_move_to_cache(kgem, bo);
	}

	assert(rq->bo->rq == NULL);
	assert(rq->bo->exec == NULL);
	assert(list_is_empty(&rq->bo->request));
	assert(rq->bo->refcnt > 0);

	/* Drop the request's reference on its batch buffer */
	if (--rq->bo->refcnt == 0) {
		if (kgem_bo_set_purgeable(kgem, rq->bo)) {
			kgem_bo_move_to_inactive(kgem, rq->bo);
			retired = true;
		} else {
			DBG(("%s: closing %d\n",
			     __FUNCTION__, rq->bo->handle));
			kgem_bo_free(kgem, rq->bo);
		}
	}

	__kgem_request_free(rq);
	return retired;
}
2723
/* Retire completed requests on one ring in submission order, stopping
 * at the first request whose batch bo is still busy.  Returns true if
 * anything was retired to the inactive cache. */
static bool kgem_retire__requests_ring(struct kgem *kgem, int ring)
{
	bool retired = false;

	while (!list_is_empty(&kgem->requests[ring])) {
		struct kgem_request *rq;

		rq = list_first_entry(&kgem->requests[ring],
				      struct kgem_request,
				      list);
		assert(rq->ring == ring);
		if (__kgem_busy(kgem, rq->bo->handle))
			break;

		retired |= __kgem_retire_rq(kgem, rq);
	}

#if HAS_DEBUG_FULL
	{
		struct kgem_bo *bo;
		int count = 0;

		list_for_each_entry(bo, &kgem->requests[ring], request)
			count++;

		bo = NULL;
		if (!list_is_empty(&kgem->requests[ring]))
			bo = list_first_entry(&kgem->requests[ring],
					      struct kgem_request,
					      list)->bo;

		DBG(("%s: ring=%d, %d outstanding requests, oldest=%d\n",
		     __FUNCTION__, ring, count, bo ? bo->handle : 0));
	}
#endif

	return retired;
}
2762
2763static bool kgem_retire__requests(struct kgem *kgem)
2764{
2765	bool retired = false;
2766	int n;
2767
2768	for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
2769		retired |= kgem_retire__requests_ring(kgem, n);
2770		kgem->need_retire |= !list_is_empty(&kgem->requests[n]);
2771	}
2772
2773	return retired;
2774}
2775
2776bool kgem_retire(struct kgem *kgem)
2777{
2778	bool retired = false;
2779
2780	DBG(("%s, need_retire?=%d\n", __FUNCTION__, kgem->need_retire));
2781
2782	kgem->need_retire = false;
2783
2784	retired |= kgem_retire__flushing(kgem);
2785	retired |= kgem_retire__requests(kgem);
2786
2787	DBG(("%s -- retired=%d, need_retire=%d\n",
2788	     __FUNCTION__, retired, kgem->need_retire));
2789
2790	kgem->retire(kgem);
2791
2792	return retired;
2793}
2794
/* Determine whether a ring has gone idle, retiring completed requests
 * along the way.  kgem->fence[ring] caches the most recent request
 * observed busy, allowing everything up to it to be retired without
 * per-request busy ioctls once it completes. */
bool __kgem_ring_is_idle(struct kgem *kgem, int ring)
{
	struct kgem_request *rq;

	assert(ring < ARRAY_SIZE(kgem->requests));
	assert(!list_is_empty(&kgem->requests[ring]));

	rq = kgem->fence[ring];
	if (rq) {
		struct kgem_request *tmp;

		if (__kgem_busy(kgem, rq->bo->handle)) {
			DBG(("%s: last fence handle=%d still busy\n",
			     __FUNCTION__, rq->bo->handle));
			return false;
		}

		/* Fence complete: every older request is complete too */
		do {
			tmp = list_first_entry(&kgem->requests[ring],
					       struct kgem_request,
					       list);
			assert(tmp->ring == ring);
			__kgem_retire_rq(kgem, tmp);
		} while (tmp != rq);

		assert(kgem->fence[ring] == NULL);
		if (list_is_empty(&kgem->requests[ring]))
			return true;
	}

	rq = list_last_entry(&kgem->requests[ring],
			     struct kgem_request, list);
	assert(rq->ring == ring);
	if (__kgem_busy(kgem, rq->bo->handle)) {
		DBG(("%s: last requests handle=%d still busy\n",
		     __FUNCTION__, rq->bo->handle));
		/* Cache the busy request to short-circuit the next poll */
		kgem->fence[ring] = rq;
		return false;
	}

	DBG(("%s: ring=%d idle (handle=%d)\n",
	     __FUNCTION__, ring, rq->bo->handle));

	/* Newest request idle implies all outstanding requests are idle */
	while (!list_is_empty(&kgem->requests[ring])) {
		rq = list_first_entry(&kgem->requests[ring],
				      struct kgem_request,
				      list);
		assert(rq->ring == ring);
		__kgem_retire_rq(kgem, rq);
	}

	return true;
}
2848
/* Retire every request on bo's ring up to and including the one bo
 * belongs to.  If bo is only tracked by the flushing pseudo-request
 * (RQ == kgem), simply clear its busy state instead. */
void __kgem_retire_requests_upto(struct kgem *kgem, struct kgem_bo *bo)
{
	struct kgem_request *rq = bo->rq, *tmp;
	/* Ring index derived from the request tag: BLT maps to slot 1 */
	struct list *requests = &kgem->requests[RQ_RING(rq) == I915_EXEC_BLT];

	rq = RQ(rq);
	assert(rq != &kgem->static_request);
	if (rq == (struct kgem_request *)kgem) {
		__kgem_bo_clear_busy(bo);
		return;
	}

	do {
		tmp = list_first_entry(requests, struct kgem_request, list);
		assert(tmp->ring == rq->ring);
		__kgem_retire_rq(kgem, tmp);
	} while (tmp != rq);
}
2867
#if 0
/* Debug-only consistency check (currently compiled out): read back
 * each relocation slot from the submitted batch bo and verify it
 * matches the offset the kernel reported for the target bo.
 * NOTE(review): 0100 is an octal literal (decimal 64); presumably the
 * gen numbering convention where >= this value means 64-bit relocs —
 * confirm against the rest of the file. */
static void kgem_commit__check_reloc(struct kgem *kgem)
{
	struct kgem_request *rq = kgem->next_request;
	struct kgem_bo *bo;
	bool has_64bit = kgem->gen >= 0100;
	int i;

	for (i = 0; i < kgem->nreloc; i++) {
		list_for_each_entry(bo, &rq->buffers, request) {
			if (bo->target_handle == kgem->reloc[i].target_handle) {
				uint64_t value = 0;
				gem_read(kgem->fd, rq->bo->handle, &value, kgem->reloc[i].offset, has_64bit ? 8 : 4);
				assert(bo->exec->offset == -1 || value == bo->exec->offset + (int)kgem->reloc[i].delta);
				break;
			}
		}
	}
}
#else
#define kgem_commit__check_reloc(kgem)
#endif
2890
#ifndef NDEBUG
/* Debug-only: after commit, no active upload buffer may still be
 * attached to an execbuffer slot. */
static void kgem_commit__check_buffers(struct kgem *kgem)
{
	struct kgem_buffer *bo;

	list_for_each_entry(bo, &kgem->active_buffers, base.list)
		assert(bo->base.exec == NULL);
}
#else
#define kgem_commit__check_buffers(kgem)
#endif
2902
/* Post-execbuffer bookkeeping: record the offsets the kernel chose
 * for every bo in the submitted request, detach them from the exec
 * list and queue the request for later retirement.  The special
 * static_request (allocation-failure fallback) is instead synced
 * synchronously and discarded. */
static void kgem_commit(struct kgem *kgem)
{
	struct kgem_request *rq = kgem->next_request;
	struct kgem_bo *bo, *next;

	kgem_commit__check_reloc(kgem);

	list_for_each_entry_safe(bo, next, &rq->buffers, request) {
		assert(next->request.prev == &bo->request);

		DBG(("%s: release handle=%d (proxy? %d), dirty? %d flush? %d, snoop? %d -> offset=%x\n",
		     __FUNCTION__, bo->handle, bo->proxy != NULL,
		     bo->gpu_dirty, bo->needs_flush, bo->snoop,
		     (unsigned)bo->exec->offset));

		assert(bo->exec);
		assert(bo->proxy == NULL || bo->exec == &_kgem_dummy_exec);
		assert(RQ(bo->rq) == rq || (RQ(bo->proxy->rq) == rq));

		/* Remember where the kernel placed the bo for future relocs */
		bo->presumed_offset = bo->exec->offset;
		bo->exec = NULL;
		bo->target_handle = -1;

		if (!bo->refcnt && !bo->reusable) {
			assert(!bo->snoop);
			assert(!bo->proxy);
			kgem_bo_free(kgem, bo);
			continue;
		}

		bo->binding.offset = 0;
		bo->domain = DOMAIN_GPU;
		bo->gpu_dirty = false;

		if (bo->proxy) {
			/* proxies are not used for domain tracking */
			__kgem_bo_clear_busy(bo);
		}

		kgem->scanout_busy |= bo->scanout && bo->needs_flush;
	}

	if (rq == &kgem->static_request) {
		struct drm_i915_gem_set_domain set_domain;

		DBG(("%s: syncing due to allocation failure\n", __FUNCTION__));

		/* Force a blocking domain transition to wait for the GPU */
		VG_CLEAR(set_domain);
		set_domain.handle = rq->bo->handle;
		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
		set_domain.write_domain = I915_GEM_DOMAIN_GTT;
		if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
			kgem_throttle(kgem);
		}

		kgem_retire(kgem);
		assert(list_is_empty(&rq->buffers));

		assert(rq->bo->map__gtt == NULL);
		assert(rq->bo->map__wc == NULL);
		assert(rq->bo->map__cpu == NULL);
		gem_close(kgem->fd, rq->bo->handle);
		kgem_cleanup_cache(kgem);
	} else {
		assert(rq->ring < ARRAY_SIZE(kgem->requests));
		list_add_tail(&rq->list, &kgem->requests[rq->ring]);
		kgem->need_throttle = kgem->need_retire = 1;

		/* Seed the ring's fence marker with this request if busy */
		if (kgem->fence[rq->ring] == NULL &&
		    __kgem_busy(kgem, rq->bo->handle))
			kgem->fence[rq->ring] = rq;
	}

	kgem->next_request = NULL;

	kgem_commit__check_buffers(kgem);
}
2981
2982static void kgem_close_list(struct kgem *kgem, struct list *head)
2983{
2984	while (!list_is_empty(head))
2985		kgem_bo_free(kgem, list_first_entry(head, struct kgem_bo, list));
2986}
2987
2988static void kgem_close_inactive(struct kgem *kgem)
2989{
2990	unsigned int i;
2991
2992	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
2993		kgem_close_list(kgem, &kgem->inactive[i]);
2994}
2995
2996static void kgem_finish_buffers(struct kgem *kgem)
2997{
2998	struct kgem_buffer *bo, *next;
2999
3000	list_for_each_entry_safe(bo, next, &kgem->batch_buffers, base.list) {
3001		DBG(("%s: buffer handle=%d, used=%d, exec?=%d, write=%d, mmapped=%s, refcnt=%d\n",
3002		     __FUNCTION__, bo->base.handle, bo->used, bo->base.exec!=NULL,
3003		     bo->write, bo->mmapped == MMAPPED_CPU ? "cpu" : bo->mmapped == MMAPPED_GTT ? "gtt" : "no",
3004		     bo->base.refcnt));
3005
3006		assert(next->base.list.prev == &bo->base.list);
3007		assert(bo->base.io);
3008		assert(bo->base.refcnt >= 1);
3009
3010		if (bo->base.refcnt > 1 && !bo->base.exec) {
3011			DBG(("%s: skipping unattached handle=%d, used=%d, refcnt=%d\n",
3012			     __FUNCTION__, bo->base.handle, bo->used, bo->base.refcnt));
3013			continue;
3014		}
3015
3016		if (!bo->write) {
3017			assert(bo->base.exec || bo->base.refcnt > 1);
3018			goto decouple;
3019		}
3020
3021		if (bo->mmapped) {
3022			uint32_t used;
3023
3024			assert(!bo->need_io);
3025
3026			used = ALIGN(bo->used, PAGE_SIZE);
3027			if (!DBG_NO_UPLOAD_ACTIVE &&
3028			    used + PAGE_SIZE <= bytes(&bo->base) &&
3029			    (kgem->has_llc || bo->mmapped == MMAPPED_GTT || bo->base.snoop)) {
3030				DBG(("%s: retaining upload buffer (%d/%d): used=%d, refcnt=%d\n",
3031				     __FUNCTION__, bo->used, bytes(&bo->base), used, bo->base.refcnt));
3032				bo->used = used;
3033				list_move(&bo->base.list,
3034					  &kgem->active_buffers);
3035				kgem->need_retire = true;
3036				continue;
3037			}
3038			DBG(("%s: discarding mmapped buffer, used=%d, map type=%d\n",
3039			     __FUNCTION__, bo->used, bo->mmapped));
3040			goto decouple;
3041		}
3042
3043		if (!bo->used || !bo->base.exec) {
3044			/* Unless we replace the handle in the execbuffer,
3045			 * then this bo will become active. So decouple it
3046			 * from the buffer list and track it in the normal
3047			 * manner.
3048			 */
3049			goto decouple;
3050		}
3051
3052		assert(bo->need_io);
3053		assert(bo->base.rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
3054		assert(bo->base.domain != DOMAIN_GPU);
3055
3056		if (bo->base.refcnt == 1 &&
3057		    bo->base.size.pages.count > 1 &&
3058		    bo->used < bytes(&bo->base) / 2) {
3059			struct kgem_bo *shrink;
3060			unsigned alloc = NUM_PAGES(bo->used);
3061
3062			shrink = search_snoop_cache(kgem, alloc,
3063						    CREATE_INACTIVE | CREATE_NO_RETIRE);
3064			if (shrink) {
3065				void *map;
3066				int n;
3067
3068				DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n",
3069				     __FUNCTION__,
3070				     bo->used, bytes(&bo->base), bytes(shrink),
3071				     bo->base.handle, shrink->handle));
3072
3073				assert(bo->used <= bytes(shrink));
3074				map = kgem_bo_map__cpu(kgem, shrink);
3075				if (map) {
3076					kgem_bo_sync__cpu(kgem, shrink);
3077					memcpy(map, bo->mem, bo->used);
3078
3079					shrink->target_handle =
3080						kgem->has_handle_lut ? bo->base.target_handle : shrink->handle;
3081					for (n = 0; n < kgem->nreloc; n++) {
3082						if (kgem->reloc[n].target_handle == bo->base.target_handle) {
3083							kgem->reloc[n].target_handle = shrink->target_handle;
3084							kgem->reloc[n].presumed_offset = shrink->presumed_offset;
3085							kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
3086								kgem->reloc[n].delta + shrink->presumed_offset;
3087						}
3088					}
3089
3090					bo->base.exec->handle = shrink->handle;
3091					bo->base.exec->offset = shrink->presumed_offset;
3092					shrink->exec = bo->base.exec;
3093					shrink->rq = bo->base.rq;
3094					list_replace(&bo->base.request,
3095						     &shrink->request);
3096					list_init(&bo->base.request);
3097					shrink->needs_flush = bo->base.gpu_dirty;
3098
3099					bo->base.exec = NULL;
3100					bo->base.rq = NULL;
3101					bo->base.gpu_dirty = false;
3102					bo->base.needs_flush = false;
3103					bo->used = 0;
3104
3105					goto decouple;
3106				}
3107
3108				__kgem_bo_destroy(kgem, shrink);
3109			}
3110
3111			shrink = search_linear_cache(kgem, alloc,
3112						     CREATE_INACTIVE | CREATE_NO_RETIRE);
3113			if (shrink) {
3114				int n;
3115
3116				DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n",
3117				     __FUNCTION__,
3118				     bo->used, bytes(&bo->base), bytes(shrink),
3119				     bo->base.handle, shrink->handle));
3120
3121				assert(bo->used <= bytes(shrink));
3122				if (gem_write__cachealigned(kgem->fd, shrink->handle,
3123							    0, bo->used, bo->mem) == 0) {
3124					shrink->target_handle =
3125						kgem->has_handle_lut ? bo->base.target_handle : shrink->handle;
3126					for (n = 0; n < kgem->nreloc; n++) {
3127						if (kgem->reloc[n].target_handle == bo->base.target_handle) {
3128							kgem->reloc[n].target_handle = shrink->target_handle;
3129							kgem->reloc[n].presumed_offset = shrink->presumed_offset;
3130							kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
3131								kgem->reloc[n].delta + shrink->presumed_offset;
3132						}
3133					}
3134
3135					bo->base.exec->handle = shrink->handle;
3136					bo->base.exec->offset = shrink->presumed_offset;
3137					shrink->exec = bo->base.exec;
3138					shrink->rq = bo->base.rq;
3139					list_replace(&bo->base.request,
3140						     &shrink->request);
3141					list_init(&bo->base.request);
3142					shrink->needs_flush = bo->base.gpu_dirty;
3143
3144					bo->base.exec = NULL;
3145					bo->base.rq = NULL;
3146					bo->base.gpu_dirty = false;
3147					bo->base.needs_flush = false;
3148					bo->used = 0;
3149
3150					goto decouple;
3151				}
3152
3153				__kgem_bo_destroy(kgem, shrink);
3154			}
3155		}
3156
3157		DBG(("%s: handle=%d, uploading %d/%d\n",
3158		     __FUNCTION__, bo->base.handle, bo->used, bytes(&bo->base)));
3159		ASSERT_IDLE(kgem, bo->base.handle);
3160		assert(bo->used <= bytes(&bo->base));
3161		gem_write__cachealigned(kgem->fd, bo->base.handle,
3162					0, bo->used, bo->mem);
3163		bo->need_io = 0;
3164
3165decouple:
3166		DBG(("%s: releasing handle=%d\n",
3167		     __FUNCTION__, bo->base.handle));
3168		list_del(&bo->base.list);
3169		kgem_bo_unref(kgem, &bo->base);
3170	}
3171}
3172
/* Emergency teardown of all outstanding work (e.g. GPU hang or
 * shutdown): forcibly decouple and clear every bo from every pending
 * request, free the requests, then close all inactive cached bo. */
static void kgem_cleanup(struct kgem *kgem)
{
	int n;

	for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
		while (!list_is_empty(&kgem->requests[n])) {
			struct kgem_request *rq;

			rq = list_first_entry(&kgem->requests[n],
					      struct kgem_request,
					      list);
			assert(rq->ring == n);
			while (!list_is_empty(&rq->buffers)) {
				struct kgem_bo *bo;

				bo = list_first_entry(&rq->buffers,
						      struct kgem_bo,
						      request);

				bo->exec = NULL;
				bo->gpu_dirty = false;
				__kgem_bo_clear_busy(bo);
				if (bo->refcnt == 0)
					kgem_bo_free(kgem, bo);
			}

			__kgem_request_free(rq);
		}
	}

	kgem_close_inactive(kgem);
}
3205
/* Upload the batch commands (and any surface state packed at the end
 * of the buffer) into bo.  Prefers a direct CPU or WC mmap copy;
 * otherwise falls back to pwrite, retrying after purging the caches
 * on failure.  Returns 0 on success or a negative error code. */
static int
kgem_batch_write(struct kgem *kgem,
		 struct kgem_bo *bo,
		 uint32_t size)
{
	char *ptr;
	int ret;

	ASSERT_IDLE(kgem, bo->handle);

#if DBG_NO_EXEC
	{
		/* Debug short-circuit: submit an empty batch instead */
		uint32_t batch[] = { MI_BATCH_BUFFER_END, 0};
		return gem_write(kgem->fd, bo->handle, 0, sizeof(batch), batch);
	}
#endif

	assert(!bo->scanout);
retry:
	ptr = NULL;
	if (bo->domain == DOMAIN_CPU || kgem->has_llc) {
		ptr = bo->map__cpu;
		if (ptr == NULL)
			ptr = __kgem_bo_map__cpu(kgem, bo);
	} else if (kgem->has_wc_mmap) {
		ptr = bo->map__wc;
		if (ptr == NULL)
			ptr = __kgem_bo_map__wc(kgem, bo);
	}
	if (ptr) {
		memcpy(ptr, kgem->batch, sizeof(uint32_t)*kgem->nbatch);
		if (kgem->surface != kgem->batch_size) {
			/* Surface state is copied to the very end of bo */
			ret = PAGE_ALIGN(sizeof(uint32_t) * kgem->batch_size);
			ret -= sizeof(uint32_t) * kgem->surface;
			ptr += size - ret;
			memcpy(ptr, kgem->batch + kgem->surface,
			       (kgem->batch_size - kgem->surface)*sizeof(uint32_t));
		}
		return 0;
	}

	/* If there is no surface data, just upload the batch */
	if (kgem->surface == kgem->batch_size) {
		if ((ret = gem_write__cachealigned(kgem->fd, bo->handle,
						   0, sizeof(uint32_t)*kgem->nbatch,
						   kgem->batch)) == 0)
			return 0;

		goto expire;
	}

	/* Are the batch pages conjoint with the surface pages? */
	if (kgem->surface < kgem->nbatch + PAGE_SIZE/sizeof(uint32_t)) {
		assert(size == PAGE_ALIGN(kgem->batch_size*sizeof(uint32_t)));
		if ((ret = gem_write__cachealigned(kgem->fd, bo->handle,
						   0, kgem->batch_size*sizeof(uint32_t),
						   kgem->batch)) == 0)
			return 0;

		goto expire;
	}

	/* Disjoint surface/batch, upload separately */
	if ((ret = gem_write__cachealigned(kgem->fd, bo->handle,
					   0, sizeof(uint32_t)*kgem->nbatch,
					   kgem->batch)))
		goto expire;

	ret = PAGE_ALIGN(sizeof(uint32_t) * kgem->batch_size);
	ret -= sizeof(uint32_t) * kgem->surface;
	assert(size-ret >= kgem->nbatch*sizeof(uint32_t));
	if (gem_write(kgem->fd, bo->handle,
		      size - ret, (kgem->batch_size - kgem->surface)*sizeof(uint32_t),
		      kgem->batch + kgem->surface))
		goto expire;

	return 0;

expire:
	assert(ret != EINVAL);

	/* Reclaim memory and retry the upload before giving up */
	(void)__kgem_throttle_retire(kgem, 0);
	if (kgem_expire_cache(kgem))
		goto retry;

	if (kgem_cleanup_cache(kgem))
		goto retry;

	ERR(("%s: failed to write batch (handle=%d): %d\n",
	     __FUNCTION__, bo->handle, -ret));
	return ret;
}
3298
/* Unwind the batch currently under construction: release every bo
 * attached to the pending request (keeping busy tracking for bo still
 * dirty on the GPU), zero all batch bookkeeping and allocate a fresh
 * next_request. */
void kgem_reset(struct kgem *kgem)
{
	if (kgem->next_request) {
		struct kgem_request *rq = kgem->next_request;

		while (!list_is_empty(&rq->buffers)) {
			struct kgem_bo *bo =
				list_first_entry(&rq->buffers,
						 struct kgem_bo,
						 request);
			list_del(&bo->request);

			assert(RQ(bo->rq) == rq);

			bo->binding.offset = 0;
			bo->exec = NULL;
			bo->target_handle = -1;
			bo->gpu_dirty = false;

			if (bo->needs_flush && __kgem_busy(kgem, bo->handle)) {
				assert(bo->domain == DOMAIN_GPU || bo->domain == DOMAIN_NONE);
				list_add(&bo->request, &kgem->flushing);
				/* Track via the flushing pseudo-request */
				bo->rq = (void *)kgem;
				kgem->need_retire = true;
			} else
				__kgem_bo_clear_busy(bo);

			if (bo->refcnt || bo->rq)
				continue;

			kgem_bo_move_to_cache(kgem, bo);
		}

		if (rq != &kgem->static_request) {
			list_init(&rq->list);
			__kgem_request_free(rq);
		}
	}

	/* Reset all per-batch accounting to an empty batch */
	kgem->nfence = 0;
	kgem->nexec = 0;
	kgem->nreloc = 0;
	kgem->nreloc__self = 0;
	kgem->aperture = 0;
	kgem->aperture_fenced = 0;
	kgem->aperture_max_fence = 0;
	kgem->nbatch = 0;
	kgem->surface = kgem->batch_size;
	kgem->mode = KGEM_NONE;
	kgem->needs_semaphore = false;
	kgem->needs_reservation = false;
	kgem->flush = 0;
	kgem->batch_flags = kgem->batch_flags_base;
	assert(kgem->batch);

	kgem->next_request = __kgem_request_alloc(kgem);

	kgem_sna_reset(kgem);
}
3358
3359static int compact_batch_surface(struct kgem *kgem, int *shrink)
3360{
3361	int size, n;
3362
3363	if (!kgem->has_relaxed_delta)
3364		return kgem->batch_size * sizeof(uint32_t);
3365
3366	/* See if we can pack the contents into one or two pages */
3367	n = ALIGN(kgem->batch_size, 1024);
3368	size = n - kgem->surface + kgem->nbatch;
3369	size = ALIGN(size, 1024);
3370
3371	*shrink = (n - size) * sizeof(uint32_t);
3372	return size * sizeof(uint32_t);
3373}
3374
/* Select or allocate a bo to hold the batch commands for submission.
 *
 * Small batches are serviced from the preallocated pinned-batch pools
 * (4KiB and 16KiB rings); larger ones are allocated (or recycled) as
 * linear bos.  Whichever bo is chosen, the batch contents are uploaded
 * via kgem_batch_write after fixing up the relocations for any
 * surface-state compaction.  Returns NULL on unrecoverable failure.
 */
static struct kgem_bo *
kgem_create_batch(struct kgem *kgem)
{
#if !DBG_NO_SHRINK_BATCHES
	struct drm_i915_gem_set_domain set_domain;
	struct kgem_bo *bo;
	int shrink = 0;
	int size;

	/* If surface state was emitted, try to pack batch + surfaces */
	if (kgem->surface != kgem->batch_size)
		size = compact_batch_surface(kgem, &shrink);
	else
		size = kgem->nbatch * sizeof(uint32_t);

	if (size <= 4096) {
		/* Reuse the least-recently-used 4KiB pinned batch */
		bo = list_first_entry(&kgem->pinned_batches[0],
				      struct kgem_bo,
				      list);
		if (!bo->rq) {
out_4096:
			assert(bo->refcnt > 0);
			list_move_tail(&bo->list, &kgem->pinned_batches[0]);
			bo = kgem_bo_reference(bo);
			goto write;
		}

		/* Attached to a request but the GPU has finished: retire
		 * the request and take the bo after all.
		 */
		if (!__kgem_busy(kgem, bo->handle)) {
			assert(RQ(bo->rq)->bo == bo);
			__kgem_retire_rq(kgem, RQ(bo->rq));
			goto out_4096;
		}
	}

	if (size <= 16384) {
		/* Same dance for the 16KiB pinned-batch pool */
		bo = list_first_entry(&kgem->pinned_batches[1],
				      struct kgem_bo,
				      list);
		if (!bo->rq) {
out_16384:
			assert(bo->refcnt > 0);
			list_move_tail(&bo->list, &kgem->pinned_batches[1]);
			bo = kgem_bo_reference(bo);
			goto write;
		}

		if (!__kgem_busy(kgem, bo->handle)) {
			__kgem_retire_rq(kgem, RQ(bo->rq));
			goto out_16384;
		}
	}

	/* gen2 (i830/i845) requires pinned batches as a workaround */
	if (kgem->gen == 020) {
		bo = kgem_create_linear(kgem, size, CREATE_CACHED | CREATE_TEMPORARY);
		if (bo)
			goto write;

		/* Nothing available for reuse, rely on the kernel wa */
		if (kgem->has_pinned_batches) {
			/* NOTE(review): identical flags to the allocation
			 * just above that failed — presumably this path was
			 * meant to request a different (unthrottled/pinned)
			 * allocation; confirm against upstream.
			 */
			bo = kgem_create_linear(kgem, size, CREATE_CACHED | CREATE_TEMPORARY);
			if (bo) {
				kgem->batch_flags &= ~LOCAL_I915_EXEC_IS_PINNED;
				goto write;
			}
		}

		/* Last resort: stall until a pinned batch becomes idle */
		if (size < 16384) {
			bo = list_first_entry(&kgem->pinned_batches[size > 4096],
					      struct kgem_bo,
					      list);
			list_move_tail(&bo->list, &kgem->pinned_batches[size > 4096]);

			DBG(("%s: syncing due to busy batches\n", __FUNCTION__));

			VG_CLEAR(set_domain);
			set_domain.handle = bo->handle;
			set_domain.read_domains = I915_GEM_DOMAIN_GTT;
			set_domain.write_domain = I915_GEM_DOMAIN_GTT;
			if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
				DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
				kgem_throttle(kgem);
				return NULL;
			}

			kgem_retire(kgem);
			assert(bo->rq == NULL);
			bo = kgem_bo_reference(bo);
			goto write;
		}
	}

	bo = NULL;
	if (!kgem->has_llc) {
		bo = kgem_create_linear(kgem, size, CREATE_NO_THROTTLE);
		if (bo) {
write:
			/* Adjust relocations for surface-state compaction,
			 * then upload the batch contents into the bo.
			 */
			kgem_fixup_relocs(kgem, bo, shrink);
			if (kgem_batch_write(kgem, bo, size)) {
				kgem_bo_destroy(kgem, bo);
				return NULL;
			}
		}
	}
	if (bo == NULL)
		bo = kgem_new_batch(kgem);
	return bo;
#else
	return kgem_new_batch(kgem);
#endif
}
3484
3485#if !NDEBUG
/* Echo every line of the file at @path into the server log.
 * Returns false if the file could not be opened, true otherwise.
 */
static bool dump_file(const char *path)
{
	char *buf = NULL;
	size_t cap = 0;
	FILE *fp;

	fp = fopen(path, "r");
	if (fp == NULL)
		return false;

	while (getline(&buf, &cap, fp) != -1)
		ErrorF("%s", buf);

	free(buf);
	fclose(fp);
	return true;
}
3503
/* Dump the named debugfs file for our DRM minor into the log, trying the
 * canonical debugfs mount point first and the legacy /debug mount second.
 * Silently does nothing if the minor is unknown or neither file exists.
 */
static void dump_debugfs(struct kgem *kgem, const char *name)
{
	char path[80];
	int minor = kgem_get_minor(kgem);

	if (minor < 0)
		return;

	/* Use snprintf: "name" is caller-supplied and sprintf could
	 * overflow the fixed 80-byte buffer.
	 */
	snprintf(path, sizeof(path), "/sys/kernel/debug/dri/%d/%s", minor, name);
	if (dump_file(path))
		return;

	snprintf(path, sizeof(path), "/debug/dri/%d/%s", minor, name);
	if (dump_file(path))
		return;
}
3520
/* Log the kernel's GTT usage report (debugfs i915_gem_gtt) for debugging. */
static void dump_gtt_info(struct kgem *kgem)
{
	dump_debugfs(kgem, "i915_gem_gtt");
}
3525
/* Log the kernel's fence-register report (debugfs i915_gem_fence_regs). */
static void dump_fence_regs(struct kgem *kgem)
{
	dump_debugfs(kgem, "i915_gem_fence_regs");
}
3530#endif
3531
/* Submit an execbuffer, progressively freeing memory on failure.
 *
 * On error we retry after throttling/retiring, then after expiring the
 * bo caches, then after a full cache cleanup.  As a final resort all
 * outputs are disabled to release pinned framebuffers/sprites/cursors
 * and the submission is attempted one last time.  Returns 0 on success
 * or the (negative) ioctl error.
 */
static int do_execbuf(struct kgem *kgem, struct drm_i915_gem_execbuffer2 *execbuf)
{
	int ret, err;

retry:
	ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
	if (ret == 0)
		return 0;

	DBG(("%s: failed ret=%d, throttling and discarding cache\n", __FUNCTION__, ret));
	(void)__kgem_throttle_retire(kgem, 0);
	if (kgem_expire_cache(kgem))
		goto retry;

	if (kgem_cleanup_cache(kgem))
		goto retry;

	/* last gasp */
	ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
	if (ret == 0)
		return 0;

	xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING,
		   "Failed to submit rendering commands, trying again with outputs disabled.\n");

	/* One last trick up our sleeve for when we run out of space.
	 * We turn everything off to free up our pinned framebuffers,
	 * sprites and cursors, and try one last time.
	 */
	err = errno;
	if (sna_mode_disable(container_of(kgem, struct sna, kgem))) {
		kgem_cleanup_cache(kgem);
		ret = do_ioctl(kgem->fd,
			       DRM_IOCTL_I915_GEM_EXECBUFFER2,
			       execbuf);
		DBG(("%s: last_gasp ret=%d\n", __FUNCTION__, ret));
		sna_mode_enable(container_of(kgem, struct sna, kgem));
	}
	/* Preserve the original failure's errno across the modeset calls */
	errno = err;

	return ret;
}
3574
/* Flush the accumulated batch to the kernel.
 *
 * Finalizes the batch commands, uploads them into a batch bo, appends
 * that bo as the final exec object and calls execbuffer.  On failure the
 * GPU is throttled and, if truly hung, acceleration is disabled
 * (wedged).  Always completes by committing the request state and
 * resetting for the next batch, so callers can continue regardless.
 */
void _kgem_submit(struct kgem *kgem)
{
	struct kgem_request *rq;
	uint32_t batch_end;

	assert(!DBG_NO_HW);
	assert(!kgem->wedged);

	assert(kgem->nbatch);
	assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem));
	assert(kgem->nbatch <= kgem->surface);

	/* Emit the MI_BATCH_BUFFER_END (and any padding) */
	batch_end = kgem_end_batch(kgem);
	kgem_sna_flush(kgem);

	DBG(("batch[%d/%d, flags=%x]: %d %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d [fenced=%d]\n",
	     kgem->mode, kgem->ring, kgem->batch_flags,
	     batch_end, kgem->nbatch, kgem->surface, kgem->batch_size,
	     kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced));

	assert(kgem->nbatch <= kgem->batch_size);
	assert(kgem->nbatch <= kgem->surface);
	assert(kgem->nreloc <= ARRAY_SIZE(kgem->reloc));
	/* strictly less-than: one exec slot is reserved for the batch bo */
	assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
	assert(kgem->nfence <= kgem->fence_max);

	kgem_finish_buffers(kgem);

#if SHOW_BATCH_BEFORE
	__kgem_batch_debug(kgem, batch_end);
#endif

	rq = kgem->next_request;
	assert(rq->bo == NULL);

	rq->bo = kgem_create_batch(kgem);
	if (rq->bo) {
		struct drm_i915_gem_execbuffer2 execbuf;
		int i, ret;

		assert(!rq->bo->needs_flush);

		/* The batch bo itself occupies the last exec slot */
		i = kgem->nexec++;
		kgem->exec[i].handle = rq->bo->handle;
		kgem->exec[i].relocation_count = kgem->nreloc;
		kgem->exec[i].relocs_ptr = (uintptr_t)kgem->reloc;
		kgem->exec[i].alignment = 0;
		kgem->exec[i].offset = rq->bo->presumed_offset;
		kgem->exec[i].flags = 0;
		kgem->exec[i].rsvd1 = 0;
		kgem->exec[i].rsvd2 = 0;

		rq->bo->exec = &kgem->exec[i];
		rq->bo->rq = MAKE_REQUEST(rq, kgem->ring); /* useful sanity check */
		list_add(&rq->bo->request, &rq->buffers);
		rq->ring = kgem->ring == KGEM_BLT;

		memset(&execbuf, 0, sizeof(execbuf));
		execbuf.buffers_ptr = (uintptr_t)kgem->exec;
		execbuf.buffer_count = kgem->nexec;
		execbuf.batch_len = batch_end*sizeof(uint32_t);
		execbuf.flags = kgem->ring | kgem->batch_flags;

		/* Optionally append the raw batch words to a dump file */
		if (DBG_DUMP) {
			int fd = open("/tmp/i915-batchbuffers.dump",
				      O_WRONLY | O_CREAT | O_APPEND,
				      0666);
			if (fd != -1) {
				ret = write(fd, kgem->batch, batch_end*sizeof(uint32_t));
				fd = close(fd);
			}
		}

		ret = do_execbuf(kgem, &execbuf);
		if (DEBUG_SYNC && ret == 0) {
			/* Force synchronous execution: wait for the batch
			 * by moving it to the GTT domain.
			 */
			struct drm_i915_gem_set_domain set_domain;

			VG_CLEAR(set_domain);
			set_domain.handle = rq->bo->handle;
			set_domain.read_domains = I915_GEM_DOMAIN_GTT;
			set_domain.write_domain = I915_GEM_DOMAIN_GTT;

			ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
		}
		if (ret < 0) {
			kgem_throttle(kgem);
			if (!kgem->wedged) {
				xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR,
					   "Failed to submit rendering commands, disabling acceleration.\n");
				__kgem_set_wedged(kgem);
			}

#if !NDEBUG
			/* Post-mortem dump of the failed submission */
			ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d, fenced=%d, high=%d,%d: errno=%d\n",
			       kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface,
			       kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced, kgem->aperture_high, kgem->aperture_total, -ret);

			for (i = 0; i < kgem->nexec; i++) {
				struct kgem_bo *bo, *found = NULL;

				list_for_each_entry(bo, &kgem->next_request->buffers, request) {
					if (bo->handle == kgem->exec[i].handle) {
						found = bo;
						break;
					}
				}
				ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, snooped %d, deleted %d\n",
				       i,
				       kgem->exec[i].handle,
				       (int)kgem->exec[i].offset,
				       found ? kgem_bo_size(found) : -1,
				       found ? found->tiling : -1,
				       (int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE),
				       found ? found->snoop : -1,
				       found ? found->purged : -1);
			}
			for (i = 0; i < kgem->nreloc; i++) {
				ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n",
				       i,
				       (int)kgem->reloc[i].offset,
				       kgem->reloc[i].target_handle,
				       kgem->reloc[i].delta,
				       kgem->reloc[i].read_domains,
				       kgem->reloc[i].write_domain,
				       (int)kgem->reloc[i].presumed_offset);
			}

			{
				struct drm_i915_gem_get_aperture aperture;
				if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture) == 0)
					ErrorF("Aperture size %lld, available %lld\n",
					       (long long)aperture.aper_size,
					       (long long)aperture.aper_available_size);
			}

			if (ret == -ENOSPC)
				dump_gtt_info(kgem);
			if (ret == -EDEADLK)
				dump_fence_regs(kgem);

			if (DEBUG_SYNC) {
				int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
				if (fd != -1) {
					int ignored = write(fd, kgem->batch, batch_end*sizeof(uint32_t));
					assert(ignored == batch_end*sizeof(uint32_t));
					close(fd);
				}

				FatalError("SNA: failed to submit batchbuffer, errno=%d\n", -ret);
			}
#endif
		}
	}
#if SHOW_BATCH_AFTER
	/* NOTE(review): rq->bo may be NULL here if kgem_create_batch
	 * failed above — confirm SHOW_BATCH_AFTER builds guard this.
	 */
	if (gem_read(kgem->fd, rq->bo->handle, kgem->batch, 0, batch_end*sizeof(uint32_t)) == 0)
		__kgem_batch_debug(kgem, batch_end);
#endif
	kgem_commit(kgem);
	if (kgem->wedged)
		kgem_cleanup(kgem);

	kgem_reset(kgem);

	assert(kgem->next_request != NULL);
}
3740
/* Locate the kernel's GPU hang/error state file for our DRM minor.
 * On success writes the readable path into @path and returns true;
 * otherwise leaves @path as an empty string and returns false.
 */
static bool find_hang_state(struct kgem *kgem, char *path, int maxlen)
{
	static const char * const location[] = {
		"/sys/class/drm/card%d/error",
		"/sys/kernel/debug/dri/%d/i915_error_state",
		"/debug/dri/%d/i915_error_state",
	};
	int minor = kgem_get_minor(kgem);
	unsigned n;

	/* Search for our hang state in a few canonical locations.
	 * In the unlikely event of having multiple devices, we
	 * will need to check which minor actually corresponds to ours.
	 */
	for (n = 0; n < ARRAY_SIZE(location); n++) {
		snprintf(path, maxlen, location[n], minor);
		if (access(path, R_OK) == 0)
			return true;
	}

	path[0] = '\0';
	return false;
}
3765
3766void kgem_throttle(struct kgem *kgem)
3767{
3768	if (kgem->wedged)
3769		return;
3770
3771	if (__kgem_throttle(kgem, true)) {
3772		static int once;
3773		char path[128];
3774
3775		xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR,
3776			   "Detected a hung GPU, disabling acceleration.\n");
3777		if (!once && find_hang_state(kgem, path, sizeof(path))) {
3778			xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR,
3779				   "When reporting this, please include %s and the full dmesg.\n",
3780				   path);
3781			once = 1;
3782		}
3783
3784		__kgem_set_wedged(kgem);
3785		kgem->need_throttle = false;
3786	}
3787}
3788
/* Probe the kernel (via a blocking throttle) for a GPU hang; returns
 * non-zero if the GPU is wedged.
 */
int kgem_is_wedged(struct kgem *kgem)
{
	return __kgem_throttle(kgem, true);
}
3793
3794static void kgem_purge_cache(struct kgem *kgem)
3795{
3796	struct kgem_bo *bo, *next;
3797	int i;
3798
3799	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
3800		list_for_each_entry_safe(bo, next, &kgem->inactive[i], list) {
3801			if (!kgem_bo_is_retained(kgem, bo)) {
3802				DBG(("%s: purging %d\n",
3803				     __FUNCTION__, bo->handle));
3804				kgem_bo_free(kgem, bo);
3805			}
3806		}
3807	}
3808
3809	kgem->need_purge = false;
3810}
3811
/* Release idle, unreferenced scanout bos back to the general caches.
 *
 * Stops at the first bo that is still in use by the GPU (the list is
 * processed in order, so later entries are assumed no older).  Each
 * released bo has its framebuffer removed and its scanout status
 * dropped before being handed to __kgem_bo_destroy.
 */
void kgem_clean_scanout_cache(struct kgem *kgem)
{
	while (!list_is_empty(&kgem->scanout)) {
		struct kgem_bo *bo;

		bo = list_first_entry(&kgem->scanout, struct kgem_bo, list);

		assert(bo->scanout);
		assert(!bo->refcnt);
		assert(!bo->prime);
		assert(bo->proxy == NULL);

		/* Still queued or active on the GPU: stop here */
		if (bo->exec || __kgem_busy(kgem, bo->handle))
			break;

		DBG(("%s: handle=%d, fb=%d (reusable=%d)\n",
		     __FUNCTION__, bo->handle, bo->delta, bo->reusable));
		list_del(&bo->list);

		kgem_bo_rmfb(kgem, bo);
		bo->scanout = false;

		if (!bo->purged) {
			bo->reusable = true;
			/* NOTE(review): marks the bo unreusable when
			 * set-caching(SNOOPED) *fails* on LLC hardware —
			 * confirm the intended sense of this check.
			 */
			if (kgem->has_llc &&
			    !gem_set_caching(kgem->fd, bo->handle, SNOOPED))
				bo->reusable = false;

		}

		__kgem_bo_destroy(kgem, bo);
	}
}
3845
3846void kgem_clean_large_cache(struct kgem *kgem)
3847{
3848	while (!list_is_empty(&kgem->large_inactive)) {
3849		kgem_bo_free(kgem,
3850			     list_first_entry(&kgem->large_inactive,
3851					      struct kgem_bo, list));
3852
3853	}
3854}
3855
/* Age the bo caches, freeing objects that have been idle too long.
 *
 * First pass stamps un-stamped bos with the current time; a second pass
 * (only once stamped bos exist) frees those older than the expiry
 * cutoff, preserving recently CPU-mapped bos a little longer.  Also
 * drains the deferred-free lists and the snoop/large/scanout caches.
 * Returns true if anything was expired (callers use this to retry
 * allocations), and records in need_expire whether work remains.
 */
bool kgem_expire_cache(struct kgem *kgem)
{
	time_t now, expire;
	struct kgem_bo *bo;
	unsigned int size = 0, count = 0;
	bool idle;
	unsigned int i;

	time(&now);

	/* Drain the deferred bo free-list */
	while (__kgem_freed_bo) {
		bo = __kgem_freed_bo;
		__kgem_freed_bo = *(struct kgem_bo **)bo;
		free(bo);
	}

	/* Drain the deferred request free-list */
	while (__kgem_freed_request) {
		struct kgem_request *rq = __kgem_freed_request;
		__kgem_freed_request = *(struct kgem_request **)rq;
		free(rq);
	}

	kgem_clean_large_cache(kgem);
	/* Only touch scanouts while we own the VT */
	if (container_of(kgem, struct sna, kgem)->scrn->vtSema)
		kgem_clean_scanout_cache(kgem);

	/* Snoop cache: stamp fresh entries, expire old ones (half the
	 * normal inactivity window).
	 */
	expire = 0;
	list_for_each_entry(bo, &kgem->snoop, list) {
		if (bo->delta) {
			expire = now - MAX_INACTIVE_TIME/2;
			break;
		}

		bo->delta = now;
	}
	if (expire) {
		while (!list_is_empty(&kgem->snoop)) {
			bo = list_last_entry(&kgem->snoop, struct kgem_bo, list);

			if (bo->delta > expire)
				break;

			kgem_bo_free(kgem, bo);
		}
	}
#ifdef DEBUG_MEMORY
	{
		long snoop_size = 0;
		int snoop_count = 0;
		list_for_each_entry(bo, &kgem->snoop, list)
			snoop_count++, snoop_size += bytes(bo);
		DBG(("%s: still allocated %d bo, %ld bytes, in snoop cache\n",
		     __FUNCTION__, snoop_count, snoop_size));
	}
#endif

	kgem_retire(kgem);
	if (kgem->wedged)
		kgem_cleanup(kgem);

	kgem->expire(kgem);

	if (kgem->need_purge)
		kgem_purge_cache(kgem);

	if (kgem->need_retire)
		kgem_retire(kgem);

	/* First pass over the inactive buckets: stamp un-aged bos; the
	 * presence of any already-stamped bo sets the expiry cutoff.
	 */
	expire = 0;
	idle = true;
	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
		idle &= list_is_empty(&kgem->inactive[i]);
		list_for_each_entry(bo, &kgem->inactive[i], list) {
			if (bo->delta) {
				expire = now - MAX_INACTIVE_TIME;
				break;
			}

			bo->delta = now;
		}
	}
	if (expire == 0) {
		DBG(("%s: idle? %d\n", __FUNCTION__, idle));
		kgem->need_expire = !idle;
		return false;
	}

	/* Second pass: free everything older than the cutoff, keeping
	 * CPU-mapped bos for an extra MAP_PRESERVE_TIME.
	 */
	idle = true;
	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
		struct list preserve;

		list_init(&preserve);
		while (!list_is_empty(&kgem->inactive[i])) {
			bo = list_last_entry(&kgem->inactive[i],
					     struct kgem_bo, list);

			if (bo->delta > expire) {
				idle = false;
				break;
			}

			if (bo->map__cpu && bo->delta + MAP_PRESERVE_TIME > expire) {
				idle = false;
				list_move_tail(&bo->list, &preserve);
			} else {
				count++;
				size += bytes(bo);
				kgem_bo_free(kgem, bo);
				DBG(("%s: expiring %d\n",
				     __FUNCTION__, bo->handle));
			}
		}
		/* Splice the preserved bos back to the front of the
		 * bucket (hand-rolled list splice).
		 */
		if (!list_is_empty(&preserve)) {
			preserve.prev->next = kgem->inactive[i].next;
			kgem->inactive[i].next->prev = preserve.prev;
			kgem->inactive[i].next = preserve.next;
			preserve.next->prev = &kgem->inactive[i];
		}
	}

#ifdef DEBUG_MEMORY
	{
		long inactive_size = 0;
		int inactive_count = 0;
		for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
			list_for_each_entry(bo, &kgem->inactive[i], list)
				inactive_count++, inactive_size += bytes(bo);
		DBG(("%s: still allocated %d bo, %ld bytes, in inactive cache\n",
		     __FUNCTION__, inactive_count, inactive_size));
	}
#endif

	DBG(("%s: expired %d objects, %d bytes, idle? %d\n",
	     __FUNCTION__, count, size, idle));

	kgem->need_expire = !idle;
	/* Implicit conversion to bool: true iff anything was expired.
	 * The unreachable (void) casts keep the compiler quiet about
	 * "unused" variables when DBG expands to nothing.
	 */
	return count;
	(void)count;
	(void)size;
}
3996
/* Last-resort cache teardown: wait for all outstanding GPU work, then
 * free every cached bo (inactive, large, scanout, snoop and the
 * deferred free-list).  Returns true if there was anything to clean,
 * so callers can retry a failed allocation.
 */
bool kgem_cleanup_cache(struct kgem *kgem)
{
	unsigned int i;
	int n;

	/* sync to the most recent request */
	for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
		if (!list_is_empty(&kgem->requests[n])) {
			struct kgem_request *rq;
			struct drm_i915_gem_set_domain set_domain;

			rq = list_first_entry(&kgem->requests[n],
					      struct kgem_request,
					      list);

			DBG(("%s: sync on cleanup\n", __FUNCTION__));

			/* Wait by pulling the batch bo into the GTT domain.
			 * NOTE(review): assumes rq->bo is non-NULL for any
			 * queued request — confirm submission guarantees.
			 */
			VG_CLEAR(set_domain);
			set_domain.handle = rq->bo->handle;
			set_domain.read_domains = I915_GEM_DOMAIN_GTT;
			set_domain.write_domain = I915_GEM_DOMAIN_GTT;
			(void)do_ioctl(kgem->fd,
				       DRM_IOCTL_I915_GEM_SET_DOMAIN,
				       &set_domain);
		}
	}

	kgem_retire(kgem);
	kgem_cleanup(kgem);

	/* Nothing cached since the last cleanup: report no progress */
	if (!kgem->need_expire)
		return false;

	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
		while (!list_is_empty(&kgem->inactive[i]))
			kgem_bo_free(kgem,
				     list_last_entry(&kgem->inactive[i],
						     struct kgem_bo, list));
	}

	kgem_clean_large_cache(kgem);
	kgem_clean_scanout_cache(kgem);

	while (!list_is_empty(&kgem->snoop))
		kgem_bo_free(kgem,
			     list_last_entry(&kgem->snoop,
					     struct kgem_bo, list));

	/* Drain the deferred bo free-list as well */
	while (__kgem_freed_bo) {
		struct kgem_bo *bo = __kgem_freed_bo;
		__kgem_freed_bo = *(struct kgem_bo **)bo;
		free(bo);
	}

	kgem->need_purge = false;
	kgem->need_expire = false;
	return true;
}
4055
/* Search the bo caches for a linear (untiled) buffer of at least
 * num_pages pages.
 *
 * With CREATE_INACTIVE the search is restricted to idle bos; otherwise
 * active bos are acceptable.  Oversized requests go to the large-object
 * caches.  For mappable requests (CREATE_CPU_MAP/CREATE_GTT_MAP) the
 * per-vma inactive caches are tried first, and during the bucket scan a
 * near-miss ("first") is remembered as a fallback when an exact match
 * (right tiling/mapping) is not found.  Returns NULL if nothing
 * suitable is cached.
 *
 * Note: the parameter deliberately shares its name with the
 * num_pages(bo) function-like macro; bare uses refer to the parameter.
 */
static struct kgem_bo *
search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
{
	struct kgem_bo *bo, *first = NULL;
	bool use_active = (flags & CREATE_INACTIVE) == 0;
	struct list *cache;

	DBG(("%s: num_pages=%d, flags=%x, use_active? %d, use_large=%d [max=%d]\n",
	     __FUNCTION__, num_pages, flags, use_active,
	     num_pages >= MAX_CACHE_SIZE / PAGE_SIZE,
	     MAX_CACHE_SIZE / PAGE_SIZE));

	assert(num_pages);

	if (num_pages >= MAX_CACHE_SIZE / PAGE_SIZE) {
		DBG(("%s: searching large buffers\n", __FUNCTION__));
retry_large:
		cache = use_active ? &kgem->large : &kgem->large_inactive;
		/* Note: "first" doubles as the safe-iteration temporary
		 * here; this path always returns before "first" is used
		 * as a near-miss below.
		 */
		list_for_each_entry_safe(bo, first, cache, list) {
			assert(bo->refcnt == 0);
			assert(bo->reusable);
			assert(!bo->scanout);

			if (num_pages > num_pages(bo))
				goto discard;

			if (bo->tiling != I915_TILING_NONE) {
				if (use_active)
					goto discard;

				if (!gem_set_tiling(kgem->fd, bo->handle,
						    I915_TILING_NONE, 0))
					goto discard;

				bo->tiling = I915_TILING_NONE;
				bo->pitch = 0;
			}

			if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo))
				goto discard;

			list_del(&bo->list);
			/* rq == kgem is the "busy, no request" sentinel */
			if (RQ(bo->rq) == (void *)kgem) {
				assert(bo->exec == NULL);
				list_del(&bo->request);
			}

			bo->delta = 0;
			assert_tiling(kgem, bo);
			return bo;

discard:
			if (!use_active)
				kgem_bo_free(kgem, bo);
		}

		/* Widen the search from active-only to any, then try
		 * retiring before giving up.
		 */
		if (use_active) {
			use_active = false;
			goto retry_large;
		}

		if (__kgem_throttle_retire(kgem, flags))
			goto retry_large;

		return NULL;
	}

	/* Inactive-only request with an empty bucket: try to refill it
	 * by retiring completed work, unless forbidden.
	 */
	if (!use_active && list_is_empty(inactive(kgem, num_pages))) {
		DBG(("%s: inactive and cache bucket empty\n",
		     __FUNCTION__));

		if (flags & CREATE_NO_RETIRE) {
			DBG(("%s: can not retire\n", __FUNCTION__));
			return NULL;
		}

		if (list_is_empty(active(kgem, num_pages, I915_TILING_NONE))) {
			DBG(("%s: active cache bucket empty\n", __FUNCTION__));
			return NULL;
		}

		if (!__kgem_throttle_retire(kgem, flags)) {
			DBG(("%s: nothing retired\n", __FUNCTION__));
			return NULL;
		}

		if (list_is_empty(inactive(kgem, num_pages))) {
			DBG(("%s: active cache bucket still empty after retire\n",
			     __FUNCTION__));
			return NULL;
		}
	}

	/* For mappable requests, prefer a bo that already has the right
	 * kind of mapping set up (avoids a fresh mmap).
	 */
	if (!use_active && flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
		int for_cpu = !!(flags & CREATE_CPU_MAP);
		DBG(("%s: searching for inactive %s map\n",
		     __FUNCTION__, for_cpu ? "cpu" : "gtt"));
		cache = &kgem->vma[for_cpu].inactive[cache_bucket(num_pages)];
		list_for_each_entry(bo, cache, vma) {
			assert(for_cpu ? !!bo->map__cpu : (bo->map__gtt || bo->map__wc));
			assert(bucket(bo) == cache_bucket(num_pages));
			assert(bo->proxy == NULL);
			assert(bo->rq == NULL);
			assert(bo->exec == NULL);
			assert(!bo->scanout);

			if (num_pages > num_pages(bo)) {
				DBG(("inactive too small: %d < %d\n",
				     num_pages(bo), num_pages));
				continue;
			}

			if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
				kgem_bo_free(kgem, bo);
				break;
			}

			if (I915_TILING_NONE != bo->tiling &&
			    !gem_set_tiling(kgem->fd, bo->handle,
					    I915_TILING_NONE, 0))
				continue;

			kgem_bo_remove_from_inactive(kgem, bo);
			assert(list_is_empty(&bo->vma));
			assert(list_is_empty(&bo->list));

			bo->tiling = I915_TILING_NONE;
			bo->pitch = 0;
			bo->delta = 0;
			DBG(("  %s: found handle=%d (num_pages=%d) in linear vma cache\n",
			     __FUNCTION__, bo->handle, num_pages(bo)));
			assert(use_active || bo->domain != DOMAIN_GPU);
			assert(!bo->needs_flush);
			assert_tiling(kgem, bo);
			ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active);
			return bo;
		}

		if (flags & CREATE_EXACT)
			return NULL;

		if (flags & CREATE_CPU_MAP && !kgem->has_llc)
			return NULL;
	}

	/* General bucket scan; remember a usable near-miss in "first" */
	cache = use_active ? active(kgem, num_pages, I915_TILING_NONE) : inactive(kgem, num_pages);
	list_for_each_entry(bo, cache, list) {
		assert(bo->refcnt == 0);
		assert(bo->reusable);
		assert(!!bo->rq == !!use_active);
		assert(bo->proxy == NULL);
		assert(!bo->scanout);

		if (num_pages > num_pages(bo))
			continue;

		if (use_active &&
		    kgem->gen <= 040 &&
		    bo->tiling != I915_TILING_NONE)
			continue;

		if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
			kgem_bo_free(kgem, bo);
			break;
		}

		if (I915_TILING_NONE != bo->tiling) {
			if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP))
				continue;

			/* Only untile once we know no better match exists */
			if (first)
				continue;

			if (!gem_set_tiling(kgem->fd, bo->handle,
					    I915_TILING_NONE, 0))
				continue;

			bo->tiling = I915_TILING_NONE;
			bo->pitch = 0;
		}

		if (bo->map__gtt || bo->map__wc || bo->map__cpu) {
			if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
				int for_cpu = !!(flags & CREATE_CPU_MAP);
				if (for_cpu ? !!bo->map__cpu : (bo->map__gtt || bo->map__wc)){
					if (first != NULL)
						break;

					first = bo;
					continue;
				}
			} else {
				if (first != NULL)
					break;

				first = bo;
				continue;
			}
		} else {
			if (flags & CREATE_GTT_MAP && !kgem_bo_can_map(kgem, bo))
				continue;

			if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
				if (first != NULL)
					break;

				first = bo;
				continue;
			}
		}

		/* Exact match: detach from its cache and hand it out */
		if (use_active)
			kgem_bo_remove_from_active(kgem, bo);
		else
			kgem_bo_remove_from_inactive(kgem, bo);

		assert(bo->tiling == I915_TILING_NONE);
		bo->pitch = 0;
		bo->delta = 0;
		DBG(("  %s: found handle=%d (num_pages=%d) in linear %s cache\n",
		     __FUNCTION__, bo->handle, num_pages(bo),
		     use_active ? "active" : "inactive"));
		assert(list_is_empty(&bo->list));
		assert(list_is_empty(&bo->vma));
		assert(use_active || bo->domain != DOMAIN_GPU);
		assert(!bo->needs_flush || use_active);
		assert_tiling(kgem, bo);
		ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active);
		return bo;
	}

	/* No exact match: settle for the remembered near-miss */
	if (first) {
		assert(first->tiling == I915_TILING_NONE);

		if (use_active)
			kgem_bo_remove_from_active(kgem, first);
		else
			kgem_bo_remove_from_inactive(kgem, first);

		first->pitch = 0;
		first->delta = 0;
		DBG(("  %s: found handle=%d (near-miss) (num_pages=%d) in linear %s cache\n",
		     __FUNCTION__, first->handle, num_pages(first),
		     use_active ? "active" : "inactive"));
		assert(list_is_empty(&first->list));
		assert(list_is_empty(&first->vma));
		assert(use_active || first->domain != DOMAIN_GPU);
		assert(!first->needs_flush || use_active);
		ASSERT_MAYBE_IDLE(kgem, first->handle, !use_active);
		return first;
	}

	return NULL;
}
4310
4311struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name)
4312{
4313	struct drm_gem_open open_arg;
4314	struct drm_i915_gem_get_tiling tiling;
4315	struct kgem_bo *bo;
4316
4317	DBG(("%s(name=%d)\n", __FUNCTION__, name));
4318
4319	VG_CLEAR(open_arg);
4320	open_arg.name = name;
4321	if (do_ioctl(kgem->fd, DRM_IOCTL_GEM_OPEN, &open_arg))
4322		return NULL;
4323
4324	DBG(("%s: new handle=%d\n", __FUNCTION__, open_arg.handle));
4325
4326	VG_CLEAR(tiling);
4327	tiling.handle = open_arg.handle;
4328	if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling)) {
4329		DBG(("%s(name=%d) get-tiling failed, ret=%d\n", __FUNCTION__, name, errno));
4330		gem_close(kgem->fd, open_arg.handle);
4331		return NULL;
4332	}
4333
4334	DBG(("%s: handle=%d, tiling=%d\n", __FUNCTION__, tiling.handle, tiling.tiling_mode));
4335
4336	bo = __kgem_bo_alloc(open_arg.handle, open_arg.size / PAGE_SIZE);
4337	if (bo == NULL) {
4338		gem_close(kgem->fd, open_arg.handle);
4339		return NULL;
4340	}
4341
4342	bo->unique_id = kgem_get_unique_id(kgem);
4343	bo->tiling = tiling.tiling_mode;
4344	bo->reusable = false;
4345	bo->prime = true;
4346	bo->purged = true; /* no coherency guarantees */
4347
4348	debug_alloc__bo(kgem, bo);
4349	return bo;
4350}
4351
/* Import a bo shared via a dma-buf (PRIME) file descriptor.
 *
 * Converts the fd to a GEM handle, queries its tiling, and verifies the
 * caller's size estimate against the dma-buf's actual size (via lseek).
 * The kernel's caching mode is then probed to classify the buffer as a
 * foreign scanout or a snooped CPU buffer.  Returns NULL on failure or
 * when PRIME support is not compiled in.
 */
struct kgem_bo *kgem_create_for_prime(struct kgem *kgem, int name, uint32_t size)
{
#ifdef DRM_IOCTL_PRIME_FD_TO_HANDLE
	struct drm_prime_handle args;
	struct drm_i915_gem_get_tiling tiling;
	struct local_i915_gem_caching caching;
	struct kgem_bo *bo;
	off_t seek;

	DBG(("%s(name=%d)\n", __FUNCTION__, name));

	VG_CLEAR(args);
	args.fd = name;
	args.flags = 0;
	if (do_ioctl(kgem->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args)) {
		DBG(("%s(name=%d) fd-to-handle failed, ret=%d\n", __FUNCTION__, name, errno));
		return NULL;
	}

	VG_CLEAR(tiling);
	tiling.handle = args.handle;
	if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling)) {
		DBG(("%s(name=%d) get-tiling failed, ret=%d\n", __FUNCTION__, name, errno));
		gem_close(kgem->fd, args.handle);
		return NULL;
	}

	/* Query actual size, overriding specified if available */
	seek = lseek(args.fd, 0, SEEK_END);
	DBG(("%s: estimated size=%ld, actual=%lld\n",
	     __FUNCTION__, (long)size, (long long)seek));
	if (seek != -1) {
		/* Reject if the importer needs more than the dma-buf holds */
		if (size > seek) {
			DBG(("%s(name=%d) estimated required size [%d] is larger than actual [%ld]\n", __FUNCTION__, name, size, (long)seek));
			gem_close(kgem->fd, args.handle);
			return NULL;
		}
		size = seek;
	}

	DBG(("%s: new handle=%d, tiling=%d\n", __FUNCTION__,
	     args.handle, tiling.tiling_mode));
	bo = __kgem_bo_alloc(args.handle, NUM_PAGES(size));
	if (bo == NULL) {
		gem_close(kgem->fd, args.handle);
		return NULL;
	}

	bo->unique_id = kgem_get_unique_id(kgem);
	bo->tiling = tiling.tiling_mode;
	bo->reusable = false;
	bo->prime = true;
	bo->domain = DOMAIN_NONE;

	/* is this a special bo (e.g. scanout or CPU coherent)? */

	VG_CLEAR(caching);
	caching.handle = args.handle;
	/* Seed the reply with a sane default; the ioctl result is
	 * ignored, so this value stands if GET_CACHING is unsupported.
	 */
	caching.caching = kgem->has_llc;
	(void)drmIoctl(kgem->fd, LOCAL_IOCTL_I915_GEM_GET_CACHING, &caching);
	DBG(("%s: imported handle=%d has caching %d\n", __FUNCTION__, args.handle, caching.caching));
	switch (caching.caching) {
	case 0:
		/* Uncached on LLC hardware implies a scanout buffer */
		if (kgem->has_llc) {
			DBG(("%s: interpreting handle=%d as a foreign scanout\n",
			     __FUNCTION__, args.handle));
			bo->scanout = true;
		}
		break;
	case 1:
		/* Snooped on non-LLC hardware; tiling is then illegal */
		if (!kgem->has_llc) {
			DBG(("%s: interpreting handle=%d as a foreign snooped buffer\n",
			     __FUNCTION__, args.handle));
			bo->snoop = true;
			if (bo->tiling) {
				DBG(("%s: illegal snooped tiled buffer\n", __FUNCTION__));
				kgem_bo_free(kgem, bo);
				return NULL;
			}
		}
		break;
	case 2:
		/* Display-cached: definitely a scanout */
		DBG(("%s: interpreting handle=%d as a foreign scanout\n",
		     __FUNCTION__, args.handle));
		bo->scanout = true;
		break;
	}

	debug_alloc__bo(kgem, bo);
	return bo;
#else
	return NULL;
#endif
}
4446
/* Export a bo as a dma-buf (PRIME) file descriptor with CLOEXEC set.
 * The bo is marked non-reusable since it is now shared externally.
 * Returns the fd, or -1 on failure or when PRIME is not compiled in.
 */
int kgem_bo_export_to_prime(struct kgem *kgem, struct kgem_bo *bo)
{
#if defined(DRM_IOCTL_PRIME_HANDLE_TO_FD) && defined(O_CLOEXEC)
	struct drm_prime_handle prime;

	VG_CLEAR(prime);
	prime.handle = bo->handle;
	prime.flags = O_CLOEXEC;

	if (do_ioctl(kgem->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &prime) == 0) {
		bo->reusable = false;
		return prime.fd;
	}
#endif
	return -1;
}
4465
4466struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags)
4467{
4468	struct kgem_bo *bo;
4469	uint32_t handle;
4470
4471	DBG(("%s(%d)\n", __FUNCTION__, size));
4472	assert(size);
4473
4474	if (flags & CREATE_GTT_MAP && kgem->has_llc) {
4475		flags &= ~CREATE_GTT_MAP;
4476		flags |= CREATE_CPU_MAP;
4477	}
4478
4479	size = NUM_PAGES(size);
4480	if ((flags & CREATE_UNCACHED) == 0) {
4481		bo = search_linear_cache(kgem, size, CREATE_INACTIVE | flags);
4482		if (bo) {
4483			assert(bo->domain != DOMAIN_GPU);
4484			ASSERT_IDLE(kgem, bo->handle);
4485			bo->refcnt = 1;
4486			return bo;
4487		}
4488
4489		if (flags & CREATE_CACHED)
4490			return NULL;
4491	}
4492
4493	handle = gem_create(kgem->fd, size);
4494	if (handle == 0)
4495		return NULL;
4496
4497	DBG(("%s: new handle=%d, num_pages=%d\n", __FUNCTION__, handle, size));
4498	bo = __kgem_bo_alloc(handle, size);
4499	if (bo == NULL) {
4500		gem_close(kgem->fd, handle);
4501		return NULL;
4502	}
4503
4504	debug_alloc__bo(kgem, bo);
4505	return bo;
4506}
4507
4508int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int bpp)
4509{
4510	if (DBG_NO_TILING)
4511		return tiling < 0 ? tiling : I915_TILING_NONE;
4512
4513	if (kgem->gen < 040) {
4514		if (tiling && width * bpp > 8192 * 8) {
4515			DBG(("%s: pitch too large for tliing [%d]\n",
4516			     __FUNCTION__, width*bpp/8));
4517			tiling = I915_TILING_NONE;
4518			goto done;
4519		}
4520	} else {
4521		if (width*bpp > (MAXSHORT-512) * 8) {
4522			if (tiling > 0)
4523				tiling = -tiling;
4524			else if (tiling == 0)
4525				tiling = -I915_TILING_X;
4526			DBG(("%s: large pitch [%d], forcing TILING [%d]\n",
4527			     __FUNCTION__, width*bpp/8, tiling));
4528		} else if (tiling && (width|height) > 8192) {
4529			DBG(("%s: large tiled buffer [%dx%d], forcing TILING_X\n",
4530			     __FUNCTION__, width, height));
4531			tiling = -I915_TILING_X;
4532		}
4533
4534		/* fences limited to 128k (256k on ivb) */
4535		assert(width * bpp <= 128 * 1024 * 8);
4536	}
4537
4538	if (tiling < 0)
4539		return tiling;
4540
4541	if (tiling == I915_TILING_Y && !kgem->can_render_y)
4542		tiling = I915_TILING_X;
4543
4544	if (tiling && (height == 1 || width == 1)) {
4545		DBG(("%s: disabling tiling [%dx%d] for single row/col\n",
4546		     __FUNCTION__,width, height));
4547		tiling = I915_TILING_NONE;
4548		goto done;
4549	}
4550	if (tiling == I915_TILING_Y && height <= 16) {
4551		DBG(("%s: too short [%d] for TILING_Y\n",
4552		     __FUNCTION__,height));
4553		tiling = I915_TILING_X;
4554	}
4555	if (tiling && width * bpp > 8 * (4096 - 64)) {
4556		DBG(("%s: TLB miss between lines %dx%d (pitch=%d), forcing tiling %d\n",
4557		     __FUNCTION__,
4558		     width, height, width*bpp/8,
4559		     tiling));
4560		return -tiling;
4561	}
4562	if (tiling == I915_TILING_X && height < 4) {
4563		DBG(("%s: too short [%d] for TILING_X\n",
4564		     __FUNCTION__, height));
4565		tiling = I915_TILING_NONE;
4566		goto done;
4567	}
4568
4569	if (tiling == I915_TILING_X && width * bpp <= 8*512) {
4570		DBG(("%s: too thin [width %d, %d bpp] for TILING_X\n",
4571		     __FUNCTION__, width, bpp));
4572		tiling = I915_TILING_NONE;
4573		goto done;
4574	}
4575	if (tiling == I915_TILING_Y && width * bpp < 8*128) {
4576		DBG(("%s: too thin [%d] for TILING_Y\n",
4577		     __FUNCTION__, width));
4578		tiling = I915_TILING_NONE;
4579		goto done;
4580	}
4581
4582	if (tiling && ALIGN(height, 2) * ALIGN(width*bpp, 8*64) <= 4096 * 8) {
4583		DBG(("%s: too small [%d bytes] for TILING_%c\n", __FUNCTION__,
4584		     ALIGN(height, 2) * ALIGN(width*bpp, 8*64) / 8,
4585		     tiling == I915_TILING_X ? 'X' : 'Y'));
4586		tiling = I915_TILING_NONE;
4587		goto done;
4588	}
4589
4590	if (tiling && width * bpp >= 8 * 4096 / 2) {
4591		DBG(("%s: TLB near-miss between lines %dx%d (pitch=%d), forcing tiling %d\n",
4592		     __FUNCTION__,
4593		     width, height, width*bpp/8,
4594		     tiling));
4595		return -tiling;
4596	}
4597
4598done:
4599	DBG(("%s: %dx%d -> %d\n", __FUNCTION__, width, height, tiling));
4600	return tiling;
4601}
4602
/* Map an X drawable depth to the bits-per-pixel used for bo layout;
 * returns 0 for depths we do not handle.
 */
static int bits_per_pixel(int depth)
{
	if (depth == 8)
		return 8;
	if (depth == 15 || depth == 16)
		return 16;
	if (depth == 24 || depth == 30 || depth == 32)
		return 32;
	return 0;
}
4615
4616unsigned kgem_can_create_2d(struct kgem *kgem,
4617			    int width, int height, int depth)
4618{
4619	uint32_t pitch, size;
4620	unsigned flags = 0;
4621	int tiling;
4622	int bpp;
4623
4624	DBG(("%s: %dx%d @ %d\n", __FUNCTION__, width, height, depth));
4625
4626	bpp = bits_per_pixel(depth);
4627	if (bpp == 0) {
4628		DBG(("%s: unhandled depth %d\n", __FUNCTION__, depth));
4629		return 0;
4630	}
4631
4632	if (width > MAXSHORT || height > MAXSHORT) {
4633		DBG(("%s: unhandled size %dx%d\n",
4634		     __FUNCTION__, width, height));
4635		return 0;
4636	}
4637
4638	size = kgem_surface_size(kgem, false, 0,
4639				 width, height, bpp,
4640				 I915_TILING_NONE, &pitch);
4641	DBG(("%s: untiled size=%d\n", __FUNCTION__, size));
4642	if (size > 0) {
4643		if (size <= kgem->max_cpu_size)
4644			flags |= KGEM_CAN_CREATE_CPU;
4645		if (size > 4096 && size <= kgem->max_gpu_size)
4646			flags |= KGEM_CAN_CREATE_GPU;
4647		if (size <= PAGE_SIZE*kgem->aperture_mappable/4 || kgem->has_wc_mmap)
4648			flags |= KGEM_CAN_CREATE_GTT;
4649		if (size > kgem->large_object_size)
4650			flags |= KGEM_CAN_CREATE_LARGE;
4651		if (size > kgem->max_object_size) {
4652			DBG(("%s: too large (untiled) %d > %d\n",
4653			     __FUNCTION__, size, kgem->max_object_size));
4654			return 0;
4655		}
4656	}
4657
4658	tiling = kgem_choose_tiling(kgem, I915_TILING_X,
4659				    width, height, bpp);
4660	if (tiling != I915_TILING_NONE) {
4661		size = kgem_surface_size(kgem, false, 0,
4662					 width, height, bpp, tiling,
4663					 &pitch);
4664		DBG(("%s: tiled[%d] size=%d\n", __FUNCTION__, tiling, size));
4665		if (size > 0 && size <= kgem->max_gpu_size)
4666			flags |= KGEM_CAN_CREATE_GPU | KGEM_CAN_CREATE_TILED;
4667		if (size > 0 && size <= PAGE_SIZE*kgem->aperture_mappable/4)
4668			flags |= KGEM_CAN_CREATE_GTT;
4669		if (size > PAGE_SIZE*kgem->aperture_mappable/4)
4670			flags &= ~KGEM_CAN_CREATE_GTT;
4671		if (size > kgem->large_object_size)
4672			flags |= KGEM_CAN_CREATE_LARGE;
4673		if (size > kgem->max_object_size) {
4674			DBG(("%s: too large (tiled) %d > %d\n",
4675			     __FUNCTION__, size, kgem->max_object_size));
4676			return 0;
4677		}
4678		if (kgem->gen < 040) {
4679			int fence_size = 1024 * 1024;
4680			while (fence_size < size)
4681				fence_size <<= 1;
4682			if (fence_size > kgem->max_gpu_size)
4683				flags &= ~KGEM_CAN_CREATE_GPU | KGEM_CAN_CREATE_TILED;
4684			if (fence_size > PAGE_SIZE*kgem->aperture_fenceable/4)
4685				flags &= ~KGEM_CAN_CREATE_GTT;
4686		}
4687	}
4688
4689	return flags;
4690}
4691
4692inline int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo)
4693{
4694	unsigned int size;
4695
4696	assert(bo->tiling);
4697	assert_tiling(kgem, bo);
4698	assert(kgem->gen < 040);
4699
4700	if (kgem->gen < 030)
4701		size = 512 * 1024 / PAGE_SIZE;
4702	else
4703		size = 1024 * 1024 / PAGE_SIZE;
4704	while (size < num_pages(bo))
4705		size <<= 1;
4706
4707	return size;
4708}
4709
/* Allocate a bo intended for scanout via the create2 ioctl, preferring
 * placement in stolen memory and falling back to system memory.  The
 * tiling/pitch are set at creation time.  Returns NULL if the kernel
 * lacks create2 support or both placements fail.
 */
static struct kgem_bo *
__kgem_bo_create_as_display(struct kgem *kgem, int size, int tiling, int pitch)
{
	struct local_i915_gem_create2 args;
	struct kgem_bo *bo;

	if (!kgem->has_create2)
		return NULL;

	memset(&args, 0, sizeof(args));
	args.size = size * PAGE_SIZE;
	args.placement = LOCAL_I915_CREATE_PLACEMENT_STOLEN;
	args.caching = DISPLAY;
	args.tiling_mode = tiling;
	args.stride = pitch;

	/* Try stolen memory first; retry in regular system pages. */
	if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_CREATE2, &args)) {
		args.placement = LOCAL_I915_CREATE_PLACEMENT_SYSTEM;
		if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_CREATE2, &args))
			return NULL;
	}

	bo = __kgem_bo_alloc(args.handle, size);
	if (bo == NULL) {
		gem_close(kgem->fd, args.handle);
		return NULL;
	}

	bo->unique_id = kgem_get_unique_id(kgem);
	bo->tiling = tiling;
	bo->pitch = pitch;
	if (args.placement == LOCAL_I915_CREATE_PLACEMENT_STOLEN) {
		bo->purged = true; /* for asserts against CPU access */
	}
	bo->reusable = false; /* so that unclaimed scanouts are freed */
	bo->domain = DOMAIN_NONE;

	/* Stolen memory may still be busy (e.g. the BIOS framebuffer);
	 * track the bo on the flushing list until it retires.
	 */
	if (__kgem_busy(kgem, bo->handle)) {
		assert(bo->exec == NULL);
		list_add(&bo->request, &kgem->flushing);
		bo->rq = (void *)kgem;
		kgem->need_retire = true;
	}

	assert_tiling(kgem, bo);
	debug_alloc__bo(kgem, bo);

	return bo;
}
4759
/* Convert an ordinary bo into a scanout: switch its caching mode out of
 * LLC, prefault it through the GTT, and attach a framebuffer id (stored
 * in bo->delta).  Only performed while we own the VT.
 */
static void __kgem_bo_make_scanout(struct kgem *kgem,
				   struct kgem_bo *bo,
				   int width, int height)
{
	ScrnInfoPtr scrn =
		container_of(kgem, struct sna, kgem)->scrn;
	struct drm_mode_fb_cmd arg;

	assert(bo->proxy == NULL);

	if (!scrn->vtSema)
		return;

	DBG(("%s: create fb %dx%d@%d/%d\n",
	     __FUNCTION__, width, height, scrn->depth, scrn->bitsPerPixel));

	VG_CLEAR(arg);
	arg.width = width;
	arg.height = height;
	arg.pitch = bo->pitch;
	arg.bpp = scrn->bitsPerPixel;
	arg.depth = scrn->depth;
	arg.handle = bo->handle;

	/* First move the scanout out of cached memory */
	if (kgem->has_llc) {
		/* If neither DISPLAY nor UNCACHED caching can be set,
		 * the bo cannot be scanned out; bail before ADDFB.
		 */
		if (!gem_set_caching(kgem->fd, bo->handle, DISPLAY) &&
		    !gem_set_caching(kgem->fd, bo->handle, UNCACHED))
			return;
	}

	bo->scanout = true;

	/* Then pre-emptively move the object into the mappable
	 * portion to avoid rebinding later when busy.
	 */
	if (bo->map__gtt == NULL)
		bo->map__gtt = __kgem_bo_map__gtt(kgem, bo);
	if (bo->map__gtt) {
		/* Touch one dword under SIGBUS protection to force the
		 * binding; the write is to the scanout's first pixel.
		 */
		if (sigtrap_get() == 0) {
			*(uint32_t *)bo->map__gtt = 0;
			sigtrap_put();
		}
		bo->domain = DOMAIN_GTT;
	}

	if (do_ioctl(kgem->fd, DRM_IOCTL_MODE_ADDFB, &arg) == 0) {
		DBG(("%s: attached fb=%d to handle=%d\n",
		     __FUNCTION__, arg.fb_id, arg.handle));
		bo->delta = arg.fb_id;
	}
}
4812
/* Allocate (or recycle) a 2D bo of the requested geometry and tiling.
 *
 * A negative @tiling means the mode is mandatory (see
 * kgem_choose_tiling()).  The search order is:
 *   1. the scanout cache (CREATE_SCANOUT only),
 *   2. the large-object lists when the size exceeds the cache buckets,
 *   3. inactive-but-mapped bos when a CPU/GTT mapping was requested,
 *   4. active caches (exact tiling, then any tiling, then a near-miss),
 *   5. inactive caches, retiring/throttling as permitted,
 * and only then a fresh GEM allocation.  Returns NULL on failure, or
 * with CREATE_CACHED when no cached bo was found.
 */
struct kgem_bo *kgem_create_2d(struct kgem *kgem,
			       int width,
			       int height,
			       int bpp,
			       int tiling,
			       uint32_t flags)
{
	struct list *cache;
	struct kgem_bo *bo;
	uint32_t pitch, tiled_height, size;
	uint32_t handle;
	int i, bucket, retry;
	bool exact = flags & (CREATE_EXACT | CREATE_SCANOUT);

	/* Negative tiling encodes "mandatory"; normalise and remember. */
	if (tiling < 0)
		exact = true, tiling = -tiling;

	DBG(("%s(%dx%d, bpp=%d, tiling=%d, exact=%d, inactive=%d, cpu-mapping=%d, gtt-mapping=%d, scanout?=%d, prime?=%d, temp?=%d)\n", __FUNCTION__,
	     width, height, bpp, tiling, exact,
	     !!(flags & CREATE_INACTIVE),
	     !!(flags & CREATE_CPU_MAP),
	     !!(flags & CREATE_GTT_MAP),
	     !!(flags & CREATE_SCANOUT),
	     !!(flags & CREATE_PRIME),
	     !!(flags & CREATE_TEMPORARY)));

	size = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags,
				 width, height, bpp, tiling, &pitch);
	if (size == 0) {
		DBG(("%s: invalid surface size (too large?)\n", __FUNCTION__));
		return NULL;
	}

	/* From here on, size is counted in pages. */
	size /= PAGE_SIZE;
	bucket = cache_bucket(size);

	if (flags & CREATE_SCANOUT) {
		struct kgem_bo *last = NULL;

		/* 1a: reuse a retired scanout with matching fb geometry. */
		list_for_each_entry_reverse(bo, &kgem->scanout, list) {
			assert(bo->scanout);
			assert(!bo->flush);
			assert(!bo->refcnt);
			assert_tiling(kgem, bo);

			if (size > num_pages(bo) || num_pages(bo) > 2*size)
				continue;

			if (bo->tiling != tiling || bo->pitch != pitch)
				/* No tiling/pitch without recreating fb */
				continue;

			if (bo->delta && !check_scanout_size(kgem, bo, width, height))
				continue;

			/* Remember a busy candidate in case nothing idle fits. */
			if (flags & CREATE_INACTIVE && bo->rq) {
				last = bo;
				continue;
			}

			list_del(&bo->list);

			bo->unique_id = kgem_get_unique_id(kgem);
			DBG(("  1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
			assert_tiling(kgem, bo);
			bo->refcnt = 1;
			return bo;
		}

		if (last) {
			list_del(&last->list);

			last->unique_id = kgem_get_unique_id(kgem);
			DBG(("  1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
			     last->pitch, last->tiling, last->handle, last->unique_id));
			assert(last->pitch*kgem_aligned_height(kgem, height, last->tiling) <= kgem_bo_size(last));
			assert_tiling(kgem, last);
			last->refcnt = 1;
			return last;
		}

		/* 1b: while we own the VT, retile an old scanout and
		 * attach a fresh framebuffer to it.
		 */
		if (container_of(kgem, struct sna, kgem)->scrn->vtSema) {
			ScrnInfoPtr scrn = container_of(kgem, struct sna, kgem)->scrn;

			list_for_each_entry_reverse(bo, &kgem->scanout, list) {
				struct drm_mode_fb_cmd arg;

				assert(bo->scanout);
				assert(!bo->refcnt);

				if (size > num_pages(bo) || num_pages(bo) > 2*size)
					continue;

				if (flags & CREATE_INACTIVE && bo->rq)
					continue;

				list_del(&bo->list);

				if (bo->tiling != tiling || bo->pitch != pitch) {
					/* Must drop the old fb before retiling. */
					if (bo->delta) {
						kgem_bo_rmfb(kgem, bo);
						bo->delta = 0;
					}

					if (gem_set_tiling(kgem->fd, bo->handle,
							   tiling, pitch)) {
						bo->tiling = tiling;
						bo->pitch = pitch;
					} else {
						kgem_bo_free(kgem, bo);
						break;
					}
				}

				VG_CLEAR(arg);
				arg.width = width;
				arg.height = height;
				arg.pitch = bo->pitch;
				arg.bpp = scrn->bitsPerPixel;
				arg.depth = scrn->depth;
				arg.handle = bo->handle;

				if (do_ioctl(kgem->fd, DRM_IOCTL_MODE_ADDFB, &arg)) {
					kgem_bo_free(kgem, bo);
					break;
				}

				bo->delta = arg.fb_id;
				bo->unique_id = kgem_get_unique_id(kgem);

				DBG(("  2:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
				     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
				assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
				assert_tiling(kgem, bo);
				bo->refcnt = 1;
				return bo;
			}
		}

		/* 1c: allocate a dedicated display bo (stolen memory). */
		bo = __kgem_bo_create_as_display(kgem, size, tiling, pitch);
		if (bo)
			return bo;

		flags |= CREATE_INACTIVE;
	}

	/* 2: too big for the bucketed caches — search the large lists. */
	if (bucket >= NUM_CACHE_BUCKETS) {
		DBG(("%s: large bo num pages=%d, bucket=%d\n",
		     __FUNCTION__, size, bucket));

		if (flags & CREATE_INACTIVE)
			goto large_inactive;

		tiled_height = kgem_aligned_height(kgem, height, tiling);

		list_for_each_entry(bo, &kgem->large, list) {
			assert(!bo->purged);
			assert(!bo->scanout);
			assert(bo->refcnt == 0);
			assert(bo->reusable);
			assert_tiling(kgem, bo);

			if (kgem->gen < 040) {
				/* Fenced tiling: cannot change pitch, only
				 * accept a bo whose pitch already fits.
				 */
				if (bo->pitch < pitch) {
					DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
					     bo->tiling, tiling,
					     bo->pitch, pitch));
					continue;
				}

				if (bo->pitch * tiled_height > bytes(bo))
					continue;
			} else {
				if (num_pages(bo) < size)
					continue;

				if (bo->pitch != pitch || bo->tiling != tiling) {
					if (!gem_set_tiling(kgem->fd, bo->handle,
							    tiling, pitch))
						continue;

					bo->pitch = pitch;
					bo->tiling = tiling;
				}
			}

			kgem_bo_remove_from_active(kgem, bo);

			bo->unique_id = kgem_get_unique_id(kgem);
			bo->delta = 0;
			DBG(("  1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
			assert_tiling(kgem, bo);
			bo->refcnt = 1;
			return bo;
		}

large_inactive:
		__kgem_throttle_retire(kgem, flags);
		list_for_each_entry(bo, &kgem->large_inactive, list) {
			assert(bo->refcnt == 0);
			assert(bo->reusable);
			assert(!bo->scanout);
			assert_tiling(kgem, bo);

			if (size > num_pages(bo))
				continue;

			if (bo->tiling != tiling ||
			    (tiling != I915_TILING_NONE && bo->pitch != pitch)) {
				if (!gem_set_tiling(kgem->fd, bo->handle,
						    tiling, pitch))
					continue;

				bo->tiling = tiling;
				bo->pitch = pitch;
			}

			/* The kernel may have discarded the pages. */
			if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
				kgem_bo_free(kgem, bo);
				break;
			}

			list_del(&bo->list);

			assert(bo->domain != DOMAIN_GPU);
			bo->unique_id = kgem_get_unique_id(kgem);
			bo->pitch = pitch;
			bo->delta = 0;
			DBG(("  1:from large inactive: pitch=%d, tiling=%d, handle=%d, id=%d\n",
			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
			assert_tiling(kgem, bo);
			bo->refcnt = 1;

			if (flags & CREATE_SCANOUT)
				__kgem_bo_make_scanout(kgem, bo, width, height);

			return bo;
		}

		goto create;
	}

	/* 3: a mapping was requested — prefer a bo with a live VMA. */
	if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
		int for_cpu = !!(flags & CREATE_CPU_MAP);
		if (kgem->has_llc && tiling == I915_TILING_NONE)
			for_cpu = 1;
		/* We presume that we will need to upload to this bo,
		 * and so would prefer to have an active VMA.
		 */
		cache = &kgem->vma[for_cpu].inactive[bucket];
		do {
			list_for_each_entry(bo, cache, vma) {
				assert(bucket(bo) == bucket);
				assert(bo->refcnt == 0);
				assert(!bo->scanout);
				assert(for_cpu ? !!bo->map__cpu : (bo->map__gtt || bo->map__wc));
				assert(bo->rq == NULL);
				assert(bo->exec == NULL);
				assert(list_is_empty(&bo->request));
				assert(bo->flush == false);
				assert_tiling(kgem, bo);

				if (size > num_pages(bo)) {
					DBG(("inactive too small: %d < %d\n",
					     num_pages(bo), size));
					continue;
				}

				if (flags & UNCACHED && !kgem->has_llc && bo->domain != DOMAIN_CPU)
					continue;

				if (bo->tiling != tiling ||
				    (tiling != I915_TILING_NONE && bo->pitch != pitch)) {
					/* Retiling would invalidate an
					 * existing GTT mapping.
					 */
					if (bo->map__gtt ||
					    !gem_set_tiling(kgem->fd, bo->handle,
							    tiling, pitch)) {
						DBG(("inactive GTT vma with wrong tiling: %d < %d\n",
						     bo->tiling, tiling));
						continue;
					}
					bo->tiling = tiling;
					bo->pitch = pitch;
				}

				if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
					kgem_bo_free(kgem, bo);
					break;
				}

				assert(bo->tiling == tiling);
				bo->pitch = pitch;
				bo->delta = 0;
				bo->unique_id = kgem_get_unique_id(kgem);

				kgem_bo_remove_from_inactive(kgem, bo);
				assert(list_is_empty(&bo->list));
				assert(list_is_empty(&bo->vma));

				DBG(("  from inactive vma: pitch=%d, tiling=%d: handle=%d, id=%d\n",
				     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
				assert(bo->reusable);
				assert(bo->domain != DOMAIN_GPU);
				ASSERT_IDLE(kgem, bo->handle);
				assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
				assert_tiling(kgem, bo);
				bo->refcnt = 1;
				return bo;
			}
		} while (!list_is_empty(cache) &&
			 __kgem_throttle_retire(kgem, flags));

		if (flags & CREATE_CPU_MAP && !kgem->has_llc) {
			/* Nothing cached to snoop from; allow a fresh
			 * allocation even if CREATE_CACHED was set.
			 */
			if (list_is_empty(&kgem->active[bucket][tiling]) &&
			    list_is_empty(&kgem->inactive[bucket]))
				flags &= ~CREATE_CACHED;

			goto create;
		}
	}

	if (flags & CREATE_INACTIVE)
		goto skip_active_search;

	/* 4: Best active match */
	retry = NUM_CACHE_BUCKETS - bucket;
	if (retry > 3 && (flags & CREATE_TEMPORARY) == 0)
		retry = 3;
search_active:
	assert(bucket < NUM_CACHE_BUCKETS);
	cache = &kgem->active[bucket][tiling];
	if (tiling) {
		tiled_height = kgem_aligned_height(kgem, height, tiling);
		list_for_each_entry(bo, cache, list) {
			assert(!bo->purged);
			assert(bo->refcnt == 0);
			assert(bucket(bo) == bucket);
			assert(bo->reusable);
			assert(bo->tiling == tiling);
			assert(bo->flush == false);
			assert(!bo->scanout);
			assert_tiling(kgem, bo);

			if (kgem->gen < 040) {
				/* Fenced tiling: pitch is immutable. */
				if (bo->pitch < pitch) {
					DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
					     bo->tiling, tiling,
					     bo->pitch, pitch));
					continue;
				}

				if (bo->pitch * tiled_height > bytes(bo))
					continue;
			} else {
				if (num_pages(bo) < size)
					continue;

				if (bo->pitch != pitch) {
					if (!gem_set_tiling(kgem->fd,
							    bo->handle,
							    tiling, pitch))
						continue;

					bo->pitch = pitch;
				}
			}

			kgem_bo_remove_from_active(kgem, bo);

			bo->unique_id = kgem_get_unique_id(kgem);
			bo->delta = 0;
			DBG(("  1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
			assert_tiling(kgem, bo);
			bo->refcnt = 1;
			return bo;
		}
	} else {
		list_for_each_entry(bo, cache, list) {
			assert(bucket(bo) == bucket);
			assert(!bo->purged);
			assert(bo->refcnt == 0);
			assert(bo->reusable);
			assert(!bo->scanout);
			assert(bo->tiling == tiling);
			assert(bo->flush == false);
			assert_tiling(kgem, bo);

			if (num_pages(bo) < size)
				continue;

			kgem_bo_remove_from_active(kgem, bo);

			bo->pitch = pitch;
			bo->unique_id = kgem_get_unique_id(kgem);
			bo->delta = 0;
			DBG(("  1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
			assert_tiling(kgem, bo);
			bo->refcnt = 1;
			return bo;
		}
	}

	/* 4b: any tiling will do on gen4+, as we can retile in place. */
	if (kgem->gen >= 040) {
		for (i = I915_TILING_Y; i >= I915_TILING_NONE; i--) {
			cache = &kgem->active[bucket][i];
			list_for_each_entry(bo, cache, list) {
				assert(!bo->purged);
				assert(bo->refcnt == 0);
				assert(bo->reusable);
				assert(!bo->scanout);
				assert(bo->flush == false);
				assert_tiling(kgem, bo);

				if (num_pages(bo) < size)
					continue;

				if (bo->tiling != tiling ||
				    (tiling != I915_TILING_NONE && bo->pitch != pitch)) {
					if (!gem_set_tiling(kgem->fd,
							    bo->handle,
							    tiling, pitch))
						continue;
				}

				kgem_bo_remove_from_active(kgem, bo);

				bo->unique_id = kgem_get_unique_id(kgem);
				bo->pitch = pitch;
				bo->tiling = tiling;
				bo->delta = 0;
				DBG(("  1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
				     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
				assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
				assert_tiling(kgem, bo);
				bo->refcnt = 1;
				return bo;
			}
		}
	} else if (!exact) { /* allow an active near-miss? */
		for (i = tiling; i >= I915_TILING_NONE; i--) {
			/* NOTE(review): this recomputes the surface size with
			 * `tiling`, not the loop's candidate mode `i` — verify
			 * whether `i` was intended here.
			 */
			tiled_height = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags,
							 width, height, bpp, tiling, &pitch);
			cache = active(kgem, tiled_height / PAGE_SIZE, i);
			tiled_height = kgem_aligned_height(kgem, height, i);
			list_for_each_entry(bo, cache, list) {
				assert(!bo->purged);
				assert(bo->refcnt == 0);
				assert(bo->reusable);
				assert(!bo->scanout);
				assert(bo->flush == false);
				assert_tiling(kgem, bo);

				if (bo->tiling) {
					if (bo->pitch < pitch) {
						DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
						     bo->tiling, tiling,
						     bo->pitch, pitch));
						continue;
					}
				} else
					bo->pitch = pitch;

				if (bo->pitch * tiled_height > bytes(bo))
					continue;

				kgem_bo_remove_from_active(kgem, bo);

				bo->unique_id = kgem_get_unique_id(kgem);
				bo->delta = 0;
				DBG(("  1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
				     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
				assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
				assert_tiling(kgem, bo);
				bo->refcnt = 1;
				return bo;
			}
		}
	}

	/* Widen the search to the next bucket up. */
	if (--retry) {
		bucket++;
		goto search_active;
	}

skip_active_search:
	/* 5: the inactive caches. */
	bucket = cache_bucket(size);
	retry = NUM_CACHE_BUCKETS - bucket;
	if (retry > 3)
		retry = 3;
search_inactive:
	/* Now just look for a close match and prefer any currently active */
	assert(bucket < NUM_CACHE_BUCKETS);
	cache = &kgem->inactive[bucket];
	list_for_each_entry(bo, cache, list) {
		assert(bucket(bo) == bucket);
		assert(bo->reusable);
		assert(!bo->scanout);
		assert(bo->flush == false);
		assert_tiling(kgem, bo);

		if (size > num_pages(bo)) {
			DBG(("inactive too small: %d < %d\n",
			     num_pages(bo), size));
			continue;
		}

		if (bo->tiling != tiling ||
		    (tiling != I915_TILING_NONE && bo->pitch != pitch)) {
			if (!gem_set_tiling(kgem->fd, bo->handle,
					    tiling, pitch))
				continue;
		}

		if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
			kgem_bo_free(kgem, bo);
			break;
		}

		kgem_bo_remove_from_inactive(kgem, bo);
		assert(list_is_empty(&bo->list));
		assert(list_is_empty(&bo->vma));

		bo->pitch = pitch;
		bo->tiling = tiling;

		bo->delta = 0;
		bo->unique_id = kgem_get_unique_id(kgem);
		assert(bo->pitch);
		DBG(("  from inactive: pitch=%d, tiling=%d: handle=%d, id=%d\n",
		     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
		assert(bo->refcnt == 0);
		assert(bo->reusable);
		assert((flags & CREATE_INACTIVE) == 0 || bo->domain != DOMAIN_GPU);
		ASSERT_MAYBE_IDLE(kgem, bo->handle, flags & CREATE_INACTIVE);
		assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
		assert_tiling(kgem, bo);
		bo->refcnt = 1;

		if (flags & CREATE_SCANOUT)
			__kgem_bo_make_scanout(kgem, bo, width, height);

		return bo;
	}

	/* 5b: try to retire the oldest active bo of the right shape,
	 * throttling if the caller allows it.
	 */
	if ((flags & CREATE_NO_RETIRE) == 0) {
		list_for_each_entry_reverse(bo, &kgem->active[bucket][tiling], list) {
			if (bo->exec)
				break;

			if (size > num_pages(bo))
				continue;

			if (__kgem_busy(kgem, bo->handle)) {
				if (flags & CREATE_NO_THROTTLE)
					goto no_retire;

				do {
					if (!kgem->need_throttle) {
						DBG(("%s: not throttling for active handle=%d\n", __FUNCTION__, bo->handle));
						goto no_retire;
					}

					__kgem_throttle(kgem, false);
				} while (__kgem_busy(kgem, bo->handle));
			}

			DBG(("%s: flushed active handle=%d\n", __FUNCTION__, bo->handle));

			kgem_bo_remove_from_active(kgem, bo);
			__kgem_bo_clear_busy(bo);

			if (tiling != I915_TILING_NONE && bo->pitch != pitch) {
				if (!gem_set_tiling(kgem->fd, bo->handle, tiling, pitch)) {
					kgem_bo_free(kgem, bo);
					goto no_retire;
				}
			}

			bo->pitch = pitch;
			bo->unique_id = kgem_get_unique_id(kgem);
			bo->delta = 0;
			DBG(("  2:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
			assert_tiling(kgem, bo);
			bo->refcnt = 1;

			if (flags & CREATE_SCANOUT)
				__kgem_bo_make_scanout(kgem, bo, width, height);

			return bo;
		}
no_retire:
		flags |= CREATE_NO_RETIRE;
	}

	if (--retry) {
		bucket++;
		goto search_inactive;
	}

create:
	/* 6: nothing cached fits — allocate a fresh GEM object. */
	if (flags & CREATE_CACHED) {
		DBG(("%s: no cached bo found, requested not to create a new bo\n", __FUNCTION__));
		return NULL;
	}

	if (bucket >= NUM_CACHE_BUCKETS)
		size = ALIGN(size, 1024);
	handle = gem_create(kgem->fd, size);
	if (handle == 0) {
		DBG(("%s: kernel allocation (gem_create) failure\n", __FUNCTION__));
		return NULL;
	}

	bo = __kgem_bo_alloc(handle, size);
	if (!bo) {
		DBG(("%s: malloc failed\n", __FUNCTION__));
		gem_close(kgem->fd, handle);
		return NULL;
	}

	bo->unique_id = kgem_get_unique_id(kgem);
	if (tiling == I915_TILING_NONE ||
	    gem_set_tiling(kgem->fd, handle, tiling, pitch)) {
		bo->tiling = tiling;
		bo->pitch = pitch;
		if (flags & CREATE_SCANOUT)
			__kgem_bo_make_scanout(kgem, bo, width, height);
	} else {
		/* Tiling failed: acceptable unless the caller demanded it. */
		if (flags & CREATE_EXACT) {
			DBG(("%s: failed to set exact tiling (gem_set_tiling)\n", __FUNCTION__));
			gem_close(kgem->fd, handle);
			free(bo);
			return NULL;
		}
	}

	assert(bytes(bo) >= bo->pitch * kgem_aligned_height(kgem, height, bo->tiling));
	assert_tiling(kgem, bo);

	debug_alloc__bo(kgem, bo);

	DBG(("  new pitch=%d, tiling=%d, handle=%d, id=%d, num_pages=%d [%d], bucket=%d\n",
	     bo->pitch, bo->tiling, bo->handle, bo->unique_id,
	     size, num_pages(bo), bucket(bo)));
	return bo;
}
5469
/* Allocate a 2D bo optimised for CPU access, trying in order:
 *   1. an ordinary LLC-coherent bo (has_llc),
 *   2. a recycled buffer from the snoop cache,
 *   3. a linear bo switched to SNOOPED caching (has_caching),
 *   4. a userptr wrapping of page-aligned malloc'ed memory.
 * Returns NULL when no CPU-mappable path is available.
 */
struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem,
				   int width,
				   int height,
				   int bpp,
				   uint32_t flags)
{
	struct kgem_bo *bo;
	int stride, size;

	if (DBG_NO_CPU)
		return NULL;

	DBG(("%s(%dx%d, bpp=%d)\n", __FUNCTION__, width, height, bpp));

	if (kgem->has_llc) {
		/* On LLC the regular path is already CPU-coherent. */
		bo = kgem_create_2d(kgem, width, height, bpp,
				    I915_TILING_NONE, flags);
		if (bo == NULL)
			return bo;

		assert(bo->tiling == I915_TILING_NONE);
		assert_tiling(kgem, bo);

		if (kgem_bo_map__cpu(kgem, bo) == NULL) {
			kgem_bo_destroy(kgem, bo);
			return NULL;
		}

		return bo;
	}

	assert(width > 0 && height > 0);
	/* Round the stride up so each row is 4-byte aligned. */
	stride = ALIGN(width, 2) * bpp >> 3;
	stride = ALIGN(stride, 4);
	size = stride * ALIGN(height, 2);
	assert(size >= PAGE_SIZE);

	DBG(("%s: %dx%d, %d bpp, stride=%d\n",
	     __FUNCTION__, width, height, bpp, stride));

	bo = search_snoop_cache(kgem, NUM_PAGES(size), 0);
	if (bo) {
		assert(bo->tiling == I915_TILING_NONE);
		assert_tiling(kgem, bo);
		assert(bo->snoop);
		bo->refcnt = 1;
		bo->pitch = stride;
		bo->unique_id = kgem_get_unique_id(kgem);
		return bo;
	}

	if (kgem->has_caching) {
		bo = kgem_create_linear(kgem, size, flags);
		if (bo == NULL)
			return NULL;

		assert(bo->tiling == I915_TILING_NONE);
		assert_tiling(kgem, bo);

		/* set_caching must only be applied to an idle bo. */
		assert(!__kgem_busy(kgem, bo->handle));
		if (!gem_set_caching(kgem->fd, bo->handle, SNOOPED)) {
			kgem_bo_destroy(kgem, bo);
			return NULL;
		}
		bo->snoop = true;

		if (kgem_bo_map__cpu(kgem, bo) == NULL) {
			kgem_bo_destroy(kgem, bo);
			return NULL;
		}

		bo->pitch = stride;
		bo->unique_id = kgem_get_unique_id(kgem);
		return bo;
	}

	if (kgem->has_userptr) {
		void *ptr;

		/* XXX */
		//if (posix_memalign(&ptr, 64, ALIGN(size, 64)))
		if (posix_memalign(&ptr, PAGE_SIZE, ALIGN(size, PAGE_SIZE)))
			return NULL;

		bo = kgem_create_map(kgem, ptr, size, false);
		if (bo == NULL) {
			free(ptr);
			return NULL;
		}

		bo->pitch = stride;
		bo->unique_id = kgem_get_unique_id(kgem);
		return bo;
	}

	return NULL;
}
5567
/*
 * Destruction path invoked once the last reference on a bo is dropped.
 * Proxies (sub-ranges sharing the parent's GEM handle) are dismantled
 * inline: unlink them, drop the parent reference, and recycle the
 * wrapper struct on the internal free-list.  Real bos are handed to
 * __kgem_bo_destroy for caching or release.
 */
void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
{
	DBG(("%s: handle=%d, proxy? %d\n",
	     __FUNCTION__, bo->handle, bo->proxy != NULL));

	if (bo->proxy) {
		assert(!bo->reusable);
		kgem_bo_binding_free(kgem, bo);

		assert(list_is_empty(&bo->list));
		_list_del(&bo->vma);
		_list_del(&bo->request);

		/* An io proxy still in the CPU domain may back a pending
		 * upload buffer that needs to be reclaimed.
		 */
		if (bo->io && bo->domain == DOMAIN_CPU)
			_kgem_bo_delete_buffer(kgem, bo);

		kgem_bo_unref(kgem, bo->proxy);

		if (DBG_NO_MALLOC_CACHE) {
			free(bo);
		} else {
			/* Push onto the singly-linked free-list of struct
			 * allocations, reusing the start of the struct
			 * itself as the link pointer.
			 */
			*(struct kgem_bo **)bo = __kgem_freed_bo;
			__kgem_freed_bo = bo;
		}
	} else
		__kgem_bo_destroy(kgem, bo);
}
5595
/*
 * Poll a retired-but-flush-pending bo: if the kernel reports it no
 * longer busy, clear our bookkeeping (rq/needs_flush) to match.
 */
static void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo)
{
	assert(bo->rq);
	assert(bo->exec == NULL);
	assert(bo->needs_flush);

	/* The kernel will emit a flush *and* update its own flushing lists. */
	if (!__kgem_busy(kgem, bo->handle))
		__kgem_bo_clear_busy(bo);

	DBG(("%s: handle=%d, busy?=%d\n",
	     __FUNCTION__, bo->handle, bo->rq != NULL));
}
5609
/*
 * Ensure pending rendering to a scanout bo is submitted and flushed so
 * the display engine observes the results.  Afterwards the bo is
 * treated as clean regardless of the actual ioctl outcome (see note
 * below about EIO).
 */
void kgem_scanout_flush(struct kgem *kgem, struct kgem_bo *bo)
{
	if (!bo->needs_flush)
		return;

	kgem_bo_submit(kgem, bo);

	/* If the kernel fails to emit the flush, then it will be forced when
	 * we assume direct access. And as the usual failure is EIO, we do
	 * not actually care.
	 */
	assert(bo->exec == NULL);
	if (bo->rq)
		__kgem_flush(kgem, bo);

	/* Whatever actually happens, we can regard the GTT write domain
	 * as being flushed.
	 */
	bo->gtt_dirty = false;
	bo->needs_flush = false;
	bo->domain = DOMAIN_NONE;
}
5632
5633inline static bool nearly_idle(struct kgem *kgem)
5634{
5635	int ring = kgem->ring == KGEM_BLT;
5636
5637	if (list_is_singular(&kgem->requests[ring]))
5638		return true;
5639
5640	return __kgem_ring_is_idle(kgem, ring);
5641}
5642
5643inline static bool needs_semaphore(struct kgem *kgem, struct kgem_bo *bo)
5644{
5645	if (kgem->needs_semaphore)
5646		return false;
5647
5648	if (bo->rq == NULL || RQ_RING(bo->rq) == kgem->ring)
5649		return false;
5650
5651	kgem->needs_semaphore = true;
5652	return true;
5653}
5654
5655inline static bool needs_reservation(struct kgem *kgem, struct kgem_bo *bo)
5656{
5657	if (kgem->needs_reservation)
5658		return false;
5659
5660	if (bo->presumed_offset)
5661		return false;
5662
5663	kgem->needs_reservation = true;
5664	return nearly_idle(kgem);
5665}
5666
5667inline static bool needs_batch_flush(struct kgem *kgem, struct kgem_bo *bo)
5668{
5669	bool flush = false;
5670
5671	if (needs_semaphore(kgem, bo)) {
5672		DBG(("%s: flushing before handle=%d for required semaphore\n", __FUNCTION__, bo->handle));
5673		flush = true;
5674	}
5675
5676	if (needs_reservation(kgem, bo)) {
5677		DBG(("%s: flushing before handle=%d for new reservation\n", __FUNCTION__, bo->handle));
5678		flush = true;
5679	}
5680
5681	return kgem->nreloc ? flush : false;
5682}
5683
/*
 * Slow-path aperture query: ask the kernel for the actually available
 * aperture space and test whether num_pages (plus a safety reserve)
 * still fits.  Returns true when the request fits.  If this batch has
 * already accumulated aperture usage (kgem->aperture != 0) we decline
 * immediately, presumably to force a flush first — confirm against
 * callers.
 */
static bool aperture_check(struct kgem *kgem, unsigned num_pages)
{
	struct drm_i915_gem_get_aperture aperture;
	int reserve;

	if (kgem->aperture)
		return false;

	/* Leave some space in case of alignment issues */
	reserve = kgem->aperture_mappable / 2;
	/* Pre-gen3.3 hardware needs room for at least the largest fence. */
	if (kgem->gen < 033 && reserve < kgem->aperture_max_fence)
		reserve = kgem->aperture_max_fence;
	if (!kgem->has_llc)
		reserve += kgem->nexec * PAGE_SIZE * 2;

	DBG(("%s: num_pages=%d, holding %d pages in reserve, total aperture %d\n",
	     __FUNCTION__, num_pages, reserve, kgem->aperture_total));
	num_pages += reserve;

	VG_CLEAR(aperture);
	/* Seed with our own estimate in case the ioctl fails. */
	aperture.aper_available_size = kgem->aperture_total;
	aperture.aper_available_size *= PAGE_SIZE;
	(void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);

	DBG(("%s: aperture required %ld bytes, available %ld bytes\n",
	     __FUNCTION__,
	     (long)num_pages * PAGE_SIZE,
	     (long)aperture.aper_available_size));

	return num_pages <= aperture.aper_available_size / PAGE_SIZE;
}
5715
/*
 * Decide whether the caller may continue adding to the current batch
 * (true) or should flush first (false).  An opportunistic flush is
 * taken only when the flush-state changes or aperture pressure rises
 * while the target ring is idle — and never under POWERSAVE.
 */
static inline bool kgem_flush(struct kgem *kgem, bool flush)
{
	if (unlikely(kgem->wedged))
		return false;

	/* Nothing queued yet, nothing to flush. */
	if (kgem->nreloc == 0)
		return true;

	if (container_of(kgem, struct sna, kgem)->flags & SNA_POWERSAVE)
		return true;

	if (kgem->flush == flush && kgem->aperture < kgem->aperture_low)
		return true;

	DBG(("%s: opportunistic flushing? flush=%d,%d, aperture=%d/%d, idle?=%d\n",
	     __FUNCTION__, kgem->flush, flush, kgem->aperture, kgem->aperture_low, kgem_ring_is_idle(kgem, kgem->ring)));
	return !kgem_ring_is_idle(kgem, kgem->ring);
}
5734
/*
 * Check whether the NULL-terminated varargs list of bos can be added
 * to the current batch without exceeding exec-slot or aperture limits.
 * Returns true if the caller may proceed; false means the batch must
 * be submitted first.
 */
bool kgem_check_bo(struct kgem *kgem, ...)
{
	va_list ap;
	struct kgem_bo *bo;
	int num_exec = 0;
	int num_pages = 0;	/* shadows the num_pages() macro for ints */
	bool flush = false;
	bool busy = true;

	va_start(ap, kgem);
	while ((bo = va_arg(ap, struct kgem_bo *))) {
		/* Account against the real backing bo, not the proxy. */
		while (bo->proxy)
			bo = bo->proxy;
		/* Already in this batch: costs no new slot or pages. */
		if (bo->exec)
			continue;

		if (needs_batch_flush(kgem, bo)) {
			va_end(ap);
			return false;
		}

		num_pages += num_pages(bo);
		num_exec++;

		flush |= bo->flush;
		busy &= bo->rq != NULL;
	}
	va_end(ap);

	DBG(("%s: num_pages=+%d, num_exec=+%d\n",
	     __FUNCTION__, num_pages, num_exec));

	if (!num_pages)
		return true;

	if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem)) {
		DBG(("%s: out of exec slots (%d + %d / %d)\n", __FUNCTION__,
		     kgem->nexec, num_exec, KGEM_EXEC_SIZE(kgem)));
		return false;
	}

	if (num_pages + kgem->aperture > kgem->aperture_high) {
		DBG(("%s: final aperture usage (%d + %d) is greater than high water mark (%d)\n",
		     __FUNCTION__, kgem->aperture, num_pages, kgem->aperture_high));
		/* Estimate exceeded: fall back to asking the kernel. */
		return aperture_check(kgem, num_pages);
	}

	/* All bos still active on the GPU: adding them cannot stall. */
	if (busy)
		return true;

	return kgem_flush(kgem, flush);
}
5787
5788bool kgem_check_bo_fenced(struct kgem *kgem, struct kgem_bo *bo)
5789{
5790	assert(bo->refcnt);
5791	while (bo->proxy)
5792		bo = bo->proxy;
5793	assert(bo->refcnt);
5794
5795	if (bo->exec) {
5796		if (kgem->gen < 040 &&
5797		    bo->tiling != I915_TILING_NONE &&
5798		    (bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
5799			uint32_t size;
5800
5801			assert(bo->tiling == I915_TILING_X);
5802
5803			if (kgem->nfence >= kgem->fence_max)
5804				return false;
5805
5806			if (kgem->aperture_fenced) {
5807				size = 3*kgem->aperture_fenced;
5808				if (kgem->aperture_total == kgem->aperture_mappable)
5809					size += kgem->aperture;
5810				if (size > kgem->aperture_fenceable &&
5811				    kgem_ring_is_idle(kgem, kgem->ring)) {
5812					DBG(("%s: opportunistic fence flush\n", __FUNCTION__));
5813					return false;
5814				}
5815			}
5816
5817			size = kgem_bo_fenced_size(kgem, bo);
5818			if (size > kgem->aperture_max_fence)
5819				kgem->aperture_max_fence = size;
5820			size += kgem->aperture_fenced;
5821			if (kgem->gen < 033 && size < 2 * kgem->aperture_max_fence)
5822				size = 2 * kgem->aperture_max_fence;
5823			if (kgem->aperture_total == kgem->aperture_mappable)
5824				size += kgem->aperture;
5825			if (size > kgem->aperture_fenceable) {
5826				DBG(("%s: estimated fence space required %d (fenced=%d, max_fence=%d, aperture=%d) exceeds fenceable aperture %d\n",
5827				     __FUNCTION__, size, kgem->aperture_fenced, kgem->aperture_max_fence, kgem->aperture, kgem->aperture_fenceable));
5828				return false;
5829			}
5830		}
5831
5832		return true;
5833	}
5834
5835	if (kgem->nexec >= KGEM_EXEC_SIZE(kgem) - 1)
5836		return false;
5837
5838	if (needs_batch_flush(kgem, bo))
5839		return false;
5840
5841	assert_tiling(kgem, bo);
5842	if (kgem->gen < 040 && bo->tiling != I915_TILING_NONE) {
5843		uint32_t size;
5844
5845		assert(bo->tiling == I915_TILING_X);
5846
5847		if (kgem->nfence >= kgem->fence_max)
5848			return false;
5849
5850		if (kgem->aperture_fenced) {
5851			size = 3*kgem->aperture_fenced;
5852			if (kgem->aperture_total == kgem->aperture_mappable)
5853				size += kgem->aperture;
5854			if (size > kgem->aperture_fenceable &&
5855			    kgem_ring_is_idle(kgem, kgem->ring)) {
5856				DBG(("%s: opportunistic fence flush\n", __FUNCTION__));
5857				return false;
5858			}
5859		}
5860
5861		size = kgem_bo_fenced_size(kgem, bo);
5862		if (size > kgem->aperture_max_fence)
5863			kgem->aperture_max_fence = size;
5864		size += kgem->aperture_fenced;
5865		if (kgem->gen < 033 && size < 2 * kgem->aperture_max_fence)
5866			size = 2 * kgem->aperture_max_fence;
5867		if (kgem->aperture_total == kgem->aperture_mappable)
5868			size += kgem->aperture;
5869		if (size > kgem->aperture_fenceable) {
5870			DBG(("%s: estimated fence space required %d (fenced=%d, max_fence=%d, aperture=%d) exceeds fenceable aperture %d\n",
5871			     __FUNCTION__, size, kgem->aperture_fenced, kgem->aperture_max_fence, kgem->aperture, kgem->aperture_fenceable));
5872			return false;
5873		}
5874	}
5875
5876	if (kgem->aperture + kgem->aperture_fenced + num_pages(bo) > kgem->aperture_high) {
5877		DBG(("%s: final aperture usage (%d + %d) is greater than high water mark (%d)\n",
5878		     __FUNCTION__, kgem->aperture, num_pages(bo), kgem->aperture_high));
5879		return aperture_check(kgem, num_pages(bo));
5880	}
5881
5882	if (bo->rq)
5883		return true;
5884
5885	return kgem_flush(kgem, bo->flush);
5886}
5887
/*
 * Check whether the NULL-terminated varargs list of possibly-fenced
 * bos can all be added to the current batch: enough exec slots, fence
 * registers, fenceable aperture and total aperture.  Returns true if
 * the caller may proceed; false requests a flush first.
 */
bool kgem_check_many_bo_fenced(struct kgem *kgem, ...)
{
	va_list ap;
	struct kgem_bo *bo;
	int num_fence = 0;
	int num_exec = 0;
	int num_pages = 0;	/* shadows the num_pages() macro for ints */
	int fenced_size = 0;
	bool flush = false;
	bool busy = true;

	va_start(ap, kgem);
	while ((bo = va_arg(ap, struct kgem_bo *))) {
		assert(bo->refcnt);
		/* Account against the real backing bo, not the proxy. */
		while (bo->proxy)
			bo = bo->proxy;
		assert(bo->refcnt);
		if (bo->exec) {
			/* Already in the batch: only an extra fence (tiled,
			 * pre-gen4, fence not yet requested) adds cost.
			 */
			if (kgem->gen >= 040 || bo->tiling == I915_TILING_NONE)
				continue;

			if ((bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
				fenced_size += kgem_bo_fenced_size(kgem, bo);
				num_fence++;
			}

			continue;
		}

		if (needs_batch_flush(kgem, bo)) {
			va_end(ap);
			return false;
		}

		assert_tiling(kgem, bo);
		num_pages += num_pages(bo);
		num_exec++;
		if (kgem->gen < 040 && bo->tiling) {
			uint32_t size = kgem_bo_fenced_size(kgem, bo);
			if (size > kgem->aperture_max_fence)
				kgem->aperture_max_fence = size;
			fenced_size += size;
			num_fence++;
		}

		flush |= bo->flush;
		busy &= bo->rq != NULL;
	}
	va_end(ap);

	if (num_fence) {
		uint32_t size;

		if (kgem->nfence + num_fence > kgem->fence_max)
			return false;

		if (kgem->aperture_fenced) {
			/* Pessimistic 3x multiplier for alignment waste. */
			size = 3*kgem->aperture_fenced;
			if (kgem->aperture_total == kgem->aperture_mappable)
				size += kgem->aperture;
			if (size > kgem->aperture_fenceable &&
			    kgem_ring_is_idle(kgem, kgem->ring)) {
				DBG(("%s: opportunistic fence flush\n", __FUNCTION__));
				return false;
			}
		}

		size = kgem->aperture_fenced;
		size += fenced_size;
		/* Pre-gen3.3 requires room for two maximal fences. */
		if (kgem->gen < 033 && size < 2 * kgem->aperture_max_fence)
			size = 2 * kgem->aperture_max_fence;
		if (kgem->aperture_total == kgem->aperture_mappable)
			size += kgem->aperture;
		if (size > kgem->aperture_fenceable) {
			DBG(("%s: estimated fence space required %d (fenced=%d, max_fence=%d, aperture=%d) exceeds fenceable aperture %d\n",
			     __FUNCTION__, size, kgem->aperture_fenced, kgem->aperture_max_fence, kgem->aperture, kgem->aperture_fenceable));
			return false;
		}
	}

	if (num_pages == 0)
		return true;

	if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem))
		return false;

	if (num_pages + kgem->aperture > kgem->aperture_high - kgem->aperture_fenced) {
		DBG(("%s: final aperture usage (%d + %d + %d) is greater than high water mark (%d)\n",
		     __FUNCTION__, kgem->aperture, kgem->aperture_fenced, num_pages, kgem->aperture_high));
		return aperture_check(kgem, num_pages);
	}

	if (busy)
		return true;

	return kgem_flush(kgem, flush);
}
5985
/*
 * Record a 32-bit relocation at batch position `pos` targeting `bo`
 * (or the batch itself when bo is NULL).  Walks proxy chains, adds the
 * target to the batch if needed, claims a fence register when
 * requested on pre-gen4 hardware, and marks write targets dirty.
 * Returns the presumed offset (plus delta) to write into the batch.
 * Pre-gen8 only; 64-bit platforms use kgem_add_reloc64.
 */
uint32_t kgem_add_reloc(struct kgem *kgem,
			uint32_t pos,
			struct kgem_bo *bo,
			uint32_t read_write_domain,
			uint32_t delta)
{
	int index;

	DBG(("%s: handle=%d, pos=%d, delta=%d, domains=%08x\n",
	     __FUNCTION__, bo ? bo->handle : 0, pos, delta, read_write_domain));

	assert(kgem->gen < 0100);
	/* A write domain (low 15 bits) requires a target bo. */
	assert((read_write_domain & 0x7fff) == 0 || bo != NULL);

	index = kgem->nreloc++;
	assert(index < ARRAY_SIZE(kgem->reloc));
	kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]);
	if (bo) {
		assert(kgem->mode != KGEM_NONE);
		assert(bo->refcnt);
		/* Collapse proxy chains, accumulating their sub-offsets. */
		while (bo->proxy) {
			DBG(("%s: adding proxy [delta=%d] for handle=%d\n",
			     __FUNCTION__, bo->delta, bo->handle));
			delta += bo->delta;
			assert(bo->handle == bo->proxy->handle);
			/* need to release the cache upon batch submit */
			if (bo->exec == NULL) {
				list_move_tail(&bo->request,
					       &kgem->next_request->buffers);
				bo->rq = MAKE_REQUEST(kgem->next_request,
						      kgem->ring);
				bo->exec = &_kgem_dummy_exec;
				bo->domain = DOMAIN_GPU;
			}

			if (read_write_domain & 0x7fff && !bo->gpu_dirty)
				__kgem_bo_mark_dirty(bo);

			bo = bo->proxy;
			assert(bo->refcnt);
		}
		assert(bo->refcnt);

		if (bo->exec == NULL)
			kgem_add_bo(kgem, bo);
		assert(bo->rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
		assert(RQ_RING(bo->rq) == kgem->ring);

		if (kgem->gen < 040 && read_write_domain & KGEM_RELOC_FENCED) {
			/* Claim a fence register for a tiled target that does
			 * not already have one; aperture accounting follows.
			 */
			if (bo->tiling &&
			    (bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
				assert(bo->tiling == I915_TILING_X);
				assert(kgem->nfence < kgem->fence_max);
				kgem->aperture_fenced +=
					kgem_bo_fenced_size(kgem, bo);
				kgem->nfence++;
			}
			bo->exec->flags |= EXEC_OBJECT_NEEDS_FENCE;
		}

		kgem->reloc[index].delta = delta;
		kgem->reloc[index].target_handle = bo->target_handle;
		kgem->reloc[index].presumed_offset = bo->presumed_offset;

		if (read_write_domain & 0x7fff && !bo->gpu_dirty) {
			assert(!bo->snoop || kgem->can_blt_cpu);
			__kgem_bo_mark_dirty(bo);
		}

		delta += bo->presumed_offset;
	} else {
		/* Self-relocation into the batch buffer itself; remember it
		 * so it can be fixed up once the batch handle is known.
		 */
		kgem->reloc[index].delta = delta;
		kgem->reloc[index].target_handle = ~0U;
		kgem->reloc[index].presumed_offset = 0;
		if (kgem->nreloc__self < 256)
			kgem->reloc__self[kgem->nreloc__self++] = index;
	}
	kgem->reloc[index].read_domains = read_write_domain >> 16;
	kgem->reloc[index].write_domain = read_write_domain & 0x7fff;

	return delta;
}
6068
6069uint64_t kgem_add_reloc64(struct kgem *kgem,
6070			  uint32_t pos,
6071			  struct kgem_bo *bo,
6072			  uint32_t read_write_domain,
6073			  uint64_t delta)
6074{
6075	int index;
6076
6077	DBG(("%s: handle=%d, pos=%d, delta=%ld, domains=%08x\n",
6078	     __FUNCTION__, bo ? bo->handle : 0, pos, (long)delta, read_write_domain));
6079
6080	assert(kgem->gen >= 0100);
6081	assert((read_write_domain & 0x7fff) == 0 || bo != NULL);
6082
6083	index = kgem->nreloc++;
6084	assert(index < ARRAY_SIZE(kgem->reloc));
6085	kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]);
6086	if (bo) {
6087		assert(kgem->mode != KGEM_NONE);
6088		assert(bo->refcnt);
6089		while (bo->proxy) {
6090			DBG(("%s: adding proxy [delta=%ld] for handle=%d\n",
6091			     __FUNCTION__, (long)bo->delta, bo->handle));
6092			delta += bo->delta;
6093			assert(bo->handle == bo->proxy->handle);
6094			/* need to release the cache upon batch submit */
6095			if (bo->exec == NULL) {
6096				list_move_tail(&bo->request,
6097					       &kgem->next_request->buffers);
6098				bo->rq = MAKE_REQUEST(kgem->next_request,
6099						      kgem->ring);
6100				bo->exec = &_kgem_dummy_exec;
6101				bo->domain = DOMAIN_GPU;
6102			}
6103
6104			if (read_write_domain & 0x7fff && !bo->gpu_dirty)
6105				__kgem_bo_mark_dirty(bo);
6106
6107			bo = bo->proxy;
6108			assert(bo->refcnt);
6109		}
6110		assert(bo->refcnt);
6111
6112		if (bo->exec == NULL)
6113			kgem_add_bo(kgem, bo);
6114		assert(bo->rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
6115		assert(RQ_RING(bo->rq) == kgem->ring);
6116
6117		DBG(("%s[%d] = (delta=%d, target handle=%d, presumed=%llx)\n",
6118					__FUNCTION__, index, delta, bo->target_handle, (long long)bo->presumed_offset));
6119		kgem->reloc[index].delta = delta;
6120		kgem->reloc[index].target_handle = bo->target_handle;
6121		kgem->reloc[index].presumed_offset = bo->presumed_offset;
6122
6123		if (read_write_domain & 0x7fff && !bo->gpu_dirty) {
6124			assert(!bo->snoop || kgem->can_blt_cpu);
6125			__kgem_bo_mark_dirty(bo);
6126		}
6127
6128		delta += bo->presumed_offset;
6129	} else {
6130		DBG(("%s[%d] = (delta=%d, target handle=batch)\n",
6131					__FUNCTION__, index, delta));
6132		kgem->reloc[index].delta = delta;
6133		kgem->reloc[index].target_handle = ~0U;
6134		kgem->reloc[index].presumed_offset = 0;
6135		if (kgem->nreloc__self < 256)
6136			kgem->reloc__self[kgem->nreloc__self++] = index;
6137	}
6138	kgem->reloc[index].read_domains = read_write_domain >> 16;
6139	kgem->reloc[index].write_domain = read_write_domain & 0x7fff;
6140
6141	return delta;
6142}
6143
/*
 * Evict inactive memory mappings of the given type (MAP_GTT or
 * MAP_CPU) to keep the number of open vmas bounded.  Eviction walks
 * the inactive buckets round-robin starting near `bucket`, unmapping
 * the least-recently used entry each pass until the cache is drained
 * or no candidates remain.
 */
static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket)
{
	int i, j;

	DBG(("%s: type=%d, count=%d (bucket: %d)\n",
	     __FUNCTION__, type, kgem->vma[type].count, bucket));
	if (kgem->vma[type].count <= 0)
	       return;

	if (kgem->need_purge)
		kgem_purge_cache(kgem);

	/* vma are limited on a per-process basis to around 64k.
	 * This includes all malloc arenas as well as other file
	 * mappings. In order to be fair and not hog the cache,
	 * and more importantly not to exhaust that limit and to
	 * start failing mappings, we keep our own number of open
	 * vma to within a conservative value.
	 */
	i = 0;
	while (kgem->vma[type].count > 0) {
		struct kgem_bo *bo = NULL;

		/* Scan up to one full cycle of buckets for a victim; `i`
		 * persists across iterations so the scan rotates fairly.
		 */
		for (j = 0;
		     bo == NULL && j < ARRAY_SIZE(kgem->vma[type].inactive);
		     j++) {
			struct list *head = &kgem->vma[type].inactive[i++%ARRAY_SIZE(kgem->vma[type].inactive)];
			if (!list_is_empty(head))
				bo = list_last_entry(head, struct kgem_bo, vma);
		}
		if (bo == NULL)
			break;

		DBG(("%s: discarding inactive %s vma cache for %d\n",
		     __FUNCTION__, type ? "CPU" : "GTT", bo->handle));

		assert(bo->rq == NULL);
		if (type) {
			/* CPU mapping: MAP() strips the flag bits first. */
			VG(VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map__cpu), bytes(bo)));
			munmap(MAP(bo->map__cpu), bytes(bo));
			bo->map__cpu = NULL;
		} else {
			/* GTT type covers both WC and GTT mappings. */
			if (bo->map__wc) {
				VG(VALGRIND_MAKE_MEM_NOACCESS(bo->map__wc, bytes(bo)));
				munmap(bo->map__wc, bytes(bo));
				bo->map__wc = NULL;
			}
			if (bo->map__gtt) {
				munmap(bo->map__gtt, bytes(bo));
				bo->map__gtt = NULL;
			}
		}

		list_del(&bo->vma);
		kgem->vma[type].count--;

		/* With its mapping gone, let the kernel reclaim the pages;
		 * if it refuses, release the bo entirely.
		 */
		if (!bo->purged && !kgem_bo_set_purgeable(kgem, bo)) {
			DBG(("%s: freeing unpurgeable old mapping\n",
			     __FUNCTION__));
			kgem_bo_free(kgem, bo);
		}
	}
}
6207
/*
 * Return a GPU-coherent mapping for bo: a GTT mapping when the bo is
 * tiled (the GTT performs detiling) or WC mmap is unavailable,
 * otherwise a cheaper write-combining mapping.  Cached mappings are
 * reused; NULL on failure.
 */
static void *__kgem_bo_map__gtt_or_wc(struct kgem *kgem, struct kgem_bo *bo)
{
	void *ptr;

	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));

	assert(bo->proxy == NULL);
	assert(!bo->snoop);

	/* Creating a new mapping may exhaust the vma limit; trim first. */
	kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));

	if (bo->tiling || !kgem->has_wc_mmap) {
		assert(num_pages(bo) <= kgem->aperture_mappable / 2);
		assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y);

		ptr = bo->map__gtt;
		if (ptr == NULL)
			ptr = __kgem_bo_map__gtt(kgem, bo);
	} else {
		ptr = bo->map__wc;
		if (ptr == NULL)
			ptr = __kgem_bo_map__wc(kgem, bo);
	}

	return ptr;
}
6234
6235void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo)
6236{
6237	DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
6238	     bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));
6239
6240	assert(bo->proxy == NULL);
6241	assert(list_is_empty(&bo->list));
6242	assert_tiling(kgem, bo);
6243	assert(!bo->purged || bo->reusable);
6244
6245	if (bo->tiling == I915_TILING_NONE && !bo->scanout && kgem->has_llc) {
6246		DBG(("%s: converting request for GTT map into CPU map\n",
6247		     __FUNCTION__));
6248		return kgem_bo_map__cpu(kgem, bo);
6249	}
6250
6251	return __kgem_bo_map__gtt_or_wc(kgem, bo);
6252}
6253
/*
 * Map a bo for synchronous read/write access.  Untiled bos that are
 * CPU-coherent (LLC or already in the CPU domain) get a CPU mapping
 * with a CPU-domain sync; otherwise a GTT/WC mapping is returned and
 * the bo is moved into the GTT domain via set-domain.  Returns NULL
 * on mapping failure.
 */
void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
{
	void *ptr;

	DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
	     bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));

	assert(bo->proxy == NULL);
	assert(list_is_empty(&bo->list));
	assert(bo->exec == NULL);
	assert_tiling(kgem, bo);
	assert(!bo->purged || bo->reusable);

	if (bo->tiling == I915_TILING_NONE && !bo->scanout &&
	    (kgem->has_llc || bo->domain == DOMAIN_CPU)) {
		DBG(("%s: converting request for GTT map into CPU map\n",
		     __FUNCTION__));
		ptr = kgem_bo_map__cpu(kgem, bo);
		if (ptr)
			kgem_bo_sync__cpu(kgem, bo);
		return ptr;
	}

	ptr = __kgem_bo_map__gtt_or_wc(kgem, bo);

	/* Move the bo into the GTT domain so CPU access through the
	 * aperture is coherent with outstanding GPU rendering.
	 */
	if (bo->domain != DOMAIN_GTT || FORCE_MMAP_SYNC & (1 << DOMAIN_GTT)) {
		struct drm_i915_gem_set_domain set_domain;

		DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
		     bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle)));

		/* XXX use PROT_READ to avoid the write flush? */

		VG_CLEAR(set_domain);
		set_domain.handle = bo->handle;
		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
		set_domain.write_domain = I915_GEM_DOMAIN_GTT;
		/* set-domain waits for rendering; failure implies a hang. */
		if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
			kgem_throttle(kgem);
		}
		kgem_bo_retire(kgem, bo);
		bo->domain = DOMAIN_GTT;
		bo->gtt_dirty = true;
	}

	return ptr;
}
6302
/*
 * Map a bo through the GTT (or WC) without any domain synchronisation;
 * the caller is responsible for ordering against the GPU.
 */
void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
{
	DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
	     bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));

	assert(bo->proxy == NULL);
	assert(bo->exec == NULL);
	assert(list_is_empty(&bo->list));
	assert_tiling(kgem, bo);
	assert(!bo->purged || bo->reusable);

	return __kgem_bo_map__gtt_or_wc(kgem, bo);
}
6316
6317void *kgem_bo_map__wc(struct kgem *kgem, struct kgem_bo *bo)
6318{
6319	DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
6320	     bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));
6321
6322	assert(bo->proxy == NULL);
6323	assert(bo->exec == NULL);
6324	assert(list_is_empty(&bo->list));
6325	assert_tiling(kgem, bo);
6326	assert(!bo->purged || bo->reusable);
6327
6328	if (bo->map__wc)
6329		return bo->map__wc;
6330
6331	return __kgem_bo_map__wc(kgem, bo);
6332}
6333
6334void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
6335{
6336	DBG(("%s(handle=%d, size=%d, map=%p:%p)\n",
6337	     __FUNCTION__, bo->handle, bytes(bo), bo->map__gtt, bo->map__cpu));
6338	assert(!bo->purged);
6339	assert(list_is_empty(&bo->list));
6340	assert(bo->proxy == NULL);
6341	assert_tiling(kgem, bo);
6342
6343	if (bo->map__cpu)
6344		return MAP(bo->map__cpu);
6345
6346	kgem_trim_vma_cache(kgem, MAP_CPU, bucket(bo));
6347
6348	return __kgem_bo_map__cpu(kgem, bo);
6349}
6350
6351void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo)
6352{
6353	void *ptr;
6354
6355	if (bo->tiling == I915_TILING_NONE && kgem->has_llc) {
6356		ptr = MAP(bo->map__cpu);
6357		if (ptr == NULL)
6358			ptr = __kgem_bo_map__cpu(kgem, bo);
6359	} else if (bo->tiling || !kgem->has_wc_mmap) {
6360		ptr = bo->map__gtt;
6361		if (ptr == NULL)
6362			ptr = __kgem_bo_map__gtt(kgem, bo);
6363	} else {
6364		ptr = bo->map__wc;
6365		if (ptr == NULL)
6366			ptr = __kgem_bo_map__wc(kgem, bo);
6367	}
6368
6369	return ptr;
6370}
6371
6372
/*
 * Export bo as a global GEM name via flink.  The bo becomes
 * non-reusable since another process may now hold a reference.
 * Returns the flink name, or 0 on failure.
 */
uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo)
{
	struct drm_gem_flink flink;

	VG_CLEAR(flink);
	flink.handle = bo->handle;
	if (do_ioctl(kgem->fd, DRM_IOCTL_GEM_FLINK, &flink))
		return 0;

	DBG(("%s: flinked handle=%d to name=%d, marking non-reusable\n",
	     __FUNCTION__, flink.handle, flink.name));

	/* Ordinarily giving the name aware makes the buffer non-reusable.
	 * However, we track the lifetime of all clients and their hold
	 * on the buffer, and *presuming* they do not pass it on to a third
	 * party, we track the lifetime accurately.
	 */
	bo->reusable = false;

	kgem_bo_unclean(kgem, bo);

	return flink.name;
}
6396
/*
 * Wrap user memory in a GEM userptr bo.  The range is expanded to
 * whole pages for the kernel; if ptr was not page-aligned, a proxy bo
 * is layered on top to re-expose exactly [ptr, ptr+size).  The memory
 * remains owned by the caller.  Returns NULL when userptr is
 * unsupported or any step fails.
 */
struct kgem_bo *kgem_create_map(struct kgem *kgem,
				void *ptr, uint32_t size,
				bool read_only)
{
	struct kgem_bo *bo;
	uintptr_t first_page, last_page;
	uint32_t handle;

	assert(MAP(ptr) == ptr);

	DBG(("%s(%p size=%d, read-only?=%d) - has_userptr?=%d\n", __FUNCTION__,
	     ptr, size, read_only, kgem->has_userptr));
	if (!kgem->has_userptr)
		return NULL;

	/* Round the range out to page boundaries as required by userptr. */
	first_page = (uintptr_t)ptr;
	last_page = first_page + size + PAGE_SIZE - 1;

	first_page &= ~(PAGE_SIZE-1);
	last_page &= ~(PAGE_SIZE-1);
	assert(last_page > first_page);

	handle = gem_userptr(kgem->fd,
			     (void *)first_page, last_page-first_page,
			     read_only);
	if (handle == 0) {
		DBG(("%s: import failed, errno=%d\n", __FUNCTION__, errno));
		return NULL;
	}

	bo = __kgem_bo_alloc(handle, (last_page - first_page) / PAGE_SIZE);
	if (bo == NULL) {
		gem_close(kgem->fd, handle);
		return NULL;
	}

	bo->unique_id = kgem_get_unique_id(kgem);
	/* Without LLC, userptr pages must be treated as snooped memory. */
	bo->snoop = !kgem->has_llc;
	debug_alloc__bo(kgem, bo);

	if (first_page != (uintptr_t)ptr) {
		struct kgem_bo *proxy;

		/* Re-expose just the caller's sub-range; the proxy holds
		 * the only reference to the page-aligned parent.
		 */
		proxy = kgem_create_proxy(kgem, bo,
					  (uintptr_t)ptr - first_page, size);
		kgem_bo_destroy(kgem, bo);
		if (proxy == NULL)
			return NULL;

		bo = proxy;
	}

	/* Record the user pointer as the (tagged) CPU mapping. */
	bo->map__cpu = MAKE_USER_MAP(ptr);

	DBG(("%s(ptr=%p, size=%d, pages=%d, read_only=%d) => handle=%d (proxy? %d)\n",
	     __FUNCTION__, ptr, size, NUM_PAGES(size), read_only, handle, bo->proxy != NULL));
	return bo;
}
6455
/*
 * Synchronise bo for CPU read/write: submit any pending batch that
 * references it, then move it into the CPU domain via set-domain
 * (which waits for outstanding rendering).
 */
void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo)
{
	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
	assert(!bo->scanout);
	assert_tiling(kgem, bo);

	kgem_bo_submit(kgem, bo);

	/* SHM pixmaps use proxies for subpage offsets */
	assert(!bo->purged);
	while (bo->proxy)
		bo = bo->proxy;
	assert(!bo->purged);

	if (bo->domain != DOMAIN_CPU || FORCE_MMAP_SYNC & (1 << DOMAIN_CPU)) {
		struct drm_i915_gem_set_domain set_domain;

		DBG(("%s: SYNC: handle=%d, needs_flush? %d, domain? %d, busy? %d\n",
		     __FUNCTION__, bo->handle,
		     bo->needs_flush, bo->domain,
		     __kgem_busy(kgem, bo->handle)));

		VG_CLEAR(set_domain);
		set_domain.handle = bo->handle;
		set_domain.read_domains = I915_GEM_DOMAIN_CPU;
		set_domain.write_domain = I915_GEM_DOMAIN_CPU;

		/* set-domain waits for rendering; failure implies a hang. */
		if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
			kgem_throttle(kgem);
		}
		kgem_bo_retire(kgem, bo);
		bo->domain = DOMAIN_CPU;
	}
}
6491
/*
 * Synchronise bo for CPU access, distinguishing read-only from
 * read/write.  Read-only access avoids claiming the CPU write domain,
 * so the pages need not be flushed back when the GPU next uses the bo.
 */
void kgem_bo_sync__cpu_full(struct kgem *kgem, struct kgem_bo *bo, bool write)
{
	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
	assert(!bo->scanout || !write);
	assert_tiling(kgem, bo);

	/* Reads only need the batch flushed if results are pending. */
	if (write || bo->needs_flush)
		kgem_bo_submit(kgem, bo);

	/* SHM pixmaps use proxies for subpage offsets */
	assert(!bo->purged);
	assert(bo->refcnt);
	while (bo->proxy)
		bo = bo->proxy;
	assert(bo->refcnt);
	assert(!bo->purged);

	if (bo->domain != DOMAIN_CPU || FORCE_MMAP_SYNC & (1 << DOMAIN_CPU)) {
		struct drm_i915_gem_set_domain set_domain;

		DBG(("%s: SYNC: handle=%d, needs_flush? %d, domain? %d, busy? %d\n",
		     __FUNCTION__, bo->handle,
		     bo->needs_flush, bo->domain,
		     __kgem_busy(kgem, bo->handle)));

		VG_CLEAR(set_domain);
		set_domain.handle = bo->handle;
		set_domain.read_domains = I915_GEM_DOMAIN_CPU;
		set_domain.write_domain = write ? I915_GEM_DOMAIN_CPU : 0;

		/* set-domain waits for rendering; failure implies a hang. */
		if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
			kgem_throttle(kgem);
		}
		if (write) {
			kgem_bo_retire(kgem, bo);
			bo->domain = DOMAIN_CPU;
		} else {
			/* Read-only: the bo may still be busy on the GPU. */
			if (bo->exec == NULL)
				kgem_bo_maybe_retire(kgem, bo);
			bo->domain = DOMAIN_NONE;
		}
	}
}
6536
/*
 * Synchronise bo for read/write access through the GTT: submit any
 * pending batch that references it, then move it into the GTT domain
 * via set-domain (which waits for outstanding rendering).
 */
void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo)
{
	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
	assert(bo->refcnt);
	assert(bo->proxy == NULL);
	assert_tiling(kgem, bo);

	kgem_bo_submit(kgem, bo);

	if (bo->domain != DOMAIN_GTT || FORCE_MMAP_SYNC & (1 << DOMAIN_GTT)) {
		struct drm_i915_gem_set_domain set_domain;

		DBG(("%s: SYNC: handle=%d, needs_flush? %d, domain? %d, busy? %d\n",
		     __FUNCTION__, bo->handle,
		     bo->needs_flush, bo->domain,
		     __kgem_busy(kgem, bo->handle)));

		VG_CLEAR(set_domain);
		set_domain.handle = bo->handle;
		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
		set_domain.write_domain = I915_GEM_DOMAIN_GTT;

		/* set-domain waits for rendering; failure implies a hang. */
		if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
			kgem_throttle(kgem);
		}
		kgem_bo_retire(kgem, bo);
		bo->domain = DOMAIN_GTT;
		bo->gtt_dirty = true;
	}
}
6568
6569void kgem_clear_dirty(struct kgem *kgem)
6570{
6571	struct list * const buffers = &kgem->next_request->buffers;
6572	struct kgem_bo *bo;
6573
6574	list_for_each_entry(bo, buffers, request) {
6575		if (!bo->gpu_dirty)
6576			break;
6577
6578		bo->gpu_dirty = false;
6579	}
6580}
6581
/* Create a lightweight proxy bo that aliases a byte range [offset,
 * offset+length) of target.  The proxy shares the target's GEM handle
 * and inherits its tiling/pitch/flush/snoop state, holding a reference
 * on the target for its lifetime.  Returns NULL on allocation failure.
 */
struct kgem_bo *kgem_create_proxy(struct kgem *kgem,
				  struct kgem_bo *target,
				  int offset, int length)
{
	struct kgem_bo *bo;

	DBG(("%s: target handle=%d [proxy? %d], offset=%d, length=%d, io=%d\n",
	     __FUNCTION__, target->handle, target->proxy ? target->proxy->delta : -1,
	     offset, length, target->io));

	/* The proxy reuses the target's handle; only the bookkeeping is new. */
	bo = __kgem_bo_alloc(target->handle, length);
	if (bo == NULL)
		return NULL;

	bo->unique_id = kgem_get_unique_id(kgem);
	bo->reusable = false;
	bo->size.bytes = length;

	/* Only a first-level proxy of an io buffer keeps the io property. */
	bo->io = target->io && target->proxy == NULL;
	bo->gpu_dirty = target->gpu_dirty;
	bo->tiling = target->tiling;
	bo->pitch = target->pitch;
	bo->flush = target->flush;
	bo->snoop = target->snoop;

	assert(!bo->scanout);
	bo->proxy = kgem_bo_reference(target);
	bo->delta = offset;	/* byte offset into the target */

	/* Proxies are only tracked for busyness on the current rq */
	if (target->exec && !bo->io) {
		assert(RQ(target->rq) == kgem->next_request);
		list_move_tail(&bo->request, &kgem->next_request->buffers);
		/* Dummy exec entry: marks the proxy as in-flight without
		 * emitting its own relocation slot.
		 */
		bo->exec = &_kgem_dummy_exec;
		bo->rq = target->rq;
	}

	return bo;
}
6621
6622static struct kgem_buffer *
6623buffer_alloc(void)
6624{
6625	struct kgem_buffer *bo;
6626
6627	bo = malloc(sizeof(*bo));
6628	if (bo == NULL)
6629		return NULL;
6630
6631	bo->mem = NULL;
6632	bo->need_io = false;
6633	bo->mmapped = MMAPPED_CPU;
6634
6635	return bo;
6636}
6637
6638static struct kgem_buffer *
6639buffer_alloc_with_data(int num_pages)
6640{
6641	struct kgem_buffer *bo;
6642
6643	bo = malloc(sizeof(*bo) + 2*UPLOAD_ALIGNMENT + num_pages * PAGE_SIZE);
6644	if (bo == NULL)
6645		return NULL;
6646
6647	bo->mem = (void *)ALIGN((uintptr_t)bo + sizeof(*bo), UPLOAD_ALIGNMENT);
6648	bo->mmapped = false;
6649	return bo;
6650}
6651
6652static inline bool
6653use_snoopable_buffer(struct kgem *kgem, uint32_t flags)
6654{
6655	if ((flags & KGEM_BUFFER_WRITE) == 0)
6656		return kgem->gen >= 030;
6657
6658	return true;
6659}
6660
/* Take ownership of a recycled bo: copy its state into bo->base, splice
 * its request/vma list membership across to the new embedded struct,
 * and free the old shell.  The caller ends up holding the sole
 * reference (refcnt == 1).
 */
static void
init_buffer_from_bo(struct kgem_buffer *bo, struct kgem_bo *old)
{
	DBG(("%s: reusing handle=%d for buffer\n",
	     __FUNCTION__, old->handle));

	assert(old->proxy == NULL);
	assert(list_is_empty(&old->list));

	memcpy(&bo->base, old, sizeof(*old));
	/* list_replace would corrupt an empty list head, so only splice
	 * the request link if the bo is actually on a request.
	 */
	if (old->rq)
		list_replace(&old->request, &bo->base.request);
	else
		list_init(&bo->base.request);
	list_replace(&old->vma, &bo->base.vma);
	list_init(&bo->base.list);
	/* The old shell's state now lives in bo->base; discard it. */
	free(old);

	assert(bo->base.tiling == I915_TILING_NONE);

	bo->base.refcnt = 1;
}
6683
/* Look for a recyclable snooped bo of at least 'alloc' pages in the
 * snoop cache and wrap it as a kgem_buffer with a CPU mapping.
 * Returns NULL if the cache has no match or the mapping fails.
 */
static struct kgem_buffer *
search_snoopable_buffer(struct kgem *kgem, unsigned alloc)
{
	struct kgem_buffer *bo;
	struct kgem_bo *old;

	old = search_snoop_cache(kgem, alloc, 0);
	if (old) {
		if (!old->io) {
			/* Plain bo: wrap it in a fresh kgem_buffer shell. */
			bo = buffer_alloc();
			if (bo == NULL)
				return NULL;

			init_buffer_from_bo(bo, old);
		} else {
			/* Already a kgem_buffer; just revive the reference. */
			bo = (struct kgem_buffer *)old;
			bo->base.refcnt = 1;
		}

		DBG(("%s: created CPU handle=%d for buffer, size %d\n",
		     __FUNCTION__, bo->base.handle, num_pages(&bo->base)));

		assert(bo->base.snoop);
		assert(bo->base.tiling == I915_TILING_NONE);
		assert(num_pages(&bo->base) >= alloc);
		assert(bo->mmapped == MMAPPED_CPU);
		assert(bo->need_io == false);

		bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
		if (bo->mem == NULL) {
			/* Mapping failed: drop the bo entirely (refcnt 0
			 * keeps kgem_bo_free's accounting happy).
			 */
			bo->base.refcnt = 0;
			kgem_bo_free(kgem, &bo->base);
			bo = NULL;
		}

		return bo;
	}

	return NULL;
}
6724
/* Create a CPU-cached (snoopable) upload buffer of 'alloc' pages,
 * trying the cheapest mechanism the hardware supports, in order:
 *   1. LLC platforms: any bo is coherently CPU-mapped already;
 *   2. set-caching ioctl: flip an ordinary bo to SNOOPED;
 *   3. userptr: wrap page-aligned malloc'ed memory as a GEM object.
 * Returns NULL if all applicable mechanisms fail.
 */
static struct kgem_buffer *
create_snoopable_buffer(struct kgem *kgem, unsigned alloc)
{
	struct kgem_buffer *bo;
	uint32_t handle;

	if (kgem->has_llc) {
		struct kgem_bo *old;

		bo = buffer_alloc();
		if (bo == NULL)
			return NULL;

		/* Prefer recycling an inactive CPU-mappable bo of the
		 * exact size before allocating fresh pages.
		 */
		old = search_linear_cache(kgem, alloc,
					 CREATE_INACTIVE | CREATE_CPU_MAP | CREATE_EXACT);
		if (old) {
			init_buffer_from_bo(bo, old);
		} else {
			handle = gem_create(kgem->fd, alloc);
			if (handle == 0) {
				free(bo);
				return NULL;
			}

			__kgem_bo_init(&bo->base, handle, alloc);
			debug_alloc__bo(kgem, &bo->base);
			DBG(("%s: created CPU (LLC) handle=%d for buffer, size %d\n",
			     __FUNCTION__, bo->base.handle, alloc));
		}

		assert(bo->base.refcnt == 1);
		assert(bo->mmapped == MMAPPED_CPU);
		assert(bo->need_io == false);

		bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
		if (bo->mem != NULL)
			return bo;

		bo->base.refcnt = 0; /* for valgrind */
		kgem_bo_free(kgem, &bo->base);
	}

	if (kgem->has_caching) {
		struct kgem_bo *old;

		bo = buffer_alloc();
		if (bo == NULL)
			return NULL;

		old = search_linear_cache(kgem, alloc,
					 CREATE_INACTIVE | CREATE_CPU_MAP | CREATE_EXACT);
		if (old) {
			init_buffer_from_bo(bo, old);
		} else {
			handle = gem_create(kgem->fd, alloc);
			if (handle == 0) {
				free(bo);
				return NULL;
			}

			__kgem_bo_init(&bo->base, handle, alloc);
			debug_alloc__bo(kgem, &bo->base);
			DBG(("%s: created CPU handle=%d for buffer, size %d\n",
			     __FUNCTION__, bo->base.handle, alloc));
		}

		assert(bo->base.refcnt == 1);
		assert(bo->mmapped == MMAPPED_CPU);
		assert(bo->need_io == false);
		assert(!__kgem_busy(kgem, bo->base.handle));

		/* Ask the kernel to make this bo cache-coherent. */
		if (!gem_set_caching(kgem->fd, bo->base.handle, SNOOPED))
			goto free_caching;

		bo->base.snoop = true;

		bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
		if (bo->mem == NULL)
			goto free_caching;

		return bo;

free_caching:
		bo->base.refcnt = 0; /* for valgrind */
		kgem_bo_free(kgem, &bo->base);
	}

	if (kgem->has_userptr) {
		bo = buffer_alloc();
		if (bo == NULL)
			return NULL;

		//if (posix_memalign(&ptr, 64, ALIGN(size, 64)))
		/* userptr requires page-aligned memory of whole pages. */
		if (posix_memalign(&bo->mem, PAGE_SIZE, alloc * PAGE_SIZE)) {
			free(bo);
			return NULL;
		}

		handle = gem_userptr(kgem->fd, bo->mem, alloc * PAGE_SIZE, false);
		if (handle == 0) {
			free(bo->mem);
			free(bo);
			return NULL;
		}

		__kgem_bo_init(&bo->base, handle, alloc);
		debug_alloc__bo(kgem, &bo->base);
		DBG(("%s: created snoop handle=%d for buffer\n",
		     __FUNCTION__, bo->base.handle));

		assert(bo->mmapped == MMAPPED_CPU);
		assert(bo->need_io == false);

		bo->base.refcnt = 1;
		bo->base.snoop = true;
		/* Record that the "mapping" is really our own allocation. */
		bo->base.map__cpu = MAKE_USER_MAP(bo->mem);

		return bo;
	}

	return NULL;
}
6847
6848struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
6849				   uint32_t size, uint32_t flags,
6850				   void **ret)
6851{
6852	struct kgem_buffer *bo;
6853	unsigned offset, alloc;
6854	struct kgem_bo *old;
6855
6856	DBG(("%s: size=%d, flags=%x [write?=%d, inplace?=%d, last?=%d]\n",
6857	     __FUNCTION__, size, flags,
6858	     !!(flags & KGEM_BUFFER_WRITE),
6859	     !!(flags & KGEM_BUFFER_INPLACE),
6860	     !!(flags & KGEM_BUFFER_LAST)));
6861	assert(size);
6862	/* we should never be asked to create anything TOO large */
6863	assert(size <= kgem->max_object_size);
6864
6865#if !DBG_NO_UPLOAD_CACHE
6866	list_for_each_entry(bo, &kgem->batch_buffers, base.list) {
6867		assert(bo->base.io);
6868		assert(bo->base.refcnt >= 1);
6869
6870		/* We can reuse any write buffer which we can fit */
6871		if (flags == KGEM_BUFFER_LAST &&
6872		    bo->write == KGEM_BUFFER_WRITE &&
6873		    bo->base.refcnt == 1 &&
6874		    bo->mmapped == MMAPPED_NONE &&
6875		    size <= bytes(&bo->base)) {
6876			DBG(("%s: reusing write buffer for read of %d bytes? used=%d, total=%d\n",
6877			     __FUNCTION__, size, bo->used, bytes(&bo->base)));
6878			gem_write__cachealigned(kgem->fd, bo->base.handle,
6879						0, bo->used, bo->mem);
6880			assert(list_is_empty(&bo->base.vma));
6881			bo->need_io = 0;
6882			bo->write = 0;
6883			offset = 0;
6884			bo->used = size;
6885			goto done;
6886		}
6887
6888		if (flags & KGEM_BUFFER_WRITE) {
6889			if ((bo->write & KGEM_BUFFER_WRITE) == 0 ||
6890			    (((bo->write & ~flags) & KGEM_BUFFER_INPLACE) &&
6891			     !bo->base.snoop)) {
6892				DBG(("%s: skip write %x buffer, need %x\n",
6893				     __FUNCTION__, bo->write, flags));
6894				continue;
6895			}
6896			assert(bo->mmapped || bo->need_io);
6897		} else {
6898			if (bo->write & KGEM_BUFFER_WRITE) {
6899				DBG(("%s: skip write %x buffer, need %x\n",
6900				     __FUNCTION__, bo->write, flags));
6901				continue;
6902			}
6903		}
6904
6905		if (bo->used + size <= bytes(&bo->base)) {
6906			DBG(("%s: reusing buffer? used=%d + size=%d, total=%d\n",
6907			     __FUNCTION__, bo->used, size, bytes(&bo->base)));
6908			offset = bo->used;
6909			bo->used += size;
6910			goto done;
6911		}
6912	}
6913
6914	if (flags & KGEM_BUFFER_WRITE) {
6915		list_for_each_entry(bo, &kgem->active_buffers, base.list) {
6916			assert(bo->base.io);
6917			assert(bo->base.refcnt >= 1);
6918			assert(bo->base.exec == NULL);
6919			assert(bo->mmapped);
6920			assert(bo->mmapped == MMAPPED_GTT || kgem->has_llc || bo->base.snoop);
6921
6922			if ((bo->write & ~flags) & KGEM_BUFFER_INPLACE && !bo->base.snoop) {
6923				DBG(("%s: skip write %x buffer, need %x\n",
6924				     __FUNCTION__, bo->write, flags));
6925				continue;
6926			}
6927
6928			if (bo->used + size <= bytes(&bo->base)) {
6929				DBG(("%s: reusing buffer? used=%d + size=%d, total=%d\n",
6930				     __FUNCTION__, bo->used, size, bytes(&bo->base)));
6931				offset = bo->used;
6932				bo->used += size;
6933				list_move(&bo->base.list, &kgem->batch_buffers);
6934				goto done;
6935			}
6936
6937			if (bo->base.refcnt == 1 &&
6938			    size <= bytes(&bo->base) &&
6939			    (bo->base.rq == NULL ||
6940			     !__kgem_busy(kgem, bo->base.handle))) {
6941				DBG(("%s: reusing whole buffer? size=%d, total=%d\n",
6942				     __FUNCTION__, size, bytes(&bo->base)));
6943				__kgem_bo_clear_busy(&bo->base);
6944				assert(list_is_empty(&bo->base.vma));
6945
6946				switch (bo->mmapped) {
6947				case MMAPPED_CPU:
6948					kgem_bo_sync__cpu(kgem, &bo->base);
6949					break;
6950				case MMAPPED_GTT:
6951					kgem_bo_sync__gtt(kgem, &bo->base);
6952					break;
6953				}
6954
6955				offset = 0;
6956				bo->used = size;
6957				list_move(&bo->base.list, &kgem->batch_buffers);
6958				goto done;
6959			}
6960		}
6961	}
6962#endif
6963
6964#if !DBG_NO_MAP_UPLOAD
6965	/* Be a little more generous and hope to hold fewer mmappings */
6966	alloc = ALIGN(2*size, kgem->buffer_size);
6967	if (alloc > MAX_CACHE_SIZE)
6968		alloc = ALIGN(size, kgem->buffer_size);
6969	if (alloc > MAX_CACHE_SIZE)
6970		alloc = PAGE_ALIGN(size);
6971	assert(alloc);
6972
6973	alloc /= PAGE_SIZE;
6974	if (alloc > kgem->aperture_mappable / 4 && !kgem->has_wc_mmap)
6975		flags &= ~KGEM_BUFFER_INPLACE;
6976
6977	if (kgem->has_llc &&
6978	    (flags & KGEM_BUFFER_WRITE_INPLACE) != KGEM_BUFFER_WRITE_INPLACE) {
6979		bo = buffer_alloc();
6980		if (bo == NULL)
6981			goto skip_llc;
6982
6983		old = NULL;
6984		if ((flags & KGEM_BUFFER_WRITE) == 0)
6985			old = search_linear_cache(kgem, alloc, CREATE_CPU_MAP);
6986		if (old == NULL)
6987			old = search_linear_cache(kgem, alloc, CREATE_INACTIVE | CREATE_CPU_MAP);
6988		if (old == NULL)
6989			old = search_linear_cache(kgem, NUM_PAGES(size), CREATE_INACTIVE | CREATE_CPU_MAP);
6990		if (old) {
6991			DBG(("%s: found LLC handle=%d for buffer\n",
6992			     __FUNCTION__, old->handle));
6993
6994			init_buffer_from_bo(bo, old);
6995		} else {
6996			uint32_t handle = gem_create(kgem->fd, alloc);
6997			if (handle == 0) {
6998				free(bo);
6999				goto skip_llc;
7000			}
7001			__kgem_bo_init(&bo->base, handle, alloc);
7002			debug_alloc__bo(kgem, &bo->base);
7003			DBG(("%s: created LLC handle=%d for buffer\n",
7004			     __FUNCTION__, bo->base.handle));
7005		}
7006
7007		assert(bo->mmapped);
7008		assert(!bo->need_io);
7009
7010		bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
7011		if (bo->mem) {
7012			if (flags & KGEM_BUFFER_WRITE)
7013				kgem_bo_sync__cpu(kgem, &bo->base);
7014			flags &= ~KGEM_BUFFER_INPLACE;
7015			goto init;
7016		} else {
7017			bo->base.refcnt = 0; /* for valgrind */
7018			kgem_bo_free(kgem, &bo->base);
7019		}
7020	}
7021skip_llc:
7022
7023	if ((flags & KGEM_BUFFER_WRITE_INPLACE) == KGEM_BUFFER_WRITE_INPLACE) {
7024		/* The issue with using a GTT upload buffer is that we may
7025		 * cause eviction-stalls in order to free up some GTT space.
7026		 * An is-mappable? ioctl could help us detect when we are
7027		 * about to block, or some per-page magic in the kernel.
7028		 *
7029		 * XXX This is especially noticeable on memory constrained
7030		 * devices like gen2 or with relatively slow gpu like i3.
7031		 */
7032		DBG(("%s: searching for an inactive GTT map for upload\n",
7033		     __FUNCTION__));
7034		old = search_linear_cache(kgem, alloc,
7035					  CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP);
7036#if HAVE_I915_GEM_BUFFER_INFO
7037		if (old) {
7038			struct drm_i915_gem_buffer_info info;
7039
7040			/* An example of such a non-blocking ioctl might work */
7041
7042			VG_CLEAR(info);
7043			info.handle = handle;
7044			if (do_ioctl(kgem->fd,
7045				     DRM_IOCTL_I915_GEM_BUFFER_INFO,
7046				     &fino) == 0) {
7047				old->presumed_offset = info.addr;
7048				if ((info.flags & I915_GEM_MAPPABLE) == 0) {
7049					kgem_bo_move_to_inactive(kgem, old);
7050					old = NULL;
7051				}
7052			}
7053		}
7054#endif
7055		if (old == NULL)
7056			old = search_linear_cache(kgem, NUM_PAGES(size),
7057						  CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP);
7058		if (old == NULL) {
7059			old = search_linear_cache(kgem, alloc, CREATE_INACTIVE);
7060			if (old && !kgem_bo_can_map(kgem, old)) {
7061				_kgem_bo_destroy(kgem, old);
7062				old = NULL;
7063			}
7064		}
7065		if (old) {
7066			DBG(("%s: reusing handle=%d for buffer\n",
7067			     __FUNCTION__, old->handle));
7068			assert(kgem_bo_can_map(kgem, old));
7069			assert(!old->snoop);
7070			assert(old->rq == NULL);
7071
7072			bo = buffer_alloc();
7073			if (bo == NULL)
7074				return NULL;
7075
7076			init_buffer_from_bo(bo, old);
7077			assert(num_pages(&bo->base) >= NUM_PAGES(size));
7078
7079			assert(bo->mmapped);
7080			assert(bo->base.refcnt == 1);
7081
7082			bo->mem = kgem_bo_map(kgem, &bo->base);
7083			if (bo->mem) {
7084				if (bo->mem == MAP(bo->base.map__cpu))
7085					flags &= ~KGEM_BUFFER_INPLACE;
7086				else
7087					bo->mmapped = MMAPPED_GTT;
7088				goto init;
7089			} else {
7090				bo->base.refcnt = 0;
7091				kgem_bo_free(kgem, &bo->base);
7092			}
7093		}
7094	}
7095#else
7096	flags &= ~KGEM_BUFFER_INPLACE;
7097#endif
7098	/* Be more parsimonious with pwrite/pread/cacheable buffers */
7099	if ((flags & KGEM_BUFFER_INPLACE) == 0)
7100		alloc = NUM_PAGES(size);
7101
7102	if (use_snoopable_buffer(kgem, flags)) {
7103		bo = search_snoopable_buffer(kgem, alloc);
7104		if (bo) {
7105			if (flags & KGEM_BUFFER_WRITE)
7106				kgem_bo_sync__cpu(kgem, &bo->base);
7107			flags &= ~KGEM_BUFFER_INPLACE;
7108			goto init;
7109		}
7110
7111		if ((flags & KGEM_BUFFER_INPLACE) == 0) {
7112			bo = create_snoopable_buffer(kgem, alloc);
7113			if (bo)
7114				goto init;
7115		}
7116	}
7117
7118	flags &= ~KGEM_BUFFER_INPLACE;
7119
7120	old = NULL;
7121	if ((flags & KGEM_BUFFER_WRITE) == 0)
7122		old = search_linear_cache(kgem, alloc, 0);
7123	if (old == NULL)
7124		old = search_linear_cache(kgem, alloc, CREATE_INACTIVE);
7125	if (old) {
7126		DBG(("%s: reusing ordinary handle %d for io\n",
7127		     __FUNCTION__, old->handle));
7128		bo = buffer_alloc_with_data(num_pages(old));
7129		if (bo == NULL)
7130			return NULL;
7131
7132		init_buffer_from_bo(bo, old);
7133		bo->need_io = flags & KGEM_BUFFER_WRITE;
7134	} else {
7135		unsigned hint;
7136
7137		if (use_snoopable_buffer(kgem, flags)) {
7138			bo = create_snoopable_buffer(kgem, alloc);
7139			if (bo)
7140				goto init;
7141		}
7142
7143		bo = buffer_alloc();
7144		if (bo == NULL)
7145			return NULL;
7146
7147		hint = CREATE_INACTIVE;
7148		if (flags & KGEM_BUFFER_WRITE)
7149			hint |= CREATE_CPU_MAP;
7150		old = search_linear_cache(kgem, alloc, hint);
7151		if (old) {
7152			DBG(("%s: reusing handle=%d for buffer\n",
7153			     __FUNCTION__, old->handle));
7154
7155			init_buffer_from_bo(bo, old);
7156		} else {
7157			uint32_t handle = gem_create(kgem->fd, alloc);
7158			if (handle == 0) {
7159				free(bo);
7160				return NULL;
7161			}
7162
7163			DBG(("%s: created handle=%d for buffer\n",
7164			     __FUNCTION__, handle));
7165
7166			__kgem_bo_init(&bo->base, handle, alloc);
7167			debug_alloc__bo(kgem, &bo->base);
7168		}
7169
7170		assert(bo->mmapped);
7171		assert(!bo->need_io);
7172		assert(bo->base.refcnt == 1);
7173
7174		if (flags & KGEM_BUFFER_WRITE) {
7175			bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
7176			if (bo->mem != NULL) {
7177				kgem_bo_sync__cpu(kgem, &bo->base);
7178				goto init;
7179			}
7180		}
7181
7182		DBG(("%s: failing back to new pwrite buffer\n", __FUNCTION__));
7183		old = &bo->base;
7184		bo = buffer_alloc_with_data(num_pages(old));
7185		if (bo == NULL) {
7186			old->refcnt= 0;
7187			kgem_bo_free(kgem, old);
7188			return NULL;
7189		}
7190
7191		init_buffer_from_bo(bo, old);
7192
7193		assert(bo->mem);
7194		assert(!bo->mmapped);
7195		assert(bo->base.refcnt == 1);
7196
7197		bo->need_io = flags & KGEM_BUFFER_WRITE;
7198	}
7199init:
7200	bo->base.io = true;
7201	assert(bo->base.refcnt == 1);
7202	assert(num_pages(&bo->base) >= NUM_PAGES(size));
7203	assert(!bo->need_io || !bo->base.needs_flush);
7204	assert(!bo->need_io || bo->base.domain != DOMAIN_GPU);
7205	assert(bo->mem);
7206	assert(bo->mmapped != MMAPPED_GTT || bo->base.map__gtt == bo->mem || bo->base.map__wc == bo->mem);
7207	assert(bo->mmapped != MMAPPED_CPU || MAP(bo->base.map__cpu) == bo->mem);
7208
7209	bo->used = size;
7210	bo->write = flags & KGEM_BUFFER_WRITE_INPLACE;
7211	offset = 0;
7212
7213	assert(list_is_empty(&bo->base.list));
7214	list_add(&bo->base.list, &kgem->batch_buffers);
7215
7216	DBG(("%s(pages=%d [%d]) new handle=%d, used=%d, write=%d\n",
7217	     __FUNCTION__, num_pages(&bo->base), alloc, bo->base.handle, bo->used, bo->write));
7218
7219done:
7220	bo->used = ALIGN(bo->used, UPLOAD_ALIGNMENT);
7221	assert(bo->used && bo->used <= bytes(&bo->base));
7222	assert(bo->mem);
7223	*ret = (char *)bo->mem + offset;
7224	return kgem_create_proxy(kgem, &bo->base, offset, size);
7225}
7226
7227bool kgem_buffer_is_inplace(struct kgem_bo *_bo)
7228{
7229	struct kgem_buffer *bo = (struct kgem_buffer *)_bo->proxy;
7230	return bo->write & KGEM_BUFFER_WRITE_INPLACE;
7231}
7232
/* Allocate an upload buffer sized for a width x height x bpp image and
 * return a proxy bo with a suitable pitch; *ret receives the CPU
 * staging address.  The surface is padded to an even number of rows
 * for allocation, then trimmed back if height is odd.
 */
struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem,
				      int width, int height, int bpp,
				      uint32_t flags,
				      void **ret)
{
	struct kgem_bo *bo;
	int stride;

	assert(width > 0 && height > 0);
	assert(ret != NULL);
	/* Round the row to whole bytes; gen8+ (0100) wants a 32-byte
	 * aligned pitch, earlier gens 4 bytes.
	 */
	stride = ALIGN(width, 2) * bpp >> 3;
	stride = ALIGN(stride, kgem->gen >= 0100 ? 32 : 4);

	DBG(("%s: %dx%d, %d bpp, stride=%d\n",
	     __FUNCTION__, width, height, bpp, stride));

	bo = kgem_create_buffer(kgem, stride * ALIGN(height, 2), flags, ret);
	if (bo == NULL) {
		DBG(("%s: allocation failure for upload buffer\n",
		     __FUNCTION__));
		return NULL;
	}
	assert(*ret != NULL);
	assert(bo->proxy != NULL);

	if (height & 1) {
		struct kgem_buffer *io = (struct kgem_buffer *)bo->proxy;
		int min;

		assert(io->used);

		/* Having padded this surface to ensure that accesses to
		 * the last pair of rows is valid, remove the padding so
		 * that it can be allocated to other pixmaps.
		 */
		min = bo->delta + height * stride;
		min = ALIGN(min, UPLOAD_ALIGNMENT);
		if (io->used != min) {
			DBG(("%s: trimming buffer from %d to %d\n",
			     __FUNCTION__, io->used, min));
			io->used = min;
		}
		bo->size.bytes -= stride;
	}

	/* Stash the staging address on the proxy for later lookups. */
	bo->map__cpu = *ret;
	bo->pitch = stride;
	bo->unique_id = kgem_get_unique_id(kgem);
	return bo;
}
7283
/* Copy the pixels inside 'box' from client memory 'data' (with the
 * given stride/bpp) into a freshly allocated upload buffer and return
 * the proxy bo, or NULL on failure.  The copy runs under a sigtrap
 * guard so a fault in the client pages aborts cleanly.
 */
struct kgem_bo *kgem_upload_source_image(struct kgem *kgem,
					 const void *data,
					 const BoxRec *box,
					 int stride, int bpp)
{
	int width  = box->x2 - box->x1;
	int height = box->y2 - box->y1;
	struct kgem_bo *bo;
	void *dst;

	if (!kgem_can_create_2d(kgem, width, height, bpp))
		return NULL;

	DBG(("%s : (%d, %d), (%d, %d), stride=%d, bpp=%d\n",
	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2, stride, bpp));

	assert(data);
	assert(width > 0);
	assert(height > 0);
	assert(stride);
	assert(bpp);

	bo = kgem_create_buffer_2d(kgem,
				   width, height, bpp,
				   KGEM_BUFFER_WRITE_INPLACE, &dst);
	if (bo == NULL)
		return NULL;

	/* Guard against SIGSEGV/SIGBUS while touching client memory. */
	if (sigtrap_get()) {
		kgem_bo_destroy(kgem, bo);
		return NULL;
	}

	memcpy_blt(data, dst, bpp,
		   stride, bo->pitch,
		   box->x1, box->y1,
		   0, 0,
		   width, height);

	sigtrap_put();
	return bo;
}
7326
/* Attach a proxy bo to an owner slot: *ptr takes a new reference and
 * the proxy joins its target's vma list.  NOTE: map__gtt is reused here
 * as a back-pointer to the owner's slot rather than a real mapping —
 * a proxy never has a GTT map of its own (see the assert).
 */
void kgem_proxy_bo_attach(struct kgem_bo *bo,
			  struct kgem_bo **ptr)
{
	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
	assert(bo->map__gtt == NULL);
	assert(bo->proxy);
	list_add(&bo->vma, &bo->proxy->vma);
	bo->map__gtt = ptr;
	*ptr = kgem_bo_reference(bo);
}
7337
/* Make the CPU view of a read-back buffer coherent after the GPU has
 * written it.  For mmapped buffers this is a set-domain ioctl into the
 * appropriate read domain; otherwise the range is pread back into the
 * malloc'ed staging memory.  The caller must already have submitted
 * the batch (see the asserts on exec/rq).
 */
void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo)
{
	struct kgem_buffer *bo;
	uint32_t offset = _bo->delta, length = _bo->size.bytes;

	/* We expect the caller to have already submitted the batch */
	assert(_bo->io);
	assert(_bo->exec == NULL);
	assert(_bo->rq == NULL);
	assert(_bo->proxy);

	/* Operate on the backing buffer, not the proxy window. */
	_bo = _bo->proxy;
	assert(_bo->proxy == NULL);
	assert(_bo->exec == NULL);

	bo = (struct kgem_buffer *)_bo;

	DBG(("%s(offset=%d, length=%d, snooped=%d)\n", __FUNCTION__,
	     offset, length, bo->base.snoop));

	if (bo->mmapped) {
		struct drm_i915_gem_set_domain set_domain;

		DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n",
		     __FUNCTION__,
		     bo->base.needs_flush,
		     bo->base.domain,
		     __kgem_busy(kgem, bo->base.handle)));

		assert(bo->mmapped == MMAPPED_GTT || bo->base.snoop || kgem->has_llc);

		/* Read-only sync: pick the domain matching the mapping. */
		VG_CLEAR(set_domain);
		set_domain.handle = bo->base.handle;
		set_domain.write_domain = 0;
		set_domain.read_domains =
			bo->mmapped == MMAPPED_CPU ? I915_GEM_DOMAIN_CPU : I915_GEM_DOMAIN_GTT;

		if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
			kgem_throttle(kgem);
		}
	} else {
		/* No mapping: copy the range back via pread; on failure we
		 * return without retiring (best effort).
		 */
		if (gem_read(kgem->fd,
			     bo->base.handle, (char *)bo->mem+offset,
			     offset, length))
			return;
	}
	kgem_bo_maybe_retire(kgem, &bo->base);
	bo->base.domain = DOMAIN_NONE;
}
7388
7389uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format)
7390{
7391	struct kgem_bo_binding *b;
7392
7393	assert(bo->refcnt);
7394
7395	for (b = &bo->binding; b && b->offset; b = b->next)
7396		if (format == b->format)
7397			return b->offset;
7398
7399	return 0;
7400}
7401
7402void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset)
7403{
7404	struct kgem_bo_binding *b;
7405
7406	assert(bo->refcnt);
7407
7408	for (b = &bo->binding; b; b = b->next) {
7409		if (b->offset)
7410			continue;
7411
7412		b->offset = offset;
7413		b->format = format;
7414
7415		if (b->next)
7416			b->next->offset = 0;
7417
7418		return;
7419	}
7420
7421	b = malloc(sizeof(*b));
7422	if (b) {
7423		b->next = bo->binding.next;
7424		b->format = format;
7425		b->offset = offset;
7426		bo->binding.next = b;
7427	}
7428}
7429
/* Copy the contents of 'src' (a small linear bo) into a new linear bo
 * with the requested pitch, using an XY_SRC_COPY blit.  Used to fix up
 * buffers whose pitch no longer matches what the hardware needs.
 * Returns the new bo (refcnt 1) or NULL on failure.
 */
struct kgem_bo *
kgem_replace_bo(struct kgem *kgem,
		struct kgem_bo *src,
		uint32_t width,
		uint32_t height,
		uint32_t pitch,
		uint32_t bpp)
{
	struct kgem_bo *dst;
	uint32_t br00, br13;
	uint32_t handle;
	uint32_t size;
	uint32_t *b;

	DBG(("%s: replacing bo handle=%d, size=%dx%d pitch=%d, with pitch=%d\n",
	     __FUNCTION__, src->handle,  width, height, src->pitch, pitch));

	/* We only expect to be called to fixup small buffers, hence why
	 * we only attempt to allocate a linear bo.
	 */
	assert(src->tiling == I915_TILING_NONE);
	assert(kgem_bo_can_blt(kgem, src));

	size = height * pitch;
	size = NUM_PAGES(size);

	/* Recycle from the linear cache before creating a fresh bo. */
	dst = search_linear_cache(kgem, size, 0);
	if (dst == NULL)
		dst = search_linear_cache(kgem, size, CREATE_INACTIVE);
	if (dst == NULL) {
		handle = gem_create(kgem->fd, size);
		if (handle == 0)
			return NULL;

		dst = __kgem_bo_alloc(handle, size);
		if (dst == NULL) {
			gem_close(kgem->fd, handle);
			return NULL;
		}

		debug_alloc__bo(kgem, dst);
	}
	dst->pitch = pitch;
	dst->unique_id = kgem_get_unique_id(kgem);
	dst->refcnt = 1;
	assert(dst->tiling == I915_TILING_NONE);
	assert(kgem_bo_can_blt(kgem, dst));

	/* Ensure batch space for up to 10 dwords and 2 relocations,
	 * flushing and retrying once if the current batch is full.
	 */
	kgem_set_mode(kgem, KGEM_BLT, dst);
	if (!kgem_check_batch(kgem, 10) ||
	    !kgem_check_reloc(kgem, 2) ||
	    !kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
		kgem_submit(kgem);
		if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
			kgem_bo_destroy(kgem, dst);
			return NULL;
		}
		_kgem_set_mode(kgem, KGEM_BLT);
	}

	br00 = XY_SRC_COPY_BLT_CMD;
	br13 = pitch;
	/* From here 'pitch' is reused for the source pitch. */
	pitch = src->pitch;
	if (kgem->gen >= 040 && src->tiling) {
		br00 |= BLT_SRC_TILED;
		pitch >>= 2;	/* tiled pitch is specified in dwords */
	}

	br13 |= 0xcc << 16;	/* ROP: copy source */
	switch (bpp) {
	default:
	case 32: br00 |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
		 br13 |= 1 << 25; /* RGB8888 */
		 /* fall through: 32bpp sets both depth bits */
	case 16: br13 |= 1 << 24; /* RGB565 */
		 /* fall through */
	case 8: break;
	}

	/* Emit the blit: gen8+ (0100) uses 64-bit relocations and a
	 * 10-dword packet, earlier gens 32-bit relocs in 8 dwords.
	 */
	b = kgem->batch + kgem->nbatch;
	if (kgem->gen >= 0100) {
		b[0] = br00 | 8;
		b[1] = br13;
		b[2] = 0;
		b[3] = height << 16 | width;
		*(uint64_t *)(b+4) =
			kgem_add_reloc64(kgem, kgem->nbatch + 4, dst,
					 I915_GEM_DOMAIN_RENDER << 16 |
					 I915_GEM_DOMAIN_RENDER |
					 KGEM_RELOC_FENCED,
					 0);
		b[6] = 0;
		b[7] = pitch;
		*(uint64_t *)(b+8) =
			kgem_add_reloc64(kgem, kgem->nbatch + 8, src,
					 I915_GEM_DOMAIN_RENDER << 16 |
					 KGEM_RELOC_FENCED,
					 0);
		kgem->nbatch += 10;
	} else {
		b[0] = br00 | 6;
		b[1] = br13;
		b[2] = 0;
		b[3] = height << 16 | width;
		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst,
				      I915_GEM_DOMAIN_RENDER << 16 |
				      I915_GEM_DOMAIN_RENDER |
				      KGEM_RELOC_FENCED,
				      0);
		b[5] = 0;
		b[6] = pitch;
		b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src,
				      I915_GEM_DOMAIN_RENDER << 16 |
				      KGEM_RELOC_FENCED,
				      0);
		kgem->nbatch += 8;
	}

	return dst;
}
7548
/* Convert a snooped (CPU-cached) bo into one the GPU can use directly.
 * On LLC platforms nothing is needed.  Otherwise the bo is flushed and
 * flipped to UNCACHED via the set-caching ioctl.  Returns false if the
 * conversion would stall while MOVE_ASYNC_HINT is set, or if the
 * ioctl fails.
 */
bool kgem_bo_convert_to_gpu(struct kgem *kgem,
			    struct kgem_bo *bo,
			    unsigned flags)
{
	DBG(("%s: converting handle=%d from CPU to GPU, flags=%x, busy?=%d\n",
	     __FUNCTION__, bo->handle, flags, __kgem_bo_is_busy(kgem, bo)));
	assert(bo->tiling == I915_TILING_NONE);

	/* With LLC the CPU and GPU caches are coherent already. */
	if (kgem->has_llc)
		return true;

	/* Caller asked not to stall; a busy bo would block below. */
	if (flags & MOVE_ASYNC_HINT && __kgem_bo_is_busy(kgem, bo))
		return false;

	assert(bo->snoop);

	kgem_bo_submit(kgem, bo);

	if (!gem_set_caching(kgem->fd, bo->handle, UNCACHED))
		return false;

	bo->snoop = false;
	return true;
}
7573