kgem.c revision 9a906b70
1/*
2 * Copyright (c) 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Chris Wilson <chris@chris-wilson.co.uk>
25 *
26 */
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include "sna.h"
33#include "sna_reg.h"
34
35#include <unistd.h>
36#include <sys/ioctl.h>
37#include <sys/mman.h>
38#include <sys/stat.h>
39#include <time.h>
40#include <sched.h>
41#include <errno.h>
42#include <fcntl.h>
43
44#include <xf86drm.h>
45
46#ifdef HAVE_VALGRIND
47#include <valgrind.h>
48#include <memcheck.h>
49#endif
50
51#ifdef HAVE_STRUCT_SYSINFO_TOTALRAM
52#include <sys/sysinfo.h>
53#endif
54
55#include "sna_cpuid.h"
56
57static struct kgem_bo *
58search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
59
60static struct kgem_bo *
61search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
62
63#define DBG_NO_HW 0
64#define DBG_NO_EXEC 0
65#define DBG_NO_TILING 0
66#define DBG_NO_CACHE 0
67#define DBG_NO_SNOOP_CACHE 0
68#define DBG_NO_CACHE_LEVEL 0
69#define DBG_NO_CPU 0
70#define DBG_NO_CREATE2 0
71#define DBG_NO_USERPTR 0
72#define DBG_NO_UNSYNCHRONIZED_USERPTR 0
73#define DBG_NO_LLC 0
74#define DBG_NO_SEMAPHORES 0
75#define DBG_NO_MADV 0
76#define DBG_NO_UPLOAD_CACHE 0
77#define DBG_NO_UPLOAD_ACTIVE 0
78#define DBG_NO_MAP_UPLOAD 0
79#define DBG_NO_RELAXED_FENCING 0
80#define DBG_NO_SECURE_BATCHES 0
81#define DBG_NO_PINNED_BATCHES 0
82#define DBG_NO_FAST_RELOC 0
83#define DBG_NO_HANDLE_LUT 0
84#define DBG_NO_WT 0
85#define DBG_DUMP 0
86#define DBG_NO_MALLOC_CACHE 0
87
88#define FORCE_MMAP_SYNC 0 /* ((1 << DOMAIN_CPU) | (1 << DOMAIN_GTT)) */
89
90#ifndef DEBUG_SYNC
91#define DEBUG_SYNC 0
92#endif
93
94#define SHOW_BATCH_BEFORE 0
95#define SHOW_BATCH_AFTER 0
96
97#if 0
98#define ASSERT_IDLE(kgem__, handle__) assert(!__kgem_busy(kgem__, handle__))
99#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__) assert(!(expect__) || !__kgem_busy(kgem__, handle__))
100#else
101#define ASSERT_IDLE(kgem__, handle__)
102#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__)
103#endif
104
105/* Worst case seems to be 965gm where we cannot write within a cacheline that
 * is simultaneously being read by the GPU, or within the sampler
107 * prefetch. In general, the chipsets seem to have a requirement that sampler
108 * offsets be aligned to a cacheline (64 bytes).
109 *
110 * Actually, it turns out the BLT color pattern (BR15) has the most severe
111 * alignment restrictions, 64 bytes for 8-bpp, 128 bytes for 16-bpp and 256
112 * bytes for 32-bpp.
113 */
114#define UPLOAD_ALIGNMENT 256
115
116#define PAGE_ALIGN(x) ALIGN(x, PAGE_SIZE)
117#define NUM_PAGES(x) (((x) + PAGE_SIZE-1) / PAGE_SIZE)
118
119#define MAX_GTT_VMA_CACHE 512
120#define MAX_CPU_VMA_CACHE INT16_MAX
121#define MAP_PRESERVE_TIME 10
122
123#define MAKE_USER_MAP(ptr) ((void*)((uintptr_t)(ptr) | 1))
124#define IS_USER_MAP(ptr) ((uintptr_t)(ptr) & 1)
125
126#define LOCAL_I915_PARAM_HAS_BLT		11
127#define LOCAL_I915_PARAM_HAS_RELAXED_FENCING	12
128#define LOCAL_I915_PARAM_HAS_RELAXED_DELTA	15
129#define LOCAL_I915_PARAM_HAS_SEMAPHORES		20
130#define LOCAL_I915_PARAM_HAS_SECURE_BATCHES	23
131#define LOCAL_I915_PARAM_HAS_PINNED_BATCHES	24
132#define LOCAL_I915_PARAM_HAS_NO_RELOC		25
133#define LOCAL_I915_PARAM_HAS_HANDLE_LUT		26
134#define LOCAL_I915_PARAM_HAS_WT			27
135
136#define LOCAL_I915_EXEC_IS_PINNED		(1<<10)
137#define LOCAL_I915_EXEC_NO_RELOC		(1<<11)
138#define LOCAL_I915_EXEC_HANDLE_LUT		(1<<12)
139
140#define LOCAL_I915_GEM_CREATE2       0x34
141#define LOCAL_IOCTL_I915_GEM_CREATE2 DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_CREATE2, struct local_i915_gem_create2)
142struct local_i915_gem_create2 {
143	uint64_t size;
144	uint32_t placement;
145#define LOCAL_I915_CREATE_PLACEMENT_SYSTEM 0
146#define LOCAL_I915_CREATE_PLACEMENT_STOLEN 1 /* Cannot use CPU mmaps or pread/pwrite */
147	uint32_t domain;
148	uint32_t caching;
149	uint32_t tiling_mode;
150	uint32_t stride;
151	uint32_t flags;
152	uint32_t pad;
153	uint32_t handle;
154};
155
156#define LOCAL_I915_GEM_USERPTR       0x33
157#define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr)
158struct local_i915_gem_userptr {
159	uint64_t user_ptr;
160	uint64_t user_size;
161	uint32_t flags;
162#define I915_USERPTR_READ_ONLY		0x1
163#define I915_USERPTR_UNSYNCHRONIZED	0x80000000
164	uint32_t handle;
165};
166
167#define UNCACHED	0
168#define SNOOPED		1
169#define DISPLAY		2
170
171struct local_i915_gem_caching {
172	uint32_t handle;
173	uint32_t caching;
174};
175
176#define LOCAL_I915_GEM_SET_CACHING	0x2f
177#define LOCAL_I915_GEM_GET_CACHING	0x30
178#define LOCAL_IOCTL_I915_GEM_SET_CACHING DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_SET_CACHING, struct local_i915_gem_caching)
#define LOCAL_IOCTL_I915_GEM_GET_CACHING DRM_IOWR(DRM_COMMAND_BASE + LOCAL_I915_GEM_GET_CACHING, struct local_i915_gem_caching)
180
181struct kgem_buffer {
182	struct kgem_bo base;
183	void *mem;
184	uint32_t used;
185	uint32_t need_io : 1;
186	uint32_t write : 2;
187	uint32_t mmapped : 2;
188};
189enum {
190	MMAPPED_NONE,
191	MMAPPED_GTT,
192	MMAPPED_CPU
193};
194
195static struct kgem_bo *__kgem_freed_bo;
196static struct kgem_request *__kgem_freed_request;
197static struct drm_i915_gem_exec_object2 _kgem_dummy_exec;
198
199static inline int bytes(struct kgem_bo *bo)
200{
201	return __kgem_bo_size(bo);
202}
203
204#define bucket(B) (B)->size.pages.bucket
205#define num_pages(B) (B)->size.pages.count
206
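/* ioctl() wrapper: retries on EINTR and EAGAIN (yielding the CPU first for
 * EAGAIN); returns 0 on success or -errno on failure.
 */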
207static int do_ioctl(int fd, unsigned long req, void *arg)
208{
209	int err;
210
211restart:
212	if (ioctl(fd, req, arg) == 0)
213		return 0;
214
215	err = errno;
216
217	if (err == EINTR)
218		goto restart;
219
220	if (err == EAGAIN) {
221		sched_yield();
222		goto restart;
223	}
224
225	return -err;
226}
227
228#ifdef DEBUG_MEMORY
229static void debug_alloc(struct kgem *kgem, size_t size)
230{
231	kgem->debug_memory.bo_allocs++;
232	kgem->debug_memory.bo_bytes += size;
233}
234static void debug_alloc__bo(struct kgem *kgem, struct kgem_bo *bo)
235{
236	debug_alloc(kgem, bytes(bo));
237}
238#else
239#define debug_alloc__bo(k, b)
240#endif
241
242#ifndef NDEBUG
243static void assert_tiling(struct kgem *kgem, struct kgem_bo *bo)
244{
245	struct drm_i915_gem_get_tiling tiling;
246
247	assert(bo);
248
249	VG_CLEAR(tiling);
250	tiling.handle = bo->handle;
251	tiling.tiling_mode = bo->tiling;
252	(void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling);
253	assert(tiling.tiling_mode == bo->tiling);
254}
255
256static void assert_cacheing(struct kgem *kgem, struct kgem_bo *bo)
257{
258	struct local_i915_gem_caching arg;
259	int expect = kgem->has_llc ? SNOOPED : UNCACHED;
260
261	VG_CLEAR(arg);
262	arg.handle = bo->handle;
263	arg.caching = expect;
264
265	(void)do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_GET_CACHING, &arg);
266
267	assert(arg.caching == expect);
268}
269
270static void assert_bo_retired(struct kgem_bo *bo)
271{
272	DBG(("%s: handle=%d, domain: %d exec? %d, rq? %d\n", __FUNCTION__,
273	     bo->handle, bo->domain, bo->exec != NULL, bo->rq != NULL));
274	assert(bo->refcnt);
275	assert(bo->rq == NULL);
276	assert(bo->exec == NULL);
277	assert(list_is_empty(&bo->request));
278}
279#else
280#define assert_tiling(kgem, bo)
281#define assert_cacheing(kgem, bo)
282#define assert_bo_retired(bo)
283#endif
284
285static void kgem_sna_reset(struct kgem *kgem)
286{
287	struct sna *sna = container_of(kgem, struct sna, kgem);
288
289	sna->render.reset(sna);
290	sna->blt_state.fill_bo = 0;
291}
292
293static void kgem_sna_flush(struct kgem *kgem)
294{
295	struct sna *sna = container_of(kgem, struct sna, kgem);
296
297	sna->render.flush(sna);
298
299	if (sna->render.solid_cache.dirty)
300		sna_render_flush_solid(sna);
301}
302
303static bool gem_set_tiling(int fd, uint32_t handle, int tiling, int stride)
304{
305	struct drm_i915_gem_set_tiling set_tiling;
306	int err;
307
308	if (DBG_NO_TILING)
309		return false;
310
311	VG_CLEAR(set_tiling);
312restart:
313	set_tiling.handle = handle;
314	set_tiling.tiling_mode = tiling;
315	set_tiling.stride = stride;
316
317	if (ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling) == 0)
318		return true;
319
320	err = errno;
321	if (err == EINTR)
322		goto restart;
323
324	if (err == EAGAIN) {
325		sched_yield();
326		goto restart;
327	}
328
329	return false;
330}
331
332static bool gem_set_caching(int fd, uint32_t handle, int caching)
333{
334	struct local_i915_gem_caching arg;
335
336	VG_CLEAR(arg);
337	arg.handle = handle;
338	arg.caching = caching;
339	return do_ioctl(fd, LOCAL_IOCTL_I915_GEM_SET_CACHING, &arg) == 0;
340}
341
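/* Wrap a user allocation in a GEM handle. The unsynchronized variant is
 * tried first; if the kernel rejects it, a synchronized mapping is requested
 * instead. Returns 0 on failure.
 */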
342static uint32_t gem_userptr(int fd, void *ptr, int size, int read_only)
343{
344	struct local_i915_gem_userptr arg;
345
346	VG_CLEAR(arg);
347	arg.user_ptr = (uintptr_t)ptr;
348	arg.user_size = size;
349	arg.flags = I915_USERPTR_UNSYNCHRONIZED;
350	if (read_only)
351		arg.flags |= I915_USERPTR_READ_ONLY;
352
353	if (DBG_NO_UNSYNCHRONIZED_USERPTR ||
354	    do_ioctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg)) {
355		arg.flags &= ~I915_USERPTR_UNSYNCHRONIZED;
356		if (do_ioctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg)) {
357			DBG(("%s: failed to map %p + %d bytes: %d\n",
358			     __FUNCTION__, ptr, size, errno));
359			return 0;
360		}
361	}
362
363	return arg.handle;
364}
365
366static bool __kgem_throttle(struct kgem *kgem, bool harder)
367{
368	/* Let this be woken up by sigtimer so that we don't block here
369	 * too much and completely starve X. We will sleep again shortly,
370	 * and so catch up or detect the hang.
371	 */
372	do {
373		if (ioctl(kgem->fd, DRM_IOCTL_I915_GEM_THROTTLE) == 0) {
374			kgem->need_throttle = 0;
375			return false;
376		}
377
378		if (errno == EIO)
379			return true;
380	} while (harder);
381
382	return false;
383}
384
385static bool __kgem_throttle_retire(struct kgem *kgem, unsigned flags)
386{
387	if (flags & CREATE_NO_RETIRE || !kgem->need_retire) {
388		DBG(("%s: not retiring\n", __FUNCTION__));
389		return false;
390	}
391
392	if (kgem_retire(kgem))
393		return true;
394
395	if (flags & CREATE_NO_THROTTLE || !kgem->need_throttle) {
396		DBG(("%s: not throttling\n", __FUNCTION__));
397		return false;
398	}
399
400	__kgem_throttle(kgem, false);
401	return kgem_retire(kgem);
402}
403
404static void *__kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
405{
406	struct drm_i915_gem_mmap_gtt mmap_arg;
407	void *ptr;
408	int err;
409
410	DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__,
411	     bo->handle, bytes(bo)));
412	assert(bo->proxy == NULL);
413	assert(!bo->snoop);
414	assert(num_pages(bo) <= kgem->aperture_mappable / 4);
415
416retry_gtt:
417	VG_CLEAR(mmap_arg);
418	mmap_arg.handle = bo->handle;
419#ifdef __NetBSD__
420	if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg)) {
421		err = errno;
422#else
423	if ((err = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg))) {
424#endif
425		assert(err != EINVAL);
426
427		(void)__kgem_throttle_retire(kgem, 0);
428		if (kgem_expire_cache(kgem))
429			goto retry_gtt;
430
431		if (kgem_cleanup_cache(kgem))
432			goto retry_gtt;
433
434		ERR(("%s: failed to retrieve GTT offset for handle=%d: %d\n",
435		     __FUNCTION__, bo->handle, -err));
436		return NULL;
437	}
438
439retry_mmap:
440#ifdef __NetBSD__
441	err = -drmMap(kgem->fd, mmap_arg.offset, bytes(bo), &ptr);
442	if (err) {
443#else
444	ptr = mmap(0, bytes(bo), PROT_READ | PROT_WRITE, MAP_SHARED,
445		   kgem->fd, mmap_arg.offset);
446	if (ptr == MAP_FAILED) {
447		err = errno;
448#endif
449		assert(err != EINVAL);
450
451		if (__kgem_throttle_retire(kgem, 0))
452			goto retry_mmap;
453
454		if (kgem_cleanup_cache(kgem))
455			goto retry_mmap;
456
457		ERR(("%s: failed to mmap handle=%d, %d bytes, into GTT domain: %d\n",
458		     __FUNCTION__, bo->handle, bytes(bo), err));
459		ptr = NULL;
460	}
461
462	return ptr;
463}
464
465static int gem_write(int fd, uint32_t handle,
466		     int offset, int length,
467		     const void *src)
468{
469	struct drm_i915_gem_pwrite pwrite;
470
471	DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__,
472	     handle, offset, length));
473
474	VG_CLEAR(pwrite);
475	pwrite.handle = handle;
476	pwrite.offset = offset;
477	pwrite.size = length;
478	pwrite.data_ptr = (uintptr_t)src;
479	return do_ioctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
480}
481
482static int gem_write__cachealigned(int fd, uint32_t handle,
483				   int offset, int length,
484				   const void *src)
485{
486	struct drm_i915_gem_pwrite pwrite;
487
488	DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__,
489	     handle, offset, length));
490
491	VG_CLEAR(pwrite);
492	pwrite.handle = handle;
493	/* align the transfer to cachelines; fortuitously this is safe! */
494	if ((offset | length) & 63) {
495		pwrite.offset = offset & ~63;
496		pwrite.size = ALIGN(offset+length, 64) - pwrite.offset;
497		pwrite.data_ptr = (uintptr_t)src + pwrite.offset - offset;
498	} else {
499		pwrite.offset = offset;
500		pwrite.size = length;
501		pwrite.data_ptr = (uintptr_t)src;
502	}
503	return do_ioctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
504}
505
506static int gem_read(int fd, uint32_t handle, const void *dst,
507		    int offset, int length)
508{
509	struct drm_i915_gem_pread pread;
510	int ret;
511
512	DBG(("%s(handle=%d, len=%d)\n", __FUNCTION__,
513	     handle, length));
514
515	VG_CLEAR(pread);
516	pread.handle = handle;
517	pread.offset = offset;
518	pread.size = length;
519	pread.data_ptr = (uintptr_t)dst;
520	ret = do_ioctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
521	if (ret) {
522		DBG(("%s: failed, errno=%d\n", __FUNCTION__, -ret));
523		return ret;
524	}
525
526	VG(VALGRIND_MAKE_MEM_DEFINED(dst, length));
527	return 0;
528}
529
530bool __kgem_busy(struct kgem *kgem, int handle)
531{
532	struct drm_i915_gem_busy busy;
533
534	VG_CLEAR(busy);
535	busy.handle = handle;
536	busy.busy = !kgem->wedged;
537	(void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
538	DBG(("%s: handle=%d, busy=%d, wedged=%d\n",
539	     __FUNCTION__, handle, busy.busy, kgem->wedged));
540
541	return busy.busy;
542}
543
544static void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo)
545{
546	DBG(("%s: retiring bo handle=%d (needed flush? %d), rq? %d [busy?=%d]\n",
547	     __FUNCTION__, bo->handle, bo->needs_flush, bo->rq != NULL,
548	     __kgem_busy(kgem, bo->handle)));
549	assert(bo->exec == NULL);
550	assert(list_is_empty(&bo->vma));
551
552	if (bo->rq) {
553		__kgem_bo_clear_busy(bo);
554		kgem_retire(kgem);
555		assert_bo_retired(bo);
556	} else {
557		assert(bo->exec == NULL);
558		assert(list_is_empty(&bo->request));
559		assert(!bo->needs_flush);
560		ASSERT_IDLE(kgem, bo->handle);
561	}
562}
563
564static void kgem_bo_maybe_retire(struct kgem *kgem, struct kgem_bo *bo)
565{
566	DBG(("%s: retiring bo handle=%d (needed flush? %d), rq? %d [busy?=%d]\n",
567	     __FUNCTION__, bo->handle, bo->needs_flush, bo->rq != NULL,
568	     __kgem_busy(kgem, bo->handle)));
569	assert(bo->exec == NULL);
570	assert(list_is_empty(&bo->vma));
571
572	if (bo->rq) {
573		if (!__kgem_busy(kgem, bo->handle)) {
574			__kgem_bo_clear_busy(bo);
575			kgem_retire(kgem);
576		}
577	} else {
578		assert(!bo->needs_flush);
579		ASSERT_IDLE(kgem, bo->handle);
580	}
581}
582
583bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
584		   const void *data, int length)
585{
586	int err;
587
588	assert(bo->refcnt);
589	assert(bo->proxy == NULL);
590	ASSERT_IDLE(kgem, bo->handle);
591
592	assert(length <= bytes(bo));
593retry:
594	if ((err = gem_write(kgem->fd, bo->handle, 0, length, data))) {
595		assert(err != EINVAL);
596
597		(void)__kgem_throttle_retire(kgem, 0);
598		if (kgem_expire_cache(kgem))
599			goto retry;
600
601		if (kgem_cleanup_cache(kgem))
602			goto retry;
603
604		ERR(("%s: failed to write %d bytes into BO handle=%d: %d\n",
605		     __FUNCTION__, length, bo->handle, -err));
606		return false;
607	}
608
609	DBG(("%s: flush=%d, domain=%d\n", __FUNCTION__, bo->flush, bo->domain));
610	if (bo->exec == NULL)
611		kgem_bo_maybe_retire(kgem, bo);
612	bo->domain = DOMAIN_NONE;
613	bo->gtt_dirty = true;
614	return true;
615}
616
617static uint32_t gem_create(int fd, int num_pages)
618{
619	struct drm_i915_gem_create create;
620
621	VG_CLEAR(create);
622	create.handle = 0;
623	create.size = PAGE_SIZE * num_pages;
624	(void)do_ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
625
626	return create.handle;
627}
628
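/* Tell the kernel the bo's backing storage may be discarded under memory
 * pressure (I915_MADV_DONTNEED); returns whether the pages are still resident.
 */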
629static bool
630kgem_bo_set_purgeable(struct kgem *kgem, struct kgem_bo *bo)
631{
632#if DBG_NO_MADV
633	return true;
634#else
635	struct drm_i915_gem_madvise madv;
636
637	assert(bo->exec == NULL);
638	assert(!bo->purged);
639
640	VG_CLEAR(madv);
641	madv.handle = bo->handle;
642	madv.madv = I915_MADV_DONTNEED;
643	if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) {
644		bo->purged = 1;
645		kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU;
646		return madv.retained;
647	}
648
649	return true;
650#endif
651}
652
653static bool
654kgem_bo_is_retained(struct kgem *kgem, struct kgem_bo *bo)
655{
656#if DBG_NO_MADV
657	return true;
658#else
659	struct drm_i915_gem_madvise madv;
660
661	if (!bo->purged)
662		return true;
663
664	VG_CLEAR(madv);
665	madv.handle = bo->handle;
666	madv.madv = I915_MADV_DONTNEED;
667	if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0)
668		return madv.retained;
669
670	return false;
671#endif
672}
673
674static bool
675kgem_bo_clear_purgeable(struct kgem *kgem, struct kgem_bo *bo)
676{
677#if DBG_NO_MADV
678	return true;
679#else
680	struct drm_i915_gem_madvise madv;
681
682	assert(bo->purged);
683
684	VG_CLEAR(madv);
685	madv.handle = bo->handle;
686	madv.madv = I915_MADV_WILLNEED;
687	if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) {
688		bo->purged = !madv.retained;
689		kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU;
690		return madv.retained;
691	}
692
693	return false;
694#endif
695}
696
697static void gem_close(int fd, uint32_t handle)
698{
699	struct drm_gem_close close;
700
701	VG_CLEAR(close);
702	close.handle = handle;
703	(void)do_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close);
704}
705
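/* Find-last-set: index of the most significant set bit (the result is
 * meaningless for word == 0).
 */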
706constant inline static unsigned long __fls(unsigned long word)
707{
708#if defined(__GNUC__) && (defined(__i386__) || defined(__x86__) || defined(__x86_64__))
709	asm("bsr %1,%0"
710	    : "=r" (word)
711	    : "rm" (word));
712	return word;
713#else
714	unsigned int v = 0;
715
716	while (word >>= 1)
717		v++;
718
719	return v;
720#endif
721}
722
723constant inline static int cache_bucket(int num_pages)
724{
725	return __fls(num_pages);
726}
727
728static struct kgem_bo *__kgem_bo_init(struct kgem_bo *bo,
729				      int handle, int num_pages)
730{
731	DBG(("%s(handle=%d, num_pages=%d)\n", __FUNCTION__, handle, num_pages));
732
733	assert(num_pages);
734	memset(bo, 0, sizeof(*bo));
735
736	bo->refcnt = 1;
737	bo->handle = handle;
738	bo->target_handle = -1;
739	num_pages(bo) = num_pages;
740	bucket(bo) = cache_bucket(num_pages);
741	bo->reusable = true;
742	bo->domain = DOMAIN_CPU;
743	list_init(&bo->request);
744	list_init(&bo->list);
745	list_init(&bo->vma);
746
747	return bo;
748}
749
750static struct kgem_bo *__kgem_bo_alloc(int handle, int num_pages)
751{
752	struct kgem_bo *bo;
753
754	if (__kgem_freed_bo) {
755		bo = __kgem_freed_bo;
756		__kgem_freed_bo = *(struct kgem_bo **)bo;
757	} else {
758		bo = malloc(sizeof(*bo));
759		if (bo == NULL)
760			return NULL;
761	}
762
763	return __kgem_bo_init(bo, handle, num_pages);
764}
765
766static struct kgem_request *__kgem_request_alloc(struct kgem *kgem)
767{
768	struct kgem_request *rq;
769
770	rq = __kgem_freed_request;
771	if (rq) {
772		__kgem_freed_request = *(struct kgem_request **)rq;
773	} else {
774		rq = malloc(sizeof(*rq));
775		if (rq == NULL)
776			rq = &kgem->static_request;
777	}
778
779	list_init(&rq->buffers);
780	rq->bo = NULL;
781	rq->ring = 0;
782
783	return rq;
784}
785
786static void __kgem_request_free(struct kgem_request *rq)
787{
788	_list_del(&rq->list);
789	if (DBG_NO_MALLOC_CACHE) {
790		free(rq);
791	} else {
792		*(struct kgem_request **)rq = __kgem_freed_request;
793		__kgem_freed_request = rq;
794	}
795}
796
797static struct list *inactive(struct kgem *kgem, int num_pages)
798{
799	assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE);
800	assert(cache_bucket(num_pages) < NUM_CACHE_BUCKETS);
801	return &kgem->inactive[cache_bucket(num_pages)];
802}
803
804static struct list *active(struct kgem *kgem, int num_pages, int tiling)
805{
806	assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE);
807	assert(cache_bucket(num_pages) < NUM_CACHE_BUCKETS);
808	return &kgem->active[cache_bucket(num_pages)][tiling];
809}
810
811static size_t
812agp_aperture_size(struct pci_device *dev, unsigned gen)
813{
814	/* XXX assume that only future chipsets are unknown and follow
815	 * the post gen2 PCI layout.
816	 */
817	return dev->regions[gen < 030 ? 0 : 2].size;
818}
819
820static size_t
821total_ram_size(void)
822{
823#ifdef HAVE_STRUCT_SYSINFO_TOTALRAM
824	struct sysinfo info;
825	if (sysinfo(&info) == 0)
826		return info.totalram * info.mem_unit;
827#endif
828
829#ifdef _SC_PHYS_PAGES
830	 return sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGE_SIZE);
831#endif
832
833	return 0;
834}
835
836static unsigned
837cpu_cache_size__cpuid4(void)
838{
	/* "Deterministic Cache Parameters (Function 04h)":
840	 *    When EAX is initialized to a value of 4, the CPUID instruction
841	 *    returns deterministic cache information in the EAX, EBX, ECX
842	 *    and EDX registers.  This function requires ECX be initialized
843	 *    with an index which indicates which cache to return information
844	 *    about. The OS is expected to call this function (CPUID.4) with
845	 *    ECX = 0, 1, 2, until EAX[4:0] == 0, indicating no more caches.
846	 *    The order in which the caches are returned is not specified
847	 *    and may change at Intel's discretion.
848	 *
849	 * Calculating the Cache Size in bytes:
850	 *          = (Ways +1) * (Partitions +1) * (Line Size +1) * (Sets +1)
851	 */
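	/* Example: a 16-way cache with 64-byte lines, one partition and
	 * 8192 sets reports (15+1) * (0+1) * (63+1) * (8191+1) = 8 MiB.
	 */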
852
853	 unsigned int eax, ebx, ecx, edx;
854	 unsigned int llc_size = 0;
855	 int cnt;
856
857	 if (__get_cpuid_max(BASIC_CPUID, NULL) < 4)
858		 return 0;
859
860	 cnt = 0;
861	 do {
862		 unsigned associativity, line_partitions, line_size, sets;
863
864		 __cpuid_count(4, cnt++, eax, ebx, ecx, edx);
865
866		 if ((eax & 0x1f) == 0)
867			 break;
868
869		 associativity = ((ebx >> 22) & 0x3ff) + 1;
870		 line_partitions = ((ebx >> 12) & 0x3ff) + 1;
871		 line_size = (ebx & 0xfff) + 1;
872		 sets = ecx + 1;
873
874		 llc_size = associativity * line_partitions * line_size * sets;
875	 } while (1);
876
877	 return llc_size;
878}
879
880static unsigned
881cpu_cache_size(void)
882{
883	unsigned size;
884	FILE *file;
885
886	size = cpu_cache_size__cpuid4();
887	if (size)
888		return size;
889
890	file = fopen("/proc/cpuinfo", "r");
891	if (file) {
892		size_t len = 0;
893		char *line = NULL;
894		while (getline(&line, &len, file) != -1) {
895			int kb;
896			if (sscanf(line, "cache size : %d KB", &kb) == 1) {
897				/* Paranoid check against gargantuan caches */
898				if (kb <= 1<<20)
899					size = kb * 1024;
900				break;
901			}
902		}
903		free(line);
904		fclose(file);
905	}
906
907	if (size == 0)
908		size = 64 * 1024;
909
910	return size;
911}
912
913static int gem_param(struct kgem *kgem, int name)
914{
915	drm_i915_getparam_t gp;
916	int v = -1; /* No param uses the sign bit, reserve it for errors */
917
918	VG_CLEAR(gp);
919	gp.param = name;
920	gp.value = &v;
921	if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GETPARAM, &gp))
922		return -1;
923
924	VG(VALGRIND_MAKE_MEM_DEFINED(&v, sizeof(v)));
925	return v;
926}
927
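/* Probe for execbuffer2 support: the deliberately empty buffer list is
 * expected to fail with -EFAULT if the ioctl is recognised at all.
 */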
928static bool test_has_execbuffer2(struct kgem *kgem)
929{
930	struct drm_i915_gem_execbuffer2 execbuf;
931
932	memset(&execbuf, 0, sizeof(execbuf));
933	execbuf.buffer_count = 1;
934
935	return do_ioctl(kgem->fd,
936			 DRM_IOCTL_I915_GEM_EXECBUFFER2,
937			 &execbuf) == -EFAULT;
938}
939
940static bool test_has_no_reloc(struct kgem *kgem)
941{
942	if (DBG_NO_FAST_RELOC)
943		return false;
944
945	return gem_param(kgem, LOCAL_I915_PARAM_HAS_NO_RELOC) > 0;
946}
947
948static bool test_has_handle_lut(struct kgem *kgem)
949{
950	if (DBG_NO_HANDLE_LUT)
951		return false;
952
953	return gem_param(kgem, LOCAL_I915_PARAM_HAS_HANDLE_LUT) > 0;
954}
955
956static bool test_has_wt(struct kgem *kgem)
957{
958	if (DBG_NO_WT)
959		return false;
960
961	return gem_param(kgem, LOCAL_I915_PARAM_HAS_WT) > 0;
962}
963
964static bool test_has_semaphores_enabled(struct kgem *kgem)
965{
966	FILE *file;
967	bool detected = false;
968	int ret;
969
970	if (DBG_NO_SEMAPHORES)
971		return false;
972
973	ret = gem_param(kgem, LOCAL_I915_PARAM_HAS_SEMAPHORES);
974	if (ret != -1)
975		return ret > 0;
976
977	file = fopen("/sys/module/i915/parameters/semaphores", "r");
978	if (file) {
979		int value;
980		if (fscanf(file, "%d", &value) == 1)
981			detected = value != 0;
982		fclose(file);
983	}
984
985	return detected;
986}
987
988static bool is_hw_supported(struct kgem *kgem,
989			    struct pci_device *dev)
990{
991	if (DBG_NO_HW)
992		return false;
993
994	if (!test_has_execbuffer2(kgem))
995		return false;
996
997	if (kgem->gen == (unsigned)-1) /* unknown chipset, assume future gen */
998		return kgem->has_blt;
999
	/* Although the GMCH on pre-855gm is fubar, it mostly works. So
	 * let the user decide through "NoAccel" whether or not to risk
	 * hw acceleration.
	 */
1004
1005	if (kgem->gen == 060 && dev && dev->revision < 8) {
1006		/* pre-production SNB with dysfunctional BLT */
1007		return false;
1008	}
1009
1010	if (kgem->gen >= 060) /* Only if the kernel supports the BLT ring */
1011		return kgem->has_blt;
1012
1013	return true;
1014}
1015
1016static bool test_has_relaxed_fencing(struct kgem *kgem)
1017{
1018	if (kgem->gen < 040) {
1019		if (DBG_NO_RELAXED_FENCING)
1020			return false;
1021
1022		return gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_FENCING) > 0;
1023	} else
1024		return true;
1025}
1026
1027static bool test_has_llc(struct kgem *kgem)
1028{
1029	int has_llc = -1;
1030
1031	if (DBG_NO_LLC)
1032		return false;
1033
1034#if defined(I915_PARAM_HAS_LLC) /* Expected in libdrm-2.4.31 */
1035	has_llc = gem_param(kgem, I915_PARAM_HAS_LLC);
1036#endif
1037	if (has_llc == -1) {
1038		DBG(("%s: no kernel/drm support for HAS_LLC, assuming support for LLC based on GPU generation\n", __FUNCTION__));
1039		has_llc = kgem->gen >= 060;
1040	}
1041
1042	return has_llc;
1043}
1044
1045static bool test_has_caching(struct kgem *kgem)
1046{
1047	uint32_t handle;
1048	bool ret;
1049
1050	if (DBG_NO_CACHE_LEVEL)
1051		return false;
1052
1053	/* Incoherent blt and sampler hangs the GPU */
1054	if (kgem->gen == 040)
1055		return false;
1056
1057	handle = gem_create(kgem->fd, 1);
1058	if (handle == 0)
1059		return false;
1060
1061	ret = gem_set_caching(kgem->fd, handle, UNCACHED);
1062	gem_close(kgem->fd, handle);
1063	return ret;
1064}
1065
1066static bool test_has_userptr(struct kgem *kgem)
1067{
1068	uint32_t handle;
1069	void *ptr;
1070
1071	if (DBG_NO_USERPTR)
1072		return false;
1073
1074	/* Incoherent blt and sampler hangs the GPU */
1075	if (kgem->gen == 040)
1076		return false;
1077
1078	if (kgem->gen >= 0100)
1079		return false; /* FIXME https://bugs.freedesktop.org/show_bug.cgi?id=79053 */
1080
1081	if (posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE))
1082		return false;
1083
1084	handle = gem_userptr(kgem->fd, ptr, PAGE_SIZE, false);
1085	gem_close(kgem->fd, handle);
1086	free(ptr);
1087
1088	return handle != 0;
1089}
1090
1091static bool test_has_create2(struct kgem *kgem)
1092{
1093#if defined(USE_CREATE2)
1094	struct local_i915_gem_create2 args;
1095
1096	if (DBG_NO_CREATE2)
1097		return false;
1098
1099	memset(&args, 0, sizeof(args));
1100	args.size = PAGE_SIZE;
1101	args.caching = DISPLAY;
1102	if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_CREATE2, &args) == 0)
1103		gem_close(kgem->fd, args.handle);
1104
1105	return args.handle != 0;
1106#else
1107	return false;
1108#endif
1109}
1110
1111static bool test_has_secure_batches(struct kgem *kgem)
1112{
1113	if (DBG_NO_SECURE_BATCHES)
1114		return false;
1115
1116	return gem_param(kgem, LOCAL_I915_PARAM_HAS_SECURE_BATCHES) > 0;
1117}
1118
1119static bool test_has_pinned_batches(struct kgem *kgem)
1120{
1121	if (DBG_NO_PINNED_BATCHES)
1122		return false;
1123
1124	return gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES) > 0;
1125}
1126
1127static int kgem_get_screen_index(struct kgem *kgem)
1128{
1129	struct sna *sna = container_of(kgem, struct sna, kgem);
1130	return sna->scrn->scrnIndex;
1131}
1132
1133static int __find_debugfs(struct kgem *kgem)
1134{
1135	int i;
1136
1137	for (i = 0; i < DRM_MAX_MINOR; i++) {
1138		char path[80];
1139
1140		sprintf(path, "/sys/kernel/debug/dri/%d/i915_wedged", i);
1141		if (access(path, R_OK) == 0)
1142			return i;
1143
1144		sprintf(path, "/debug/dri/%d/i915_wedged", i);
1145		if (access(path, R_OK) == 0)
1146			return i;
1147	}
1148
1149	return -1;
1150}
1151
1152static int kgem_get_minor(struct kgem *kgem)
1153{
1154	struct stat st;
1155
1156	if (fstat(kgem->fd, &st))
1157		return __find_debugfs(kgem);
1158
1159	if (!S_ISCHR(st.st_mode))
1160		return __find_debugfs(kgem);
1161
	return st.st_rdev & 63; /* DRM card minors are 0-63 */
1163}
1164
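/* Pre-allocate and pin a pool of small batch buffers (16 one-page and 4
 * four-page bos) for reuse during submission; on failure, fall back to a
 * single unpinned bo per size bucket.
 */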
1165static bool kgem_init_pinned_batches(struct kgem *kgem)
1166{
1167	int count[2] = { 16, 4 };
1168	int size[2] = { 1, 4 };
1169	int n, i;
1170
1171	if (kgem->wedged)
1172		return true;
1173
1174	for (n = 0; n < ARRAY_SIZE(count); n++) {
1175		for (i = 0; i < count[n]; i++) {
1176			struct drm_i915_gem_pin pin;
1177			struct kgem_bo *bo;
1178
1179			VG_CLEAR(pin);
1180
1181			pin.handle = gem_create(kgem->fd, size[n]);
1182			if (pin.handle == 0)
1183				goto err;
1184
1185			DBG(("%s: new handle=%d, num_pages=%d\n",
1186			     __FUNCTION__, pin.handle, size[n]));
1187
1188			bo = __kgem_bo_alloc(pin.handle, size[n]);
1189			if (bo == NULL) {
1190				gem_close(kgem->fd, pin.handle);
1191				goto err;
1192			}
1193
1194			pin.alignment = 0;
1195			if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_PIN, &pin)) {
1196				gem_close(kgem->fd, pin.handle);
1197				free(bo);
1198				goto err;
1199			}
1200			bo->presumed_offset = pin.offset;
1201			debug_alloc__bo(kgem, bo);
1202			list_add(&bo->list, &kgem->pinned_batches[n]);
1203		}
1204	}
1205
1206	return true;
1207
1208err:
1209	for (n = 0; n < ARRAY_SIZE(kgem->pinned_batches); n++) {
1210		while (!list_is_empty(&kgem->pinned_batches[n])) {
1211			kgem_bo_destroy(kgem,
1212					list_first_entry(&kgem->pinned_batches[n],
1213							 struct kgem_bo, list));
1214		}
1215	}
1216
1217	/* For simplicity populate the lists with a single unpinned bo */
1218	for (n = 0; n < ARRAY_SIZE(count); n++) {
1219		struct kgem_bo *bo;
1220		uint32_t handle;
1221
1222		handle = gem_create(kgem->fd, size[n]);
1223		if (handle == 0)
1224			break;
1225
1226		bo = __kgem_bo_alloc(handle, size[n]);
1227		if (bo == NULL) {
1228			gem_close(kgem->fd, handle);
1229			break;
1230		}
1231
1232		debug_alloc__bo(kgem, bo);
1233		list_add(&bo->list, &kgem->pinned_batches[n]);
1234	}
1235	return false;
1236}
1237
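/* Detect the swizzle mode the kernel reports for X-tiling and pick matching
 * CPU tiled-copy routines; skipped before gen5 because of bit17 swizzling.
 */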
1238static void kgem_init_swizzling(struct kgem *kgem)
1239{
1240	struct drm_i915_gem_get_tiling tiling;
1241
1242	if (kgem->gen < 050) /* bit17 swizzling :( */
1243		return;
1244
1245	VG_CLEAR(tiling);
1246	tiling.handle = gem_create(kgem->fd, 1);
1247	if (!tiling.handle)
1248		return;
1249
1250	if (!gem_set_tiling(kgem->fd, tiling.handle, I915_TILING_X, 512))
1251		goto out;
1252
1253	if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling))
1254		goto out;
1255
1256	choose_memcpy_tiled_x(kgem, tiling.swizzle_mode);
1257out:
1258	gem_close(kgem->fd, tiling.handle);
1259}
1260
1261
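/* One-time setup of the GEM interface: probe kernel and hardware
 * capabilities, size the bo caches and aperture watermarks, and choose the
 * base execbuffer flags used for every submission.
 */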
1262void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
1263{
1264	struct drm_i915_gem_get_aperture aperture;
1265	size_t totalram;
1266	unsigned half_gpu_max;
1267	unsigned int i, j;
1268
1269	DBG(("%s: fd=%d, gen=%d\n", __FUNCTION__, fd, gen));
1270
1271	kgem->fd = fd;
1272	kgem->gen = gen;
1273
1274	list_init(&kgem->requests[0]);
1275	list_init(&kgem->requests[1]);
1276	list_init(&kgem->batch_buffers);
1277	list_init(&kgem->active_buffers);
1278	list_init(&kgem->flushing);
1279	list_init(&kgem->large);
1280	list_init(&kgem->large_inactive);
1281	list_init(&kgem->snoop);
1282	list_init(&kgem->scanout);
1283	for (i = 0; i < ARRAY_SIZE(kgem->pinned_batches); i++)
1284		list_init(&kgem->pinned_batches[i]);
1285	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
1286		list_init(&kgem->inactive[i]);
1287	for (i = 0; i < ARRAY_SIZE(kgem->active); i++) {
1288		for (j = 0; j < ARRAY_SIZE(kgem->active[i]); j++)
1289			list_init(&kgem->active[i][j]);
1290	}
1291	for (i = 0; i < ARRAY_SIZE(kgem->vma); i++) {
1292		for (j = 0; j < ARRAY_SIZE(kgem->vma[i].inactive); j++)
1293			list_init(&kgem->vma[i].inactive[j]);
1294	}
1295	kgem->vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE;
1296	kgem->vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE;
1297
1298	kgem->has_blt = gem_param(kgem, LOCAL_I915_PARAM_HAS_BLT) > 0;
1299	DBG(("%s: has BLT ring? %d\n", __FUNCTION__,
1300	     kgem->has_blt));
1301
1302	kgem->has_relaxed_delta =
1303		gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_DELTA) > 0;
1304	DBG(("%s: has relaxed delta? %d\n", __FUNCTION__,
1305	     kgem->has_relaxed_delta));
1306
1307	kgem->has_relaxed_fencing = test_has_relaxed_fencing(kgem);
1308	DBG(("%s: has relaxed fencing? %d\n", __FUNCTION__,
1309	     kgem->has_relaxed_fencing));
1310
1311	kgem->has_llc = test_has_llc(kgem);
1312	DBG(("%s: has shared last-level-cache? %d\n", __FUNCTION__,
1313	     kgem->has_llc));
1314
1315	kgem->has_wt = test_has_wt(kgem);
1316	DBG(("%s: has write-through caching for scanouts? %d\n", __FUNCTION__,
1317	     kgem->has_wt));
1318
1319	kgem->has_caching = test_has_caching(kgem);
1320	DBG(("%s: has set-cache-level? %d\n", __FUNCTION__,
1321	     kgem->has_caching));
1322
1323	kgem->has_userptr = test_has_userptr(kgem);
1324	DBG(("%s: has userptr? %d\n", __FUNCTION__,
1325	     kgem->has_userptr));
1326
1327	kgem->has_create2 = test_has_create2(kgem);
1328	DBG(("%s: has create2? %d\n", __FUNCTION__,
1329	     kgem->has_create2));
1330
1331	kgem->has_no_reloc = test_has_no_reloc(kgem);
1332	DBG(("%s: has no-reloc? %d\n", __FUNCTION__,
1333	     kgem->has_no_reloc));
1334
1335	kgem->has_handle_lut = test_has_handle_lut(kgem);
1336	DBG(("%s: has handle-lut? %d\n", __FUNCTION__,
1337	     kgem->has_handle_lut));
1338
1339	kgem->has_semaphores = false;
1340	if (kgem->has_blt && test_has_semaphores_enabled(kgem))
1341		kgem->has_semaphores = true;
1342	DBG(("%s: semaphores enabled? %d\n", __FUNCTION__,
1343	     kgem->has_semaphores));
1344
1345	kgem->can_blt_cpu = gen >= 030;
1346	DBG(("%s: can blt to cpu? %d\n", __FUNCTION__,
1347	     kgem->can_blt_cpu));
1348
1349	kgem->can_render_y = gen != 021 && (gen >> 3) != 4;
1350	DBG(("%s: can render to Y-tiled surfaces? %d\n", __FUNCTION__,
1351	     kgem->can_render_y));
1352
1353	kgem->has_secure_batches = test_has_secure_batches(kgem);
1354	DBG(("%s: can use privileged batchbuffers? %d\n", __FUNCTION__,
1355	     kgem->has_secure_batches));
1356
1357	kgem->has_pinned_batches = test_has_pinned_batches(kgem);
1358	DBG(("%s: can use pinned batchbuffers (to avoid CS w/a)? %d\n", __FUNCTION__,
1359	     kgem->has_pinned_batches));
1360
1361	if (!is_hw_supported(kgem, dev)) {
1362		xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING,
1363			   "Detected unsupported/dysfunctional hardware, disabling acceleration.\n");
1364		kgem->wedged = 1;
1365	} else if (__kgem_throttle(kgem, false)) {
1366		xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING,
1367			   "Detected a hung GPU, disabling acceleration.\n");
1368		kgem->wedged = 1;
1369	}
1370
1371	kgem->batch_size = ARRAY_SIZE(kgem->batch);
1372	if (gen == 020 && !kgem->has_pinned_batches)
1373		/* Limited to what we can pin */
1374		kgem->batch_size = 4*1024;
1375	if (gen == 022)
1376		/* 865g cannot handle a batch spanning multiple pages */
1377		kgem->batch_size = PAGE_SIZE / sizeof(uint32_t);
1378	if (gen >= 070)
1379		kgem->batch_size = 16*1024;
1380	if (!kgem->has_relaxed_delta && kgem->batch_size > 4*1024)
1381		kgem->batch_size = 4*1024;
1382
1383	if (!kgem_init_pinned_batches(kgem) && gen == 020) {
1384		xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING,
1385			   "Unable to reserve memory for GPU, disabling acceleration.\n");
1386		kgem->wedged = 1;
1387	}
1388
1389	DBG(("%s: maximum batch size? %d\n", __FUNCTION__,
1390	     kgem->batch_size));
1391
1392	kgem->half_cpu_cache_pages = cpu_cache_size() >> 13;
1393	DBG(("%s: last-level cache size: %d bytes, threshold in pages: %d\n",
1394	     __FUNCTION__, cpu_cache_size(), kgem->half_cpu_cache_pages));
1395
1396	kgem->next_request = __kgem_request_alloc(kgem);
1397
1398	DBG(("%s: cpu bo enabled %d: llc? %d, set-cache-level? %d, userptr? %d\n", __FUNCTION__,
1399	     !DBG_NO_CPU && (kgem->has_llc | kgem->has_userptr | kgem->has_caching),
1400	     kgem->has_llc, kgem->has_caching, kgem->has_userptr));
1401
1402	VG_CLEAR(aperture);
1403	aperture.aper_size = 0;
1404	(void)do_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
1405	if (aperture.aper_size == 0)
1406		aperture.aper_size = 64*1024*1024;
1407
1408	DBG(("%s: aperture size %lld, available now %lld\n",
1409	     __FUNCTION__,
1410	     (long long)aperture.aper_size,
1411	     (long long)aperture.aper_available_size));
1412
1413	kgem->aperture_total = aperture.aper_size;
1414	kgem->aperture_high = aperture.aper_size * 3/4;
1415	kgem->aperture_low = aperture.aper_size * 1/3;
1416	if (gen < 033) {
1417		/* Severe alignment penalties */
1418		kgem->aperture_high /= 2;
1419		kgem->aperture_low /= 2;
1420	}
1421	DBG(("%s: aperture low=%d [%d], high=%d [%d]\n", __FUNCTION__,
1422	     kgem->aperture_low, kgem->aperture_low / (1024*1024),
1423	     kgem->aperture_high, kgem->aperture_high / (1024*1024)));
1424
1425	kgem->aperture_mappable = 256 * 1024 * 1024;
1426	if (dev != NULL)
1427		kgem->aperture_mappable = agp_aperture_size(dev, gen);
1428	if (kgem->aperture_mappable == 0 ||
1429	    kgem->aperture_mappable > aperture.aper_size)
1430		kgem->aperture_mappable = aperture.aper_size;
1431	DBG(("%s: aperture mappable=%d [%d MiB]\n", __FUNCTION__,
1432	     kgem->aperture_mappable, kgem->aperture_mappable / (1024*1024)));
1433
1434	kgem->aperture_fenceable = MIN(256*1024*1024, kgem->aperture_mappable);
1435	DBG(("%s: aperture fenceable=%d [%d MiB]\n", __FUNCTION__,
1436	     kgem->aperture_fenceable, kgem->aperture_fenceable / (1024*1024)));
1437
1438	kgem->buffer_size = 64 * 1024;
1439	while (kgem->buffer_size < kgem->aperture_mappable >> 10)
1440		kgem->buffer_size *= 2;
1441	if (kgem->buffer_size >> 12 > kgem->half_cpu_cache_pages)
1442		kgem->buffer_size = kgem->half_cpu_cache_pages << 12;
1443	kgem->buffer_size = 1 << __fls(kgem->buffer_size);
1444	DBG(("%s: buffer size=%d [%d KiB]\n", __FUNCTION__,
1445	     kgem->buffer_size, kgem->buffer_size / 1024));
1446	assert(kgem->buffer_size);
1447
1448	kgem->max_object_size = 3 * (kgem->aperture_high >> 12) << 10;
1449	kgem->max_gpu_size = kgem->max_object_size;
1450	if (!kgem->has_llc && kgem->max_gpu_size > MAX_CACHE_SIZE)
1451		kgem->max_gpu_size = MAX_CACHE_SIZE;
1452
1453	totalram = total_ram_size();
1454	if (totalram == 0) {
1455		DBG(("%s: total ram size unknown, assuming maximum of total aperture\n",
1456		     __FUNCTION__));
1457		totalram = kgem->aperture_total;
1458	}
1459	DBG(("%s: total ram=%ld\n", __FUNCTION__, (long)totalram));
1460	if (kgem->max_object_size > totalram / 2)
1461		kgem->max_object_size = totalram / 2;
1462	if (kgem->max_gpu_size > totalram / 4)
1463		kgem->max_gpu_size = totalram / 4;
1464
1465	if (kgem->aperture_high > totalram / 2) {
1466		kgem->aperture_high = totalram / 2;
1467		kgem->aperture_low = kgem->aperture_high / 4;
		DBG(("%s: reduced aperture watermarks to fit into ram; low=%d [%d], high=%d [%d]\n", __FUNCTION__,
		     kgem->aperture_low, kgem->aperture_low / (1024*1024),
		     kgem->aperture_high, kgem->aperture_high / (1024*1024)));
1471	}
1472
1473	kgem->max_cpu_size = kgem->max_object_size;
1474
1475	half_gpu_max = kgem->max_gpu_size / 2;
1476	kgem->max_copy_tile_size = (MAX_CACHE_SIZE + 1)/2;
1477	if (kgem->max_copy_tile_size > half_gpu_max)
1478		kgem->max_copy_tile_size = half_gpu_max;
1479
1480	if (kgem->has_llc)
1481		kgem->max_upload_tile_size = kgem->max_copy_tile_size;
1482	else
1483		kgem->max_upload_tile_size = kgem->aperture_fenceable / 4;
1484	if (kgem->max_upload_tile_size > half_gpu_max)
1485		kgem->max_upload_tile_size = half_gpu_max;
1486	if (kgem->max_upload_tile_size > kgem->aperture_high/2)
1487		kgem->max_upload_tile_size = kgem->aperture_high/2;
1488	if (kgem->max_upload_tile_size > kgem->aperture_low)
1489		kgem->max_upload_tile_size = kgem->aperture_low;
1490	if (kgem->max_upload_tile_size < 16*PAGE_SIZE)
1491		kgem->max_upload_tile_size = 16*PAGE_SIZE;
1492
1493	kgem->large_object_size = MAX_CACHE_SIZE;
1494	if (kgem->large_object_size > half_gpu_max)
1495		kgem->large_object_size = half_gpu_max;
1496	if (kgem->max_copy_tile_size > kgem->aperture_high/2)
1497		kgem->max_copy_tile_size = kgem->aperture_high/2;
1498	if (kgem->max_copy_tile_size > kgem->aperture_low)
1499		kgem->max_copy_tile_size = kgem->aperture_low;
1500	if (kgem->max_copy_tile_size < 16*PAGE_SIZE)
1501		kgem->max_copy_tile_size = 16*PAGE_SIZE;
1502
1503	if (kgem->has_llc | kgem->has_caching | kgem->has_userptr) {
1504		if (kgem->large_object_size > kgem->max_cpu_size)
1505			kgem->large_object_size = kgem->max_cpu_size;
1506	} else
1507		kgem->max_cpu_size = 0;
1508	if (DBG_NO_CPU)
1509		kgem->max_cpu_size = 0;
1510
1511	DBG(("%s: maximum object size=%d\n",
1512	     __FUNCTION__, kgem->max_object_size));
	DBG(("%s: large object threshold=%d\n",
	     __FUNCTION__, kgem->large_object_size));
1515	DBG(("%s: max object sizes (gpu=%d, cpu=%d, tile upload=%d, copy=%d)\n",
1516	     __FUNCTION__,
1517	     kgem->max_gpu_size, kgem->max_cpu_size,
1518	     kgem->max_upload_tile_size, kgem->max_copy_tile_size));
1519
1520	/* Convert the aperture thresholds to pages */
1521	kgem->aperture_mappable /= PAGE_SIZE;
1522	kgem->aperture_fenceable /= PAGE_SIZE;
1523	kgem->aperture_low /= PAGE_SIZE;
1524	kgem->aperture_high /= PAGE_SIZE;
1525	kgem->aperture_total /= PAGE_SIZE;
1526
1527	kgem->fence_max = gem_param(kgem, I915_PARAM_NUM_FENCES_AVAIL) - 2;
1528	if ((int)kgem->fence_max < 0)
1529		kgem->fence_max = 5; /* minimum safe value for all hw */
1530	DBG(("%s: max fences=%d\n", __FUNCTION__, kgem->fence_max));
1531
1532	kgem->batch_flags_base = 0;
1533	if (kgem->has_no_reloc)
1534		kgem->batch_flags_base |= LOCAL_I915_EXEC_NO_RELOC;
1535	if (kgem->has_handle_lut)
1536		kgem->batch_flags_base |= LOCAL_I915_EXEC_HANDLE_LUT;
1537	if (kgem->has_pinned_batches)
1538		kgem->batch_flags_base |= LOCAL_I915_EXEC_IS_PINNED;
1539
1540	kgem_init_swizzling(kgem);
1541}
1542
1543/* XXX hopefully a good approximation */
1544static uint32_t kgem_get_unique_id(struct kgem *kgem)
1545{
1546	uint32_t id;
1547	id = ++kgem->unique_id;
1548	if (id == 0)
1549		id = ++kgem->unique_id;
1550	return id;
1551}
1552
1553inline static uint32_t kgem_pitch_alignment(struct kgem *kgem, unsigned flags)
1554{
1555	if (flags & CREATE_PRIME)
1556		return 256;
1557	if (flags & CREATE_SCANOUT)
1558		return 64;
1559	return 8;
1560}
1561
1562void kgem_get_tile_size(struct kgem *kgem, int tiling, int pitch,
1563			int *tile_width, int *tile_height, int *tile_size)
1564{
1565	if (kgem->gen <= 030) {
1566		if (tiling) {
1567			if (kgem->gen < 030) {
1568				*tile_width = 128;
1569				*tile_height = 16;
1570				*tile_size = 2048;
1571			} else {
1572				*tile_width = 512;
1573				*tile_height = 8;
1574				*tile_size = 4096;
1575			}
1576		} else {
1577			*tile_width = 1;
1578			*tile_height = 1;
1579			*tile_size = 1;
1580		}
1581	} else switch (tiling) {
1582	default:
1583	case I915_TILING_NONE:
1584		*tile_width = 1;
1585		*tile_height = 1;
1586		*tile_size = 1;
1587		break;
1588	case I915_TILING_X:
1589		*tile_width = 512;
1590		*tile_height = 8;
1591		*tile_size = 4096;
1592		break;
1593	case I915_TILING_Y:
1594		*tile_width = 128;
1595		*tile_height = 32;
1596		*tile_size = 4096;
1597		break;
1598	}
1599
1600	/* Force offset alignment to tile-row */
1601	if (tiling && kgem->gen < 033)
1602		*tile_width = pitch;
1603}
1604
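/* Compute the bo size (and pitch) needed for a width x height x bpp surface
 * with the given tiling. For example, a 1024x768 32-bpp X-tiled surface on
 * gen4+ gets pitch = ALIGN(1024*4, 512) = 4096 and a page-aligned size of
 * 4096*768 = 3 MiB.
 */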
1605static uint32_t kgem_surface_size(struct kgem *kgem,
1606				  bool relaxed_fencing,
1607				  unsigned flags,
1608				  uint32_t width,
1609				  uint32_t height,
1610				  uint32_t bpp,
1611				  uint32_t tiling,
1612				  uint32_t *pitch)
1613{
1614	uint32_t tile_width, tile_height;
1615	uint32_t size;
1616
1617	assert(width <= MAXSHORT);
1618	assert(height <= MAXSHORT);
1619	assert(bpp >= 8);
1620
1621	if (kgem->gen <= 030) {
1622		if (tiling) {
1623			if (kgem->gen < 030) {
1624				tile_width = 128;
1625				tile_height = 16;
1626			} else {
1627				tile_width = 512;
1628				tile_height = 8;
1629			}
1630		} else {
1631			tile_width = 2 * bpp >> 3;
1632			tile_width = ALIGN(tile_width,
1633					   kgem_pitch_alignment(kgem, flags));
1634			tile_height = 1;
1635		}
1636	} else switch (tiling) {
1637	default:
1638	case I915_TILING_NONE:
1639		tile_width = 2 * bpp >> 3;
1640		tile_width = ALIGN(tile_width,
1641				   kgem_pitch_alignment(kgem, flags));
1642		tile_height = 1;
1643		break;
1644
1645	case I915_TILING_X:
1646		tile_width = 512;
1647		tile_height = 8;
1648		break;
1649	case I915_TILING_Y:
1650		tile_width = 128;
1651		tile_height = 32;
1652		break;
1653	}
1654	/* XXX align to an even tile row */
1655	if (!kgem->has_relaxed_fencing)
1656		tile_height *= 2;
1657
1658	*pitch = ALIGN(width * bpp / 8, tile_width);
1659	height = ALIGN(height, tile_height);
1660	DBG(("%s: tile_width=%d, tile_height=%d => aligned pitch=%d, height=%d\n",
1661	     __FUNCTION__, tile_width, tile_height, *pitch, height));
1662
1663	if (kgem->gen >= 040)
1664		return PAGE_ALIGN(*pitch * height);
1665
1666	/* If it is too wide for the blitter, don't even bother.  */
1667	if (tiling != I915_TILING_NONE) {
1668		if (*pitch > 8192) {
1669			DBG(("%s: too wide for tiled surface (pitch=%d, limit=%d)\n",
1670			     __FUNCTION__, *pitch, 8192));
1671			return 0;
1672		}
1673
1674		for (size = tile_width; size < *pitch; size <<= 1)
1675			;
1676		*pitch = size;
1677	} else {
1678		if (*pitch >= 32768) {
1679			DBG(("%s: too wide for linear surface (pitch=%d, limit=%d)\n",
1680			     __FUNCTION__, *pitch, 32767));
1681			return 0;
1682		}
1683	}
1684
1685	size = *pitch * height;
1686	if (relaxed_fencing || tiling == I915_TILING_NONE)
1687		return PAGE_ALIGN(size);
1688
	/* We need to allocate a power-of-two fence region for a tiled buffer. */
1690	if (kgem->gen < 030)
1691		tile_width = 512 * 1024;
1692	else
1693		tile_width = 1024 * 1024;
1694	while (tile_width < size)
1695		tile_width *= 2;
1696	return tile_width;
1697}
1698
1699bool kgem_check_surface_size(struct kgem *kgem,
1700			     uint32_t width,
1701			     uint32_t height,
1702			     uint32_t bpp,
1703			     uint32_t tiling,
1704			     uint32_t pitch,
1705			     uint32_t size)
1706{
1707	uint32_t min_size, min_pitch;
1708	int tile_width, tile_height, tile_size;
1709
1710	DBG(("%s(width=%d, height=%d, bpp=%d, tiling=%d, pitch=%d, size=%d)\n",
1711	     __FUNCTION__, width, height, bpp, tiling, pitch, size));
1712
1713	if (pitch & 3)
1714		return false;
1715
1716	min_size = kgem_surface_size(kgem, kgem->has_relaxed_fencing, 0,
1717				     width, height, bpp, tiling,
1718				     &min_pitch);
1719
1720	DBG(("%s: min_pitch=%d, min_size=%d\n", __FUNCTION__, min_pitch, min_size));
1721
1722	if (size < min_size)
1723		return false;
1724
1725	if (pitch < min_pitch)
1726		return false;
1727
1728	kgem_get_tile_size(kgem, tiling, min_pitch,
1729			   &tile_width, &tile_height, &tile_size);
1730
1731	DBG(("%s: tile_width=%d, tile_size=%d\n", __FUNCTION__, tile_width, tile_size));
1732	if (pitch & (tile_width - 1))
1733		return false;
1734	if (size & (tile_size - 1))
1735		return false;
1736
1737	return true;
1738}
1739
1740static uint32_t kgem_aligned_height(struct kgem *kgem,
1741				    uint32_t height, uint32_t tiling)
1742{
1743	uint32_t tile_height;
1744
1745	if (kgem->gen <= 030) {
1746		tile_height = tiling ? kgem->gen < 030 ? 16 : 8 : 1;
1747	} else switch (tiling) {
1748		/* XXX align to an even tile row */
1749	default:
1750	case I915_TILING_NONE:
1751		tile_height = 1;
1752		break;
1753	case I915_TILING_X:
1754		tile_height = 8;
1755		break;
1756	case I915_TILING_Y:
1757		tile_height = 32;
1758		break;
1759	}
1760
1761	/* XXX align to an even tile row */
1762	if (!kgem->has_relaxed_fencing)
1763		tile_height *= 2;
1764
1765	return ALIGN(height, tile_height);
1766}
1767
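/* Reserve an exec slot for the bo in the current batch, recording its
 * presumed offset and charging its pages against the aperture estimate.
 */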
1768static struct drm_i915_gem_exec_object2 *
1769kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo)
1770{
1771	struct drm_i915_gem_exec_object2 *exec;
1772
1773	DBG(("%s: handle=%d, index=%d\n",
1774	     __FUNCTION__, bo->handle, kgem->nexec));
1775
1776	assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
1777	bo->target_handle = kgem->has_handle_lut ? kgem->nexec : bo->handle;
1778	exec = memset(&kgem->exec[kgem->nexec++], 0, sizeof(*exec));
1779	exec->handle = bo->handle;
1780	exec->offset = bo->presumed_offset;
1781
1782	kgem->aperture += num_pages(bo);
1783
1784	return exec;
1785}
1786
1787static void kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo)
1788{
1789	assert(bo->refcnt);
1790	assert(bo->proxy == NULL);
1791
1792	bo->exec = kgem_add_handle(kgem, bo);
1793	bo->rq = MAKE_REQUEST(kgem->next_request, kgem->ring);
1794
1795	list_move_tail(&bo->request, &kgem->next_request->buffers);
1796	if (bo->io && !list_is_empty(&bo->list))
1797		list_move(&bo->list, &kgem->batch_buffers);
1798
1799	/* XXX is it worth working around gcc here? */
1800	kgem->flush |= bo->flush;
1801}
1802
1803static uint32_t kgem_end_batch(struct kgem *kgem)
1804{
1805	kgem->batch[kgem->nbatch++] = MI_BATCH_BUFFER_END;
1806	if (kgem->nbatch & 1)
1807		kgem->batch[kgem->nbatch++] = MI_NOOP;
1808
1809	return kgem->nbatch;
1810}
1811
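/* Resolve relocations that point back into the batch itself: only once the
 * batch bo is known can its handle and presumed offset be filled in. The
 * first 256 self-relocations are recorded individually; any overflow is
 * found by rescanning for the ~0 target-handle sentinel.
 */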
1812static void kgem_fixup_self_relocs(struct kgem *kgem, struct kgem_bo *bo)
1813{
1814	int n;
1815
1816	assert(kgem->nreloc__self <= 256);
1817	if (kgem->nreloc__self == 0)
1818		return;
1819
1820	for (n = 0; n < kgem->nreloc__self; n++) {
1821		int i = kgem->reloc__self[n];
1822		assert(kgem->reloc[i].target_handle == ~0U);
1823		kgem->reloc[i].target_handle = bo->target_handle;
1824		kgem->reloc[i].presumed_offset = bo->presumed_offset;
1825		kgem->batch[kgem->reloc[i].offset/sizeof(kgem->batch[0])] =
1826			kgem->reloc[i].delta + bo->presumed_offset;
1827	}
1828
1829	if (n == 256) {
1830		for (n = kgem->reloc__self[255]; n < kgem->nreloc; n++) {
1831			if (kgem->reloc[n].target_handle == ~0U) {
1832				kgem->reloc[n].target_handle = bo->target_handle;
1833				kgem->reloc[n].presumed_offset = bo->presumed_offset;
1834				kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
1835					kgem->reloc[n].delta + bo->presumed_offset;
1836			}
1837		}
1838
1839	}
1840}
1841
1842static void kgem_bo_binding_free(struct kgem *kgem, struct kgem_bo *bo)
1843{
1844	struct kgem_bo_binding *b;
1845
1846	b = bo->binding.next;
1847	while (b) {
1848		struct kgem_bo_binding *next = b->next;
1849		free(b);
1850		b = next;
1851	}
1852}
1853
1854static void kgem_bo_rmfb(struct kgem *kgem, struct kgem_bo *bo)
1855{
1856	if (bo->scanout && bo->delta) {
1857		DBG(("%s: releasing fb=%d for handle=%d\n",
1858		     __FUNCTION__, bo->delta, bo->handle));
1859		/* XXX will leak if we are not DRM_MASTER. *shrug* */
1860		do_ioctl(kgem->fd, DRM_IOCTL_MODE_RMFB, &bo->delta);
1861		bo->delta = 0;
1862	}
1863}
1864
1865static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
1866{
1867	DBG(("%s: handle=%d, size=%d\n", __FUNCTION__, bo->handle, bytes(bo)));
1868	assert(bo->refcnt == 0);
1869	assert(bo->proxy == NULL);
1870	assert(bo->exec == NULL);
1871	assert(!bo->snoop || bo->rq == NULL);
1872
1873#ifdef DEBUG_MEMORY
1874	kgem->debug_memory.bo_allocs--;
1875	kgem->debug_memory.bo_bytes -= bytes(bo);
1876#endif
1877
1878	kgem_bo_binding_free(kgem, bo);
1879	kgem_bo_rmfb(kgem, bo);
1880
1881	if (IS_USER_MAP(bo->map__cpu)) {
1882		assert(bo->rq == NULL);
1883		assert(!__kgem_busy(kgem, bo->handle));
1884		assert(MAP(bo->map__cpu) != bo || bo->io || bo->flush);
1885		if (!(bo->io || bo->flush)) {
1886			DBG(("%s: freeing snooped base\n", __FUNCTION__));
1887			assert(bo != MAP(bo->map__cpu));
1888			free(MAP(bo->map__cpu));
1889		}
1890		bo->map__cpu = NULL;
1891	}
1892
1893	DBG(("%s: releasing %p:%p vma for handle=%d, count=%d\n",
1894	     __FUNCTION__, bo->map__gtt, bo->map__cpu,
1895	     bo->handle, list_is_empty(&bo->vma) ? 0 : kgem->vma[bo->map__gtt == NULL].count));
1896
1897	if (!list_is_empty(&bo->vma)) {
1898		_list_del(&bo->vma);
1899		kgem->vma[bo->map__gtt == NULL].count--;
1900	}
1901
1902	if (bo->map__gtt)
1903		munmap(MAP(bo->map__gtt), bytes(bo));
1904	if (bo->map__cpu)
1905		munmap(MAP(bo->map__cpu), bytes(bo));
1906
1907	_list_del(&bo->list);
1908	_list_del(&bo->request);
1909	gem_close(kgem->fd, bo->handle);
1910
1911	if (!bo->io && !DBG_NO_MALLOC_CACHE) {
1912		*(struct kgem_bo **)bo = __kgem_freed_bo;
1913		__kgem_freed_bo = bo;
1914	} else
1915		free(bo);
1916}
1917
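/* Park an idle, reusable bo in the inactive cache (bucketed by size) and
 * keep any GTT/CPU mapping on the vma LRU so it can be reused or expired
 * later.
 */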
1918inline static void kgem_bo_move_to_inactive(struct kgem *kgem,
1919					    struct kgem_bo *bo)
1920{
1921	DBG(("%s: moving handle=%d to inactive\n", __FUNCTION__, bo->handle));
1922
1923	assert(bo->refcnt == 0);
1924	assert(bo->reusable);
1925	assert(bo->rq == NULL);
1926	assert(bo->exec == NULL);
1927	assert(bo->domain != DOMAIN_GPU);
1928	assert(!bo->proxy);
1929	assert(!bo->io);
1930	assert(!bo->scanout);
1931	assert(!bo->snoop);
1932	assert(!bo->flush);
1933	assert(!bo->needs_flush);
1934	assert(list_is_empty(&bo->vma));
1935	assert_tiling(kgem, bo);
1936	assert_cacheing(kgem, bo);
1937	ASSERT_IDLE(kgem, bo->handle);
1938
1939	if (bucket(bo) >= NUM_CACHE_BUCKETS) {
1940		if (bo->map__gtt) {
1941			munmap(MAP(bo->map__gtt), bytes(bo));
1942			bo->map__gtt = NULL;
1943		}
1944
1945		list_move(&bo->list, &kgem->large_inactive);
1946	} else {
1947		assert(bo->flush == false);
1948		list_move(&bo->list, &kgem->inactive[bucket(bo)]);
1949		if (bo->map__gtt) {
1950			if (!kgem_bo_can_map(kgem, bo)) {
1951				munmap(MAP(bo->map__gtt), bytes(bo));
1952				bo->map__gtt = NULL;
1953			}
1954			if (bo->map__gtt) {
1955				list_add(&bo->vma, &kgem->vma[0].inactive[bucket(bo)]);
1956				kgem->vma[0].count++;
1957			}
1958		}
1959		if (bo->map__cpu && !bo->map__gtt) {
1960			list_add(&bo->vma, &kgem->vma[1].inactive[bucket(bo)]);
1961			kgem->vma[1].count++;
1962		}
1963	}
1964
1965	kgem->need_expire = true;
1966}
1967
1968static struct kgem_bo *kgem_bo_replace_io(struct kgem_bo *bo)
1969{
1970	struct kgem_bo *base;
1971
1972	if (!bo->io)
1973		return bo;
1974
1975	assert(!bo->snoop);
1976	if (__kgem_freed_bo) {
1977		base = __kgem_freed_bo;
1978		__kgem_freed_bo = *(struct kgem_bo **)base;
1979	} else
1980		base = malloc(sizeof(*base));
1981	if (base) {
1982		DBG(("%s: transferring io handle=%d to bo\n",
1983		     __FUNCTION__, bo->handle));
1984		/* transfer the handle to a minimum bo */
1985		memcpy(base, bo, sizeof(*base));
1986		base->io = false;
1987		list_init(&base->list);
1988		list_replace(&bo->request, &base->request);
1989		list_replace(&bo->vma, &base->vma);
1990		free(bo);
1991		bo = base;
1992	} else
1993		bo->reusable = false;
1994
1995	return bo;
1996}
1997
1998inline static void kgem_bo_remove_from_inactive(struct kgem *kgem,
1999						struct kgem_bo *bo)
2000{
2001	DBG(("%s: removing handle=%d from inactive\n", __FUNCTION__, bo->handle));
2002
2003	list_del(&bo->list);
2004	assert(bo->rq == NULL);
2005	assert(bo->exec == NULL);
2006	if (!list_is_empty(&bo->vma)) {
2007		assert(bo->map__gtt || bo->map__cpu);
2008		list_del(&bo->vma);
2009		kgem->vma[bo->map__gtt == NULL].count--;
2010	}
2011}
2012
2013inline static void kgem_bo_remove_from_active(struct kgem *kgem,
2014					      struct kgem_bo *bo)
2015{
2016	DBG(("%s: removing handle=%d from active\n", __FUNCTION__, bo->handle));
2017
2018	list_del(&bo->list);
2019	assert(bo->rq != NULL);
2020	if (RQ(bo->rq) == (void *)kgem) {
2021		assert(bo->exec == NULL);
2022		list_del(&bo->request);
2023	}
2024	assert(list_is_empty(&bo->vma));
2025}
2026
2027static void _kgem_bo_delete_buffer(struct kgem *kgem, struct kgem_bo *bo)
2028{
2029	struct kgem_buffer *io = (struct kgem_buffer *)bo->proxy;
2030
2031	DBG(("%s: size=%d, offset=%d, parent used=%d\n",
2032	     __FUNCTION__, bo->size.bytes, bo->delta, io->used));
2033
2034	if (ALIGN(bo->delta + bo->size.bytes, UPLOAD_ALIGNMENT) == io->used)
2035		io->used = bo->delta;
2036}
2037
2038static bool check_scanout_size(struct kgem *kgem,
2039			       struct kgem_bo *bo,
2040			       int width, int height)
2041{
2042	struct drm_mode_fb_cmd info;
2043
2044	assert(bo->scanout);
2045
2046	VG_CLEAR(info);
2047	info.fb_id = bo->delta;
2048
2049	if (do_ioctl(kgem->fd, DRM_IOCTL_MODE_GETFB, &info))
2050		return false;
2051
2052	gem_close(kgem->fd, info.handle);
2053
2054	if (width != info.width || height != info.height) {
2055		DBG(("%s: not using scanout %d (%dx%d), want (%dx%d)\n",
2056		     __FUNCTION__,
2057		     info.fb_id, info.width, info.height,
2058		     width, height));
2059		return false;
2060	}
2061
2062	return true;
2063}
2064
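/* Retired scanout buffers keep their framebuffer id (in bo->delta) and are
 * held on a dedicated scanout list rather than the ordinary caches.  A purged
 * scanout (e.g. one backed by stolen memory) is freed immediately; still-busy
 * buffers are queued at the tail so idle ones are picked first.
 */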
2065static void kgem_bo_move_to_scanout(struct kgem *kgem, struct kgem_bo *bo)
2066{
2067	assert(bo->refcnt == 0);
2068	assert(bo->scanout);
2069	assert(!bo->flush);
2070	assert(!bo->snoop);
2071	assert(!bo->io);
2072
2073	if (bo->purged) { /* for stolen fb */
2074		if (!bo->exec) {
2075			DBG(("%s: discarding purged scanout - stolen?\n",
2076			     __FUNCTION__));
2077			kgem_bo_free(kgem, bo);
2078		}
2079		return;
2080	}
2081
2082	DBG(("%s: moving %d [fb %d] to scanout cache, active? %d\n",
2083	     __FUNCTION__, bo->handle, bo->delta, bo->rq != NULL));
2084	if (bo->rq)
2085		list_move_tail(&bo->list, &kgem->scanout);
2086	else
2087		list_move(&bo->list, &kgem->scanout);
2088
2089	kgem->need_expire = true;
2090
2091}
2092
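/* Return a CPU-snooped (cacheable) bo to the snoop cache for reuse by later
 * CPU uploads.  Buffers larger than half of max_cpu_size are not considered
 * worth keeping and are freed, as is everything when the snoop cache is
 * disabled via DBG_NO_SNOOP_CACHE.
 */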
2093static void kgem_bo_move_to_snoop(struct kgem *kgem, struct kgem_bo *bo)
2094{
2095	assert(bo->reusable);
2096	assert(!bo->scanout);
2097	assert(!bo->flush);
2098	assert(!bo->needs_flush);
2099	assert(bo->refcnt == 0);
2100	assert(bo->exec == NULL);
2101
2102	if (DBG_NO_SNOOP_CACHE) {
2103		kgem_bo_free(kgem, bo);
2104		return;
2105	}
2106
2107	if (num_pages(bo) > kgem->max_cpu_size >> 13) {
2108		DBG(("%s: handle=%d discarding large CPU buffer (%d > %d pages)\n",
2109		     __FUNCTION__, bo->handle, num_pages(bo), kgem->max_cpu_size >> 13));
2110		kgem_bo_free(kgem, bo);
2111		return;
2112	}
2113
2114	assert(bo->tiling == I915_TILING_NONE);
2115	assert(bo->rq == NULL);
2116
2117	DBG(("%s: moving %d to snoop cache\n", __FUNCTION__, bo->handle));
2118	list_add(&bo->list, &kgem->snoop);
2119	kgem->need_expire = true;
2120}
2121
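/* Dispatch an unreferenced, retired bo to the appropriate cache: imported
 * prime buffers are freed, snooped buffers go to the snoop cache, scanouts to
 * the scanout list, and any reusable bo that can be marked purgeable moves to
 * the inactive cache (reported as retired to the caller).  Anything else is
 * freed outright.
 */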
2122static bool kgem_bo_move_to_cache(struct kgem *kgem, struct kgem_bo *bo)
2123{
2124	bool retired = false;
2125
2126	DBG(("%s: release handle=%d\n", __FUNCTION__, bo->handle));
2127
2128	if (bo->prime) {
2129		DBG(("%s: discarding imported prime handle=%d\n",
2130		     __FUNCTION__, bo->handle));
2131		kgem_bo_free(kgem, bo);
2132	} else if (bo->snoop) {
2133		kgem_bo_move_to_snoop(kgem, bo);
2134	} else if (bo->scanout) {
2135		kgem_bo_move_to_scanout(kgem, bo);
2136	} else if ((bo = kgem_bo_replace_io(bo))->reusable &&
2137		   kgem_bo_set_purgeable(kgem, bo)) {
2138		kgem_bo_move_to_inactive(kgem, bo);
2139		retired = true;
2140	} else
2141		kgem_bo_free(kgem, bo);
2142
2143	return retired;
2144}
2145
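/* Look for an idle snooped bo of at least num_pages.  A buffer more than
 * twice the requested size is only remembered as a fallback so that a closer
 * fit is preferred; if the cache is empty we first try to retire outstanding
 * work.
 */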
2146static struct kgem_bo *
2147search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
2148{
2149	struct kgem_bo *bo, *first = NULL;
2150
2151	DBG(("%s: num_pages=%d, flags=%x\n", __FUNCTION__, num_pages, flags));
2152
2153	if ((kgem->has_caching | kgem->has_userptr) == 0)
2154		return NULL;
2155
2156	if (list_is_empty(&kgem->snoop)) {
2157		DBG(("%s: snoop cache empty\n", __FUNCTION__));
2158		if (!__kgem_throttle_retire(kgem, flags)) {
2159			DBG(("%s: nothing retired\n", __FUNCTION__));
2160			return NULL;
2161		}
2162	}
2163
2164	list_for_each_entry(bo, &kgem->snoop, list) {
2165		assert(bo->refcnt == 0);
2166		assert(bo->snoop);
2167		assert(!bo->scanout);
2168		assert(!bo->purged);
2169		assert(bo->proxy == NULL);
2170		assert(bo->tiling == I915_TILING_NONE);
2171		assert(bo->rq == NULL);
2172		assert(bo->exec == NULL);
2173
2174		if (num_pages > num_pages(bo))
2175			continue;
2176
2177		if (num_pages(bo) > 2*num_pages) {
2178			if (first == NULL)
2179				first = bo;
2180			continue;
2181		}
2182
2183		list_del(&bo->list);
2184		bo->pitch = 0;
2185		bo->delta = 0;
2186
2187		DBG(("  %s: found handle=%d (num_pages=%d) in snoop cache\n",
2188		     __FUNCTION__, bo->handle, num_pages(bo)));
2189		return bo;
2190	}
2191
2192	if (first) {
2193		list_del(&first->list);
2194		first->pitch = 0;
2195		first->delta = 0;
2196
2197		DBG(("  %s: found handle=%d (num_pages=%d) in snoop cache\n",
2198		     __FUNCTION__, first->handle, num_pages(first)));
2199		return first;
2200	}
2201
2202	return NULL;
2203}
2204
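/* If this bo is the only object referenced by the current batch, the pending
 * operations can simply be discarded by resetting the batch instead of
 * submitting it.  The temporary refcnt bump keeps the bo alive across
 * kgem_reset().
 */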
2205void kgem_bo_undo(struct kgem *kgem, struct kgem_bo *bo)
2206{
2207	if (kgem->nexec != 1 || bo->exec == NULL)
2208		return;
2209
2210	assert(bo);
2211	DBG(("%s: only handle in batch, discarding last operations for handle=%d\n",
2212	     __FUNCTION__, bo->handle));
2213
2214	assert(bo->exec == &kgem->exec[0]);
2215	assert(kgem->exec[0].handle == bo->handle);
2216	assert(RQ(bo->rq) == kgem->next_request);
2217
2218	bo->refcnt++;
2219	kgem_reset(kgem);
2220	bo->refcnt--;
2221
2222	assert(kgem->nreloc == 0);
2223	assert(kgem->nexec == 0);
2224	assert(bo->exec == NULL);
2225}
2226
2227void kgem_bo_pair_undo(struct kgem *kgem, struct kgem_bo *a, struct kgem_bo *b)
2228{
2229	if (kgem->nexec > 2)
2230		return;
2231
2232	if (kgem->nexec == 1) {
2233		if (a)
2234			kgem_bo_undo(kgem, a);
2235		if (b)
2236			kgem_bo_undo(kgem, b);
2237		return;
2238	}
2239
2240	if (a == NULL || b == NULL)
2241		return;
2242	if (a->exec == NULL || b->exec == NULL)
2243		return;
2244
2245	DBG(("%s: only handles in batch, discarding last operations for handle=%d and handle=%d\n",
2246	     __FUNCTION__, a->handle, b->handle));
2247
2248	assert(a->exec == &kgem->exec[0] || a->exec == &kgem->exec[1]);
2249	assert(a->handle == kgem->exec[0].handle || a->handle == kgem->exec[1].handle);
2250	assert(RQ(a->rq) == kgem->next_request);
2251	assert(b->exec == &kgem->exec[0] || b->exec == &kgem->exec[1]);
2252	assert(b->handle == kgem->exec[0].handle || b->handle == kgem->exec[1].handle);
2253	assert(RQ(b->rq) == kgem->next_request);
2254
2255	a->refcnt++;
2256	b->refcnt++;
2257	kgem_reset(kgem);
2258	b->refcnt--;
2259	a->refcnt--;
2260
2261	assert(kgem->nreloc == 0);
2262	assert(kgem->nexec == 0);
2263	assert(a->exec == NULL);
2264	assert(b->exec == NULL);
2265}
2266
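/* Called once the last reference is dropped: decide whether the bo can be
 * kept in one of the caches or must be freed.  Snooped and scanout buffers
 * have their own lists, buffers still tracked by a request are parked on the
 * matching active list, and idle reusable buffers end up in the inactive
 * cache, generally after being marked purgeable.
 */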
2267static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
2268{
2269	DBG(("%s: handle=%d, size=%d\n", __FUNCTION__, bo->handle, bytes(bo)));
2270
2271	assert(list_is_empty(&bo->list));
2272	assert(bo->refcnt == 0);
2273	assert(bo->proxy == NULL);
2274	assert(bo->active_scanout == 0);
2275	assert_tiling(kgem, bo);
2276
2277	bo->binding.offset = 0;
2278
2279	if (DBG_NO_CACHE)
2280		goto destroy;
2281
2282	if (bo->prime)
2283		goto destroy;
2284
2285	if (bo->snoop && !bo->flush) {
2286		DBG(("%s: handle=%d is snooped\n", __FUNCTION__, bo->handle));
2287		assert(bo->reusable);
2288		assert(list_is_empty(&bo->list));
2289		if (bo->exec == NULL && bo->rq && !__kgem_busy(kgem, bo->handle))
2290			__kgem_bo_clear_busy(bo);
2291		if (bo->rq == NULL)
2292			kgem_bo_move_to_snoop(kgem, bo);
2293		return;
2294	}
2295	if (!IS_USER_MAP(bo->map__cpu))
2296		bo->flush = false;
2297
2298	if (bo->scanout) {
2299		kgem_bo_move_to_scanout(kgem, bo);
2300		return;
2301	}
2302
2303	if (bo->io)
2304		bo = kgem_bo_replace_io(bo);
2305	if (!bo->reusable) {
2306		DBG(("%s: handle=%d, not reusable\n",
2307		     __FUNCTION__, bo->handle));
2308		goto destroy;
2309	}
2310
2311	assert(list_is_empty(&bo->vma));
2312	assert(list_is_empty(&bo->list));
2313	assert(bo->flush == false);
2314	assert(bo->snoop == false);
2315	assert(bo->io == false);
2316	assert(bo->scanout == false);
2317	assert_cacheing(kgem, bo);
2318
2319	kgem_bo_undo(kgem, bo);
2320	assert(bo->refcnt == 0);
2321
2322	if (bo->rq && bo->exec == NULL && !__kgem_busy(kgem, bo->handle))
2323		__kgem_bo_clear_busy(bo);
2324
2325	if (bo->rq) {
2326		struct list *cache;
2327
2328		DBG(("%s: handle=%d -> active\n", __FUNCTION__, bo->handle));
2329		if (bucket(bo) < NUM_CACHE_BUCKETS)
2330			cache = &kgem->active[bucket(bo)][bo->tiling];
2331		else
2332			cache = &kgem->large;
2333		list_add(&bo->list, cache);
2334		return;
2335	}
2336
2337	assert(bo->exec == NULL);
2338	assert(list_is_empty(&bo->request));
2339
2340	if (bo->map__cpu == NULL || bucket(bo) >= NUM_CACHE_BUCKETS) {
2341		if (!kgem_bo_set_purgeable(kgem, bo))
2342			goto destroy;
2343
2344		if (!kgem->has_llc && bo->domain == DOMAIN_CPU)
2345			goto destroy;
2346
2347		DBG(("%s: handle=%d, purged\n",
2348		     __FUNCTION__, bo->handle));
2349	}
2350
2351	kgem_bo_move_to_inactive(kgem, bo);
2352	return;
2353
2354destroy:
2355	if (!bo->exec)
2356		kgem_bo_free(kgem, bo);
2357}
2358
2359static void kgem_bo_unref(struct kgem *kgem, struct kgem_bo *bo)
2360{
2361	assert(bo->refcnt);
2362	if (--bo->refcnt == 0)
2363		__kgem_bo_destroy(kgem, bo);
2364}
2365
2366static void kgem_buffer_release(struct kgem *kgem, struct kgem_buffer *bo)
2367{
2368	assert(bo->base.io);
2369	while (!list_is_empty(&bo->base.vma)) {
2370		struct kgem_bo *cached;
2371
2372		cached = list_first_entry(&bo->base.vma, struct kgem_bo, vma);
2373		assert(cached->proxy == &bo->base);
2374		assert(cached != &bo->base);
2375		list_del(&cached->vma);
2376
2377		assert(*(struct kgem_bo **)cached->map__gtt == cached);
2378		*(struct kgem_bo **)cached->map__gtt = NULL;
2379		cached->map__gtt = NULL;
2380
2381		kgem_bo_destroy(kgem, cached);
2382	}
2383}
2384
2385void kgem_retire__buffers(struct kgem *kgem)
2386{
2387	while (!list_is_empty(&kgem->active_buffers)) {
2388		struct kgem_buffer *bo =
2389			list_last_entry(&kgem->active_buffers,
2390					struct kgem_buffer,
2391					base.list);
2392
2393		DBG(("%s: handle=%d, busy? %d [%d]\n",
2394		     __FUNCTION__, bo->base.handle, bo->base.rq != NULL, bo->base.exec != NULL));
2395
2396		assert(bo->base.exec == NULL || RQ(bo->base.rq) == kgem->next_request);
2397		if (bo->base.rq)
2398			break;
2399
2400		DBG(("%s: releasing upload cache for handle=%d? %d\n",
2401		     __FUNCTION__, bo->base.handle, !list_is_empty(&bo->base.vma)));
2402		list_del(&bo->base.list);
2403		kgem_buffer_release(kgem, bo);
2404		kgem_bo_unref(kgem, &bo->base);
2405	}
2406}
2407
2408static bool kgem_retire__flushing(struct kgem *kgem)
2409{
2410	struct kgem_bo *bo, *next;
2411	bool retired = false;
2412
2413	list_for_each_entry_safe(bo, next, &kgem->flushing, request) {
2414		assert(RQ(bo->rq) == (void *)kgem);
2415		assert(bo->exec == NULL);
2416
2417		if (__kgem_busy(kgem, bo->handle))
2418			break;
2419
2420		__kgem_bo_clear_busy(bo);
2421
2422		if (bo->refcnt)
2423			continue;
2424
2425		retired |= kgem_bo_move_to_cache(kgem, bo);
2426	}
2427#if HAS_DEBUG_FULL
2428	{
2429		int count = 0;
2430		list_for_each_entry(bo, &kgem->flushing, request)
2431			count++;
2432		DBG(("%s: %d bo on flushing list\n", __FUNCTION__, count));
2433	}
2434#endif
2435
2436	kgem->need_retire |= !list_is_empty(&kgem->flushing);
2437
2438	return retired;
2439}
2440
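/* Retire a completed request: buffers that still need a flush are moved to
 * the flushing list, the rest are released back into the caches.  Once its
 * last reference drops, the request's own batch bo is recycled into the
 * inactive cache if it can be marked purgeable, otherwise it is closed.
 */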
2441static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq)
2442{
2443	bool retired = false;
2444
2445	DBG(("%s: request %d complete\n",
2446	     __FUNCTION__, rq->bo->handle));
2447	assert(RQ(rq->bo->rq) == rq);
2448
2449	while (!list_is_empty(&rq->buffers)) {
2450		struct kgem_bo *bo;
2451
2452		bo = list_first_entry(&rq->buffers,
2453				      struct kgem_bo,
2454				      request);
2455
2456		assert(RQ(bo->rq) == rq);
2457		assert(bo->exec == NULL);
2458		assert(bo->domain == DOMAIN_GPU || bo->domain == DOMAIN_NONE);
2459
2460		list_del(&bo->request);
2461
2462		if (bo->needs_flush)
2463			bo->needs_flush = __kgem_busy(kgem, bo->handle);
2464		if (bo->needs_flush) {
2465			DBG(("%s: moving %d to flushing\n",
2466			     __FUNCTION__, bo->handle));
2467			list_add(&bo->request, &kgem->flushing);
2468			bo->rq = MAKE_REQUEST(kgem, RQ_RING(bo->rq));
2469			kgem->need_retire = true;
2470			continue;
2471		}
2472
2473		bo->domain = DOMAIN_NONE;
2474		bo->rq = NULL;
2475		if (bo->refcnt)
2476			continue;
2477
2478		retired |= kgem_bo_move_to_cache(kgem, bo);
2479	}
2480
2481	assert(rq->bo->rq == NULL);
2482	assert(rq->bo->exec == NULL);
2483	assert(list_is_empty(&rq->bo->request));
2484	assert(rq->bo->refcnt > 0);
2485
2486	if (--rq->bo->refcnt == 0) {
2487		if (kgem_bo_set_purgeable(kgem, rq->bo)) {
2488			kgem_bo_move_to_inactive(kgem, rq->bo);
2489			retired = true;
2490		} else {
2491			DBG(("%s: closing %d\n",
2492			     __FUNCTION__, rq->bo->handle));
2493			kgem_bo_free(kgem, rq->bo);
2494		}
2495	}
2496
2497	__kgem_request_free(rq);
2498	return retired;
2499}
2500
2501static bool kgem_retire__requests_ring(struct kgem *kgem, int ring)
2502{
2503	bool retired = false;
2504
2505	while (!list_is_empty(&kgem->requests[ring])) {
2506		struct kgem_request *rq;
2507
2508		rq = list_first_entry(&kgem->requests[ring],
2509				      struct kgem_request,
2510				      list);
2511		assert(rq->ring == ring);
2512		if (__kgem_busy(kgem, rq->bo->handle))
2513			break;
2514
2515		retired |= __kgem_retire_rq(kgem, rq);
2516	}
2517
2518#if HAS_DEBUG_FULL
2519	{
2520		struct kgem_bo *bo;
2521		int count = 0;
2522
2523		list_for_each_entry(bo, &kgem->requests[ring], request)
2524			count++;
2525
2526		bo = NULL;
2527		if (!list_is_empty(&kgem->requests[ring]))
2528			bo = list_first_entry(&kgem->requests[ring],
2529					      struct kgem_request,
2530					      list)->bo;
2531
2532		DBG(("%s: ring=%d, %d outstanding requests, oldest=%d\n",
2533		     __FUNCTION__, ring, count, bo ? bo->handle : 0));
2534	}
2535#endif
2536
2537	return retired;
2538}
2539
2540static bool kgem_retire__requests(struct kgem *kgem)
2541{
2542	bool retired = false;
2543	int n;
2544
2545	for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
2546		retired |= kgem_retire__requests_ring(kgem, n);
2547		kgem->need_retire |= !list_is_empty(&kgem->requests[n]);
2548	}
2549
2550	return retired;
2551}
2552
2553bool kgem_retire(struct kgem *kgem)
2554{
2555	bool retired = false;
2556
2557	DBG(("%s, need_retire?=%d\n", __FUNCTION__, kgem->need_retire));
2558
2559	kgem->need_retire = false;
2560
2561	retired |= kgem_retire__flushing(kgem);
2562	retired |= kgem_retire__requests(kgem);
2563
2564	DBG(("%s -- retired=%d, need_retire=%d\n",
2565	     __FUNCTION__, retired, kgem->need_retire));
2566
2567	kgem->retire(kgem);
2568
2569	return retired;
2570}
2571
2572bool __kgem_ring_is_idle(struct kgem *kgem, int ring)
2573{
2574	struct kgem_request *rq;
2575
2576	assert(ring < ARRAY_SIZE(kgem->requests));
2577	assert(!list_is_empty(&kgem->requests[ring]));
2578
2579	rq = list_last_entry(&kgem->requests[ring],
2580			     struct kgem_request, list);
2581	assert(rq->ring == ring);
2582	if (__kgem_busy(kgem, rq->bo->handle)) {
2583		DBG(("%s: last request handle=%d still busy\n",
2584		     __FUNCTION__, rq->bo->handle));
2585		return false;
2586	}
2587
2588	DBG(("%s: ring=%d idle (handle=%d)\n",
2589	     __FUNCTION__, ring, rq->bo->handle));
2590
2591	kgem_retire__requests_ring(kgem, ring);
2592
2593	assert(list_is_empty(&kgem->requests[ring]));
2594	return true;
2595}
2596
2597#if 0
2598static void kgem_commit__check_reloc(struct kgem *kgem)
2599{
2600	struct kgem_request *rq = kgem->next_request;
2601	struct kgem_bo *bo;
2602	bool has_64bit = kgem->gen >= 0100;
2603	int i;
2604
2605	for (i = 0; i < kgem->nreloc; i++) {
2606		list_for_each_entry(bo, &rq->buffers, request) {
2607			if (bo->target_handle == kgem->reloc[i].target_handle) {
2608				uint64_t value = 0;
2609				gem_read(kgem->fd, rq->bo->handle, &value, kgem->reloc[i].offset, has_64bit ? 8 : 4);
2610				assert(bo->exec->offset == -1 || value == bo->exec->offset + (int)kgem->reloc[i].delta);
2611				break;
2612			}
2613		}
2614	}
2615}
2616#else
2617#define kgem_commit__check_reloc(kgem)
2618#endif
2619
2620#ifndef NDEBUG
2621static void kgem_commit__check_buffers(struct kgem *kgem)
2622{
2623	struct kgem_buffer *bo;
2624
2625	list_for_each_entry(bo, &kgem->active_buffers, base.list)
2626		assert(bo->base.exec == NULL);
2627}
2628#else
2629#define kgem_commit__check_buffers(kgem)
2630#endif
2631
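/* After submission, transfer every buffer of the request into the GPU domain,
 * record the presumed offsets reported by the kernel and queue the request
 * for later retirement.  The static_request path (used when allocating a
 * request failed) synchronises immediately via SET_DOMAIN and then drops the
 * caches.
 */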
2632static void kgem_commit(struct kgem *kgem)
2633{
2634	struct kgem_request *rq = kgem->next_request;
2635	struct kgem_bo *bo, *next;
2636
2637	kgem_commit__check_reloc(kgem);
2638
2639	list_for_each_entry_safe(bo, next, &rq->buffers, request) {
2640		assert(next->request.prev == &bo->request);
2641
2642		DBG(("%s: release handle=%d (proxy? %d), dirty? %d flush? %d, snoop? %d -> offset=%x\n",
2643		     __FUNCTION__, bo->handle, bo->proxy != NULL,
2644		     bo->gpu_dirty, bo->needs_flush, bo->snoop,
2645		     (unsigned)bo->exec->offset));
2646
2647		assert(bo->exec);
2648		assert(bo->proxy == NULL || bo->exec == &_kgem_dummy_exec);
2649		assert(RQ(bo->rq) == rq || (RQ(bo->proxy->rq) == rq));
2650
2651		bo->presumed_offset = bo->exec->offset;
2652		bo->exec = NULL;
2653		bo->target_handle = -1;
2654
2655		if (!bo->refcnt && !bo->reusable) {
2656			assert(!bo->snoop);
2657			assert(!bo->proxy);
2658			kgem_bo_free(kgem, bo);
2659			continue;
2660		}
2661
2662		bo->binding.offset = 0;
2663		bo->domain = DOMAIN_GPU;
2664		bo->gpu_dirty = false;
2665
2666		if (bo->proxy) {
2667			/* proxies are not used for domain tracking */
2668			__kgem_bo_clear_busy(bo);
2669		}
2670
2671		kgem->scanout_busy |= bo->scanout && bo->needs_flush;
2672	}
2673
2674	if (rq == &kgem->static_request) {
2675		struct drm_i915_gem_set_domain set_domain;
2676
2677		DBG(("%s: syncing due to allocation failure\n", __FUNCTION__));
2678
2679		VG_CLEAR(set_domain);
2680		set_domain.handle = rq->bo->handle;
2681		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
2682		set_domain.write_domain = I915_GEM_DOMAIN_GTT;
2683		if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
2684			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
2685			kgem_throttle(kgem);
2686		}
2687
2688		kgem_retire(kgem);
2689		assert(list_is_empty(&rq->buffers));
2690
2691		assert(rq->bo->map__gtt == NULL);
2692		assert(rq->bo->map__cpu == NULL);
2693		gem_close(kgem->fd, rq->bo->handle);
2694		kgem_cleanup_cache(kgem);
2695	} else {
2696		assert(rq->ring < ARRAY_SIZE(kgem->requests));
2697		list_add_tail(&rq->list, &kgem->requests[rq->ring]);
2698		kgem->need_throttle = kgem->need_retire = 1;
2699	}
2700
2701	kgem->next_request = NULL;
2702
2703	kgem_commit__check_buffers(kgem);
2704}
2705
2706static void kgem_close_list(struct kgem *kgem, struct list *head)
2707{
2708	while (!list_is_empty(head))
2709		kgem_bo_free(kgem, list_first_entry(head, struct kgem_bo, list));
2710}
2711
2712static void kgem_close_inactive(struct kgem *kgem)
2713{
2714	unsigned int i;
2715
2716	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
2717		kgem_close_list(kgem, &kgem->inactive[i]);
2718}
2719
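/* Finalise the upload (io) buffers attached to the batch before submission:
 * mmapped buffers with space left are kept on active_buffers for further
 * reuse, small writes are shrunk into a better-fitting bo from the snoop or
 * linear cache (patching the relocations to the new handle), and whatever
 * remains is written out with gem_write and released.
 */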
2720static void kgem_finish_buffers(struct kgem *kgem)
2721{
2722	struct kgem_buffer *bo, *next;
2723
2724	list_for_each_entry_safe(bo, next, &kgem->batch_buffers, base.list) {
2725		DBG(("%s: buffer handle=%d, used=%d, exec?=%d, write=%d, mmapped=%s, refcnt=%d\n",
2726		     __FUNCTION__, bo->base.handle, bo->used, bo->base.exec!=NULL,
2727		     bo->write, bo->mmapped == MMAPPED_CPU ? "cpu" : bo->mmapped == MMAPPED_GTT ? "gtt" : "no",
2728		     bo->base.refcnt));
2729
2730		assert(next->base.list.prev == &bo->base.list);
2731		assert(bo->base.io);
2732		assert(bo->base.refcnt >= 1);
2733
2734		if (bo->base.refcnt > 1 && !bo->base.exec) {
2735			DBG(("%s: skipping unattached handle=%d, used=%d, refcnt=%d\n",
2736			     __FUNCTION__, bo->base.handle, bo->used, bo->base.refcnt));
2737			continue;
2738		}
2739
2740		if (!bo->write) {
2741			assert(bo->base.exec || bo->base.refcnt > 1);
2742			goto decouple;
2743		}
2744
2745		if (bo->mmapped) {
2746			uint32_t used;
2747
2748			assert(!bo->need_io);
2749
2750			used = ALIGN(bo->used, PAGE_SIZE);
2751			if (!DBG_NO_UPLOAD_ACTIVE &&
2752			    used + PAGE_SIZE <= bytes(&bo->base) &&
2753			    (kgem->has_llc || bo->mmapped == MMAPPED_GTT || bo->base.snoop)) {
2754				DBG(("%s: retaining upload buffer (%d/%d): used=%d, refcnt=%d\n",
2755				     __FUNCTION__, bo->used, bytes(&bo->base), used, bo->base.refcnt));
2756				bo->used = used;
2757				list_move(&bo->base.list,
2758					  &kgem->active_buffers);
2759				kgem->need_retire = true;
2760				continue;
2761			}
2762			DBG(("%s: discarding mmapped buffer, used=%d, map type=%d\n",
2763			     __FUNCTION__, bo->used, bo->mmapped));
2764			goto decouple;
2765		}
2766
2767		if (!bo->used || !bo->base.exec) {
2768			/* Unless we replace the handle in the execbuffer,
2769			 * this bo will become active. So decouple it
2770			 * from the buffer list and track it in the normal
2771			 * manner.
2772			 */
2773			goto decouple;
2774		}
2775
2776		assert(bo->need_io);
2777		assert(bo->base.rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
2778		assert(bo->base.domain != DOMAIN_GPU);
2779
2780		if (bo->base.refcnt == 1 &&
2781		    bo->base.size.pages.count > 1 &&
2782		    bo->used < bytes(&bo->base) / 2) {
2783			struct kgem_bo *shrink;
2784			unsigned alloc = NUM_PAGES(bo->used);
2785
2786			shrink = search_snoop_cache(kgem, alloc,
2787						    CREATE_INACTIVE | CREATE_NO_RETIRE);
2788			if (shrink) {
2789				void *map;
2790				int n;
2791
2792				DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n",
2793				     __FUNCTION__,
2794				     bo->used, bytes(&bo->base), bytes(shrink),
2795				     bo->base.handle, shrink->handle));
2796
2797				assert(bo->used <= bytes(shrink));
2798				map = kgem_bo_map__cpu(kgem, shrink);
2799				if (map) {
2800					kgem_bo_sync__cpu(kgem, shrink);
2801					memcpy(map, bo->mem, bo->used);
2802
2803					shrink->target_handle =
2804						kgem->has_handle_lut ? bo->base.target_handle : shrink->handle;
2805					for (n = 0; n < kgem->nreloc; n++) {
2806						if (kgem->reloc[n].target_handle == bo->base.target_handle) {
2807							kgem->reloc[n].target_handle = shrink->target_handle;
2808							kgem->reloc[n].presumed_offset = shrink->presumed_offset;
2809							kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
2810								kgem->reloc[n].delta + shrink->presumed_offset;
2811						}
2812					}
2813
2814					bo->base.exec->handle = shrink->handle;
2815					bo->base.exec->offset = shrink->presumed_offset;
2816					shrink->exec = bo->base.exec;
2817					shrink->rq = bo->base.rq;
2818					list_replace(&bo->base.request,
2819						     &shrink->request);
2820					list_init(&bo->base.request);
2821					shrink->needs_flush = bo->base.gpu_dirty;
2822
2823					bo->base.exec = NULL;
2824					bo->base.rq = NULL;
2825					bo->base.gpu_dirty = false;
2826					bo->base.needs_flush = false;
2827					bo->used = 0;
2828
2829					goto decouple;
2830				}
2831
2832				__kgem_bo_destroy(kgem, shrink);
2833			}
2834
2835			shrink = search_linear_cache(kgem, alloc,
2836						     CREATE_INACTIVE | CREATE_NO_RETIRE);
2837			if (shrink) {
2838				int n;
2839
2840				DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n",
2841				     __FUNCTION__,
2842				     bo->used, bytes(&bo->base), bytes(shrink),
2843				     bo->base.handle, shrink->handle));
2844
2845				assert(bo->used <= bytes(shrink));
2846				if (gem_write__cachealigned(kgem->fd, shrink->handle,
2847							    0, bo->used, bo->mem) == 0) {
2848					shrink->target_handle =
2849						kgem->has_handle_lut ? bo->base.target_handle : shrink->handle;
2850					for (n = 0; n < kgem->nreloc; n++) {
2851						if (kgem->reloc[n].target_handle == bo->base.target_handle) {
2852							kgem->reloc[n].target_handle = shrink->target_handle;
2853							kgem->reloc[n].presumed_offset = shrink->presumed_offset;
2854							kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
2855								kgem->reloc[n].delta + shrink->presumed_offset;
2856						}
2857					}
2858
2859					bo->base.exec->handle = shrink->handle;
2860					bo->base.exec->offset = shrink->presumed_offset;
2861					shrink->exec = bo->base.exec;
2862					shrink->rq = bo->base.rq;
2863					list_replace(&bo->base.request,
2864						     &shrink->request);
2865					list_init(&bo->base.request);
2866					shrink->needs_flush = bo->base.gpu_dirty;
2867
2868					bo->base.exec = NULL;
2869					bo->base.rq = NULL;
2870					bo->base.gpu_dirty = false;
2871					bo->base.needs_flush = false;
2872					bo->used = 0;
2873
2874					goto decouple;
2875				}
2876
2877				__kgem_bo_destroy(kgem, shrink);
2878			}
2879		}
2880
2881		DBG(("%s: handle=%d, uploading %d/%d\n",
2882		     __FUNCTION__, bo->base.handle, bo->used, bytes(&bo->base)));
2883		ASSERT_IDLE(kgem, bo->base.handle);
2884		assert(bo->used <= bytes(&bo->base));
2885		gem_write__cachealigned(kgem->fd, bo->base.handle,
2886					0, bo->used, bo->mem);
2887		bo->need_io = 0;
2888
2889decouple:
2890		DBG(("%s: releasing handle=%d\n",
2891		     __FUNCTION__, bo->base.handle));
2892		list_del(&bo->base.list);
2893		kgem_bo_unref(kgem, &bo->base);
2894	}
2895}
2896
2897static void kgem_cleanup(struct kgem *kgem)
2898{
2899	int n;
2900
2901	for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
2902		while (!list_is_empty(&kgem->requests[n])) {
2903			struct kgem_request *rq;
2904
2905			rq = list_first_entry(&kgem->requests[n],
2906					      struct kgem_request,
2907					      list);
2908			assert(rq->ring == n);
2909			while (!list_is_empty(&rq->buffers)) {
2910				struct kgem_bo *bo;
2911
2912				bo = list_first_entry(&rq->buffers,
2913						      struct kgem_bo,
2914						      request);
2915
2916				bo->exec = NULL;
2917				bo->gpu_dirty = false;
2918				__kgem_bo_clear_busy(bo);
2919				if (bo->refcnt == 0)
2920					kgem_bo_free(kgem, bo);
2921			}
2922
2923			__kgem_request_free(rq);
2924		}
2925	}
2926
2927	kgem_close_inactive(kgem);
2928}
2929
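/* Upload the assembled batch into the batch bo.  Three layouts are handled:
 * batch only, batch and surface state sharing the same pages, or disjoint
 * batch and surface uploaded as two writes.  On failure the caches are
 * expired and the write retried before giving up.
 */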
2930static int kgem_batch_write(struct kgem *kgem, uint32_t handle, uint32_t size)
2931{
2932	int ret;
2933
2934	ASSERT_IDLE(kgem, handle);
2935
2936#if DBG_NO_EXEC
2937	{
2938		uint32_t batch[] = { MI_BATCH_BUFFER_END, 0};
2939		return gem_write(kgem->fd, handle, 0, sizeof(batch), batch);
2940	}
2941#endif
2942
2943
2944retry:
2945	/* If there is no surface data, just upload the batch */
2946	if (kgem->surface == kgem->batch_size) {
2947		if ((ret = gem_write__cachealigned(kgem->fd, handle,
2948						   0, sizeof(uint32_t)*kgem->nbatch,
2949						   kgem->batch)) == 0)
2950			return 0;
2951
2952		goto expire;
2953	}
2954
2955	/* Are the batch pages conjoint with the surface pages? */
2956	if (kgem->surface < kgem->nbatch + PAGE_SIZE/sizeof(uint32_t)) {
2957		assert(size == PAGE_ALIGN(kgem->batch_size*sizeof(uint32_t)));
2958		if ((ret = gem_write__cachealigned(kgem->fd, handle,
2959						   0, kgem->batch_size*sizeof(uint32_t),
2960						   kgem->batch)) == 0)
2961			return 0;
2962
2963		goto expire;
2964	}
2965
2966	/* Disjoint surface/batch, upload separately */
2967	if ((ret = gem_write__cachealigned(kgem->fd, handle,
2968					   0, sizeof(uint32_t)*kgem->nbatch,
2969					   kgem->batch)))
2970		goto expire;
2971
2972	ret = PAGE_ALIGN(sizeof(uint32_t) * kgem->batch_size);
2973	ret -= sizeof(uint32_t) * kgem->surface;
2974	assert(size-ret >= kgem->nbatch*sizeof(uint32_t));
2975	if (gem_write(kgem->fd, handle,
2976		      size - ret, (kgem->batch_size - kgem->surface)*sizeof(uint32_t),
2977		      kgem->batch + kgem->surface))
2978		goto expire;
2979
2980	return 0;
2981
2982expire:
2983	assert(ret != EINVAL);
2984
2985	(void)__kgem_throttle_retire(kgem, 0);
2986	if (kgem_expire_cache(kgem))
2987		goto retry;
2988
2989	if (kgem_cleanup_cache(kgem))
2990		goto retry;
2991
2992	ERR(("%s: failed to write batch (handle=%d): %d\n",
2993	     __FUNCTION__, handle, -ret));
2994	return ret;
2995}
2996
2997void kgem_reset(struct kgem *kgem)
2998{
2999	if (kgem->next_request) {
3000		struct kgem_request *rq = kgem->next_request;
3001
3002		while (!list_is_empty(&rq->buffers)) {
3003			struct kgem_bo *bo =
3004				list_first_entry(&rq->buffers,
3005						 struct kgem_bo,
3006						 request);
3007			list_del(&bo->request);
3008
3009			assert(RQ(bo->rq) == rq);
3010
3011			bo->binding.offset = 0;
3012			bo->exec = NULL;
3013			bo->target_handle = -1;
3014			bo->gpu_dirty = false;
3015
3016			if (bo->needs_flush && __kgem_busy(kgem, bo->handle)) {
3017				assert(bo->domain == DOMAIN_GPU || bo->domain == DOMAIN_NONE);
3018				list_add(&bo->request, &kgem->flushing);
3019				bo->rq = (void *)kgem;
3020				kgem->need_retire = true;
3021			} else
3022				__kgem_bo_clear_busy(bo);
3023
3024			if (bo->refcnt || bo->rq)
3025				continue;
3026
3027			kgem_bo_move_to_cache(kgem, bo);
3028		}
3029
3030		if (rq != &kgem->static_request) {
3031			list_init(&rq->list);
3032			__kgem_request_free(rq);
3033		}
3034	}
3035
3036	kgem->nfence = 0;
3037	kgem->nexec = 0;
3038	kgem->nreloc = 0;
3039	kgem->nreloc__self = 0;
3040	kgem->aperture = 0;
3041	kgem->aperture_fenced = 0;
3042	kgem->aperture_max_fence = 0;
3043	kgem->nbatch = 0;
3044	kgem->surface = kgem->batch_size;
3045	kgem->mode = KGEM_NONE;
3046	kgem->needs_semaphore = false;
3047	kgem->needs_reservation = false;
3048	kgem->flush = 0;
3049	kgem->batch_flags = kgem->batch_flags_base;
3050
3051	kgem->next_request = __kgem_request_alloc(kgem);
3052
3053	kgem_sna_reset(kgem);
3054}
3055
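/* When relaxed deltas are supported, the unused gap between the end of the
 * commands (nbatch) and the start of the surface state (surface, which grows
 * down from the top of the batch) can be squeezed out.  The new size is
 * rounded to 1024-dword granules and relocations pointing into the surface
 * area are rebased by the amount removed.
 */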
3056static int compact_batch_surface(struct kgem *kgem)
3057{
3058	int size, shrink, n;
3059
3060	if (!kgem->has_relaxed_delta)
3061		return kgem->batch_size * sizeof(uint32_t);
3062
3063	/* See if we can pack the contents into one or two pages */
3064	n = ALIGN(kgem->batch_size, 1024);
3065	size = n - kgem->surface + kgem->nbatch;
3066	size = ALIGN(size, 1024);
3067
3068	shrink = n - size;
3069	if (shrink) {
3070		DBG(("shrinking from %d to %d\n", kgem->batch_size, size));
3071
3072		shrink *= sizeof(uint32_t);
3073		for (n = 0; n < kgem->nreloc; n++) {
3074			if (kgem->reloc[n].read_domains == I915_GEM_DOMAIN_INSTRUCTION &&
3075			    kgem->reloc[n].target_handle == ~0U)
3076				kgem->reloc[n].delta -= shrink;
3077
3078			if (kgem->reloc[n].offset >= sizeof(uint32_t)*kgem->nbatch)
3079				kgem->reloc[n].offset -= shrink;
3080		}
3081	}
3082
3083	return size * sizeof(uint32_t);
3084}
3085
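/* Pick a bo to hold the batch: small batches reuse the pinned 4 KiB / 16 KiB
 * batch buffers when idle (retiring them if the kernel reports them no longer
 * busy), gen2 (020) gets extra handling for the pinned-batch workaround, and
 * everything else falls back to a linear allocation from the cache.
 */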
3086static struct kgem_bo *
3087kgem_create_batch(struct kgem *kgem, int size)
3088{
3089	struct drm_i915_gem_set_domain set_domain;
3090	struct kgem_bo *bo;
3091
3092	if (size <= 4096) {
3093		bo = list_first_entry(&kgem->pinned_batches[0],
3094				      struct kgem_bo,
3095				      list);
3096		if (!bo->rq) {
3097out_4096:
3098			assert(bo->refcnt > 0);
3099			list_move_tail(&bo->list, &kgem->pinned_batches[0]);
3100			return kgem_bo_reference(bo);
3101		}
3102
3103		if (!__kgem_busy(kgem, bo->handle)) {
3104			__kgem_retire_rq(kgem, RQ(bo->rq));
3105			goto out_4096;
3106		}
3107	}
3108
3109	if (size <= 16384) {
3110		bo = list_first_entry(&kgem->pinned_batches[1],
3111				      struct kgem_bo,
3112				      list);
3113		if (!bo->rq) {
3114out_16384:
3115			assert(bo->refcnt > 0);
3116			list_move_tail(&bo->list, &kgem->pinned_batches[1]);
3117			return kgem_bo_reference(bo);
3118		}
3119
3120		if (!__kgem_busy(kgem, bo->handle)) {
3121			__kgem_retire_rq(kgem, RQ(bo->rq));
3122			goto out_16384;
3123		}
3124	}
3125
3126	if (kgem->gen == 020) {
3127		bo = kgem_create_linear(kgem, size, CREATE_CACHED | CREATE_TEMPORARY);
3128		if (bo)
3129			return bo;
3130
3131		/* Nothing available for reuse, rely on the kernel wa */
3132		if (kgem->has_pinned_batches) {
3133			bo = kgem_create_linear(kgem, size, CREATE_CACHED | CREATE_TEMPORARY);
3134			if (bo) {
3135				kgem->batch_flags &= ~LOCAL_I915_EXEC_IS_PINNED;
3136				return bo;
3137			}
3138		}
3139
3140		if (size < 16384) {
3141			bo = list_first_entry(&kgem->pinned_batches[size > 4096],
3142					      struct kgem_bo,
3143					      list);
3144			list_move_tail(&bo->list, &kgem->pinned_batches[size > 4096]);
3145
3146			DBG(("%s: syncing due to busy batches\n", __FUNCTION__));
3147
3148			VG_CLEAR(set_domain);
3149			set_domain.handle = bo->handle;
3150			set_domain.read_domains = I915_GEM_DOMAIN_GTT;
3151			set_domain.write_domain = I915_GEM_DOMAIN_GTT;
3152			if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
3153				DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
3154				kgem_throttle(kgem);
3155				return NULL;
3156			}
3157
3158			kgem_retire(kgem);
3159			assert(bo->rq == NULL);
3160			return kgem_bo_reference(bo);
3161		}
3162	}
3163
3164	return kgem_create_linear(kgem, size, CREATE_NO_THROTTLE);
3165}
3166
3167#if !NDEBUG
3168static bool dump_file(const char *path)
3169{
3170	FILE *file;
3171	size_t len = 0;
3172	char *line = NULL;
3173
3174	file = fopen(path, "r");
3175	if (file == NULL)
3176		return false;
3177
3178	while (getline(&line, &len, file) != -1)
3179		ErrorF("%s", line);
3180
3181	free(line);
3182	fclose(file);
3183	return true;
3184}
3185
3186static void dump_debugfs(struct kgem *kgem, const char *name)
3187{
3188	char path[80];
3189	int minor = kgem_get_minor(kgem);
3190
3191	if (minor < 0)
3192		return;
3193
3194	sprintf(path, "/sys/kernel/debug/dri/%d/%s", minor, name);
3195	if (dump_file(path))
3196		return;
3197
3198	sprintf(path, "/debug/dri/%d/%s", minor, name);
3199	if (dump_file(path))
3200		return;
3201}
3202
3203static void dump_gtt_info(struct kgem *kgem)
3204{
3205	dump_debugfs(kgem, "i915_gem_gtt");
3206}
3207
3208static void dump_fence_regs(struct kgem *kgem)
3209{
3210	dump_debugfs(kgem, "i915_gem_fence_regs");
3211}
3212#endif
3213
3214static int do_execbuf(struct kgem *kgem, struct drm_i915_gem_execbuffer2 *execbuf)
3215{
3216	int ret;
3217
3218retry:
3219	ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
3220	if (ret == 0)
3221		return 0;
3222
3223	DBG(("%s: failed ret=%d, throttling and discarding cache\n", __FUNCTION__, ret));
3224	(void)__kgem_throttle_retire(kgem, 0);
3225	if (kgem_expire_cache(kgem))
3226		goto retry;
3227
3228	if (kgem_cleanup_cache(kgem))
3229		goto retry;
3230
3231	/* last gasp */
3232	return do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
3233}
3234
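/* Submit the accumulated batch: finish the upload buffers, optionally compact
 * the batch around the surface state, allocate and write the batch bo, and
 * call DRM_IOCTL_I915_GEM_EXECBUFFER2.  On failure the GPU is throttled,
 * acceleration is disabled and (in debug builds) the execbuffer contents are
 * dumped.  Finally the request is committed and the kgem state reset for the
 * next batch.
 */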
3235void _kgem_submit(struct kgem *kgem)
3236{
3237	struct kgem_request *rq;
3238	uint32_t batch_end;
3239	int size;
3240
3241	assert(!DBG_NO_HW);
3242	assert(!kgem->wedged);
3243
3244	assert(kgem->nbatch);
3245	assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem));
3246	assert(kgem->nbatch <= kgem->surface);
3247
3248	batch_end = kgem_end_batch(kgem);
3249	kgem_sna_flush(kgem);
3250
3251	DBG(("batch[%d/%d, flags=%x]: %d %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d [fenced=%d]\n",
3252	     kgem->mode, kgem->ring, kgem->batch_flags,
3253	     batch_end, kgem->nbatch, kgem->surface, kgem->batch_size,
3254	     kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced));
3255
3256	assert(kgem->nbatch <= kgem->batch_size);
3257	assert(kgem->nbatch <= kgem->surface);
3258	assert(kgem->nreloc <= ARRAY_SIZE(kgem->reloc));
3259	assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
3260	assert(kgem->nfence <= kgem->fence_max);
3261
3262	kgem_finish_buffers(kgem);
3263
3264#if SHOW_BATCH_BEFORE
3265	__kgem_batch_debug(kgem, batch_end);
3266#endif
3267
3268	rq = kgem->next_request;
3269	if (kgem->surface != kgem->batch_size)
3270		size = compact_batch_surface(kgem);
3271	else
3272		size = kgem->nbatch * sizeof(kgem->batch[0]);
3273	rq->bo = kgem_create_batch(kgem, size);
3274	if (rq->bo) {
3275		uint32_t handle = rq->bo->handle;
3276		int i;
3277
3278		assert(!rq->bo->needs_flush);
3279
3280		i = kgem->nexec++;
3281		kgem->exec[i].handle = handle;
3282		kgem->exec[i].relocation_count = kgem->nreloc;
3283		kgem->exec[i].relocs_ptr = (uintptr_t)kgem->reloc;
3284		kgem->exec[i].alignment = 0;
3285		kgem->exec[i].offset = rq->bo->presumed_offset;
3286		kgem->exec[i].flags = 0;
3287		kgem->exec[i].rsvd1 = 0;
3288		kgem->exec[i].rsvd2 = 0;
3289
3290		rq->bo->target_handle = kgem->has_handle_lut ? i : handle;
3291		rq->bo->exec = &kgem->exec[i];
3292		rq->bo->rq = MAKE_REQUEST(rq, kgem->ring); /* useful sanity check */
3293		list_add(&rq->bo->request, &rq->buffers);
3294		rq->ring = kgem->ring == KGEM_BLT;
3295
3296		kgem_fixup_self_relocs(kgem, rq->bo);
3297
3298		if (kgem_batch_write(kgem, handle, size) == 0) {
3299			struct drm_i915_gem_execbuffer2 execbuf;
3300			int ret;
3301
3302			memset(&execbuf, 0, sizeof(execbuf));
3303			execbuf.buffers_ptr = (uintptr_t)kgem->exec;
3304			execbuf.buffer_count = kgem->nexec;
3305			execbuf.batch_len = batch_end*sizeof(uint32_t);
3306			execbuf.flags = kgem->ring | kgem->batch_flags;
3307
3308			if (DBG_DUMP) {
3309				int fd = open("/tmp/i915-batchbuffers.dump",
3310					      O_WRONLY | O_CREAT | O_APPEND,
3311					      0666);
3312				if (fd != -1) {
3313					ret = write(fd, kgem->batch, batch_end*sizeof(uint32_t));
3314					fd = close(fd);
3315				}
3316			}
3317
3318			ret = do_execbuf(kgem, &execbuf);
3319			if (DEBUG_SYNC && ret == 0) {
3320				struct drm_i915_gem_set_domain set_domain;
3321
3322				VG_CLEAR(set_domain);
3323				set_domain.handle = handle;
3324				set_domain.read_domains = I915_GEM_DOMAIN_GTT;
3325				set_domain.write_domain = I915_GEM_DOMAIN_GTT;
3326
3327				ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
3328			}
3329			if (ret < 0) {
3330				kgem_throttle(kgem);
3331				if (!kgem->wedged) {
3332					xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR,
3333						   "Failed to submit rendering commands, disabling acceleration.\n");
3334					kgem->wedged = true;
3335				}
3336
3337#if !NDEBUG
3338				ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d, fenced=%d, high=%d,%d: errno=%d\n",
3339				       kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface,
3340				       kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced, kgem->aperture_high, kgem->aperture_total, -ret);
3341
3342				for (i = 0; i < kgem->nexec; i++) {
3343					struct kgem_bo *bo, *found = NULL;
3344
3345					list_for_each_entry(bo, &kgem->next_request->buffers, request) {
3346						if (bo->handle == kgem->exec[i].handle) {
3347							found = bo;
3348							break;
3349						}
3350					}
3351					ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, snooped %d, deleted %d\n",
3352					       i,
3353					       kgem->exec[i].handle,
3354					       (int)kgem->exec[i].offset,
3355					       found ? kgem_bo_size(found) : -1,
3356					       found ? found->tiling : -1,
3357					       (int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE),
3358					       found ? found->snoop : -1,
3359					       found ? found->purged : -1);
3360				}
3361				for (i = 0; i < kgem->nreloc; i++) {
3362					ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n",
3363					       i,
3364					       (int)kgem->reloc[i].offset,
3365					       kgem->reloc[i].target_handle,
3366					       kgem->reloc[i].delta,
3367					       kgem->reloc[i].read_domains,
3368					       kgem->reloc[i].write_domain,
3369					       (int)kgem->reloc[i].presumed_offset);
3370				}
3371
3372				{
3373					struct drm_i915_gem_get_aperture aperture;
3374					if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture) == 0)
3375						ErrorF("Aperture size %lld, available %lld\n",
3376						       (long long)aperture.aper_size,
3377						       (long long)aperture.aper_available_size);
3378				}
3379
3380				if (ret == -ENOSPC)
3381					dump_gtt_info(kgem);
3382				if (ret == -EDEADLK)
3383					dump_fence_regs(kgem);
3384
3385				if (DEBUG_SYNC) {
3386					int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
3387					if (fd != -1) {
3388						int ignored = write(fd, kgem->batch, batch_end*sizeof(uint32_t));
3389						assert(ignored == batch_end*sizeof(uint32_t));
3390						close(fd);
3391					}
3392
3393					FatalError("SNA: failed to submit batchbuffer, errno=%d\n", -ret);
3394				}
3395#endif
3396			}
3397		}
3398	}
3399#if SHOW_BATCH_AFTER
3400	if (gem_read(kgem->fd, rq->bo->handle, kgem->batch, 0, batch_end*sizeof(uint32_t)) == 0)
3401		__kgem_batch_debug(kgem, batch_end);
3402#endif
3403	kgem_commit(kgem);
3404	if (kgem->wedged)
3405		kgem_cleanup(kgem);
3406
3407	kgem_reset(kgem);
3408
3409	assert(kgem->next_request != NULL);
3410}
3411
3412static bool find_hang_state(struct kgem *kgem, char *path, int maxlen)
3413{
3414	int minor = kgem_get_minor(kgem);
3415
3416	/* Search for our hang state in a few canonical locations.
3417	 * In the unlikely event of having multiple devices, we
3418	 * will need to check which minor actually corresponds to ours.
3419	 */
3420
3421	snprintf(path, maxlen, "/sys/class/drm/card%d/error", minor);
3422	if (access(path, R_OK) == 0)
3423		return true;
3424
3425	snprintf(path, maxlen, "/sys/kernel/debug/dri/%d/i915_error_state", minor);
3426	if (access(path, R_OK) == 0)
3427		return true;
3428
3429	snprintf(path, maxlen, "/debug/dri/%d/i915_error_state", minor);
3430	if (access(path, R_OK) == 0)
3431		return true;
3432
3433	path[0] = '\0';
3434	return false;
3435}
3436
3437void kgem_throttle(struct kgem *kgem)
3438{
3439	if (kgem->wedged)
3440		return;
3441
3442	kgem->wedged = __kgem_throttle(kgem, true);
3443	if (kgem->wedged) {
3444		static int once;
3445		char path[128];
3446
3447		xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR,
3448			   "Detected a hung GPU, disabling acceleration.\n");
3449		if (!once && find_hang_state(kgem, path, sizeof(path))) {
3450			xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR,
3451				   "When reporting this, please include %s and the full dmesg.\n",
3452				   path);
3453			once = 1;
3454		}
3455
3456		kgem->need_throttle = false;
3457	}
3458}
3459
3460int kgem_is_wedged(struct kgem *kgem)
3461{
3462	return __kgem_throttle(kgem, true);
3463}
3464
3465static void kgem_purge_cache(struct kgem *kgem)
3466{
3467	struct kgem_bo *bo, *next;
3468	int i;
3469
3470	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
3471		list_for_each_entry_safe(bo, next, &kgem->inactive[i], list) {
3472			if (!kgem_bo_is_retained(kgem, bo)) {
3473				DBG(("%s: purging %d\n",
3474				     __FUNCTION__, bo->handle));
3475				kgem_bo_free(kgem, bo);
3476			}
3477		}
3478	}
3479
3480	kgem->need_purge = false;
3481}
3482
3483void kgem_clean_scanout_cache(struct kgem *kgem)
3484{
3485	while (!list_is_empty(&kgem->scanout)) {
3486		struct kgem_bo *bo;
3487
3488		bo = list_first_entry(&kgem->scanout, struct kgem_bo, list);
3489
3490		assert(bo->scanout);
3491		assert(!bo->refcnt);
3492		assert(!bo->prime);
3493		assert(bo->proxy == NULL);
3494
3495		if (bo->exec || __kgem_busy(kgem, bo->handle))
3496			break;
3497
3498		DBG(("%s: handle=%d, fb=%d (reusable=%d)\n",
3499		     __FUNCTION__, bo->handle, bo->delta, bo->reusable));
3500		list_del(&bo->list);
3501
3502		kgem_bo_rmfb(kgem, bo);
3503		bo->scanout = false;
3504
3505		if (!bo->purged) {
3506			bo->reusable = true;
3507			if (kgem->has_llc &&
3508			    !gem_set_caching(kgem->fd, bo->handle, SNOOPED))
3509				bo->reusable = false;
3510
3511		}
3512
3513		__kgem_bo_destroy(kgem, bo);
3514	}
3515}
3516
3517void kgem_clean_large_cache(struct kgem *kgem)
3518{
3519	while (!list_is_empty(&kgem->large_inactive)) {
3520		kgem_bo_free(kgem,
3521			     list_first_entry(&kgem->large_inactive,
3522					      struct kgem_bo, list));
3523
3524	}
3525}
3526
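/* Age out cached buffers.  Each entry records the time it entered a cache in
 * bo->delta; snooped buffers older than half of MAX_INACTIVE_TIME and
 * inactive buffers older than MAX_INACTIVE_TIME are freed, with mapped
 * buffers preserved a little longer (MAP_PRESERVE_TIME).  Returns true if
 * anything was freed from the inactive caches, so the caller can retry an
 * allocation.
 */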
3527bool kgem_expire_cache(struct kgem *kgem)
3528{
3529	time_t now, expire;
3530	struct kgem_bo *bo;
3531	unsigned int size = 0, count = 0;
3532	bool idle;
3533	unsigned int i;
3534
3535	time(&now);
3536
3537	while (__kgem_freed_bo) {
3538		bo = __kgem_freed_bo;
3539		__kgem_freed_bo = *(struct kgem_bo **)bo;
3540		free(bo);
3541	}
3542
3543	while (__kgem_freed_request) {
3544		struct kgem_request *rq = __kgem_freed_request;
3545		__kgem_freed_request = *(struct kgem_request **)rq;
3546		free(rq);
3547	}
3548
3549	kgem_clean_large_cache(kgem);
3550	if (container_of(kgem, struct sna, kgem)->scrn->vtSema)
3551		kgem_clean_scanout_cache(kgem);
3552
3553	expire = 0;
3554	list_for_each_entry(bo, &kgem->snoop, list) {
3555		if (bo->delta) {
3556			expire = now - MAX_INACTIVE_TIME/2;
3557			break;
3558		}
3559
3560		bo->delta = now;
3561	}
3562	if (expire) {
3563		while (!list_is_empty(&kgem->snoop)) {
3564			bo = list_last_entry(&kgem->snoop, struct kgem_bo, list);
3565
3566			if (bo->delta > expire)
3567				break;
3568
3569			kgem_bo_free(kgem, bo);
3570		}
3571	}
3572#ifdef DEBUG_MEMORY
3573	{
3574		long snoop_size = 0;
3575		int snoop_count = 0;
3576		list_for_each_entry(bo, &kgem->snoop, list)
3577			snoop_count++, snoop_size += bytes(bo);
3578		DBG(("%s: still allocated %d bo, %ld bytes, in snoop cache\n",
3579		     __FUNCTION__, snoop_count, snoop_size));
3580	}
3581#endif
3582
3583	kgem_retire(kgem);
3584	if (kgem->wedged)
3585		kgem_cleanup(kgem);
3586
3587	kgem->expire(kgem);
3588
3589	if (kgem->need_purge)
3590		kgem_purge_cache(kgem);
3591
3592	if (kgem->need_retire)
3593		kgem_retire(kgem);
3594
3595	expire = 0;
3596	idle = true;
3597	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
3598		idle &= list_is_empty(&kgem->inactive[i]);
3599		list_for_each_entry(bo, &kgem->inactive[i], list) {
3600			if (bo->delta) {
3601				expire = now - MAX_INACTIVE_TIME;
3602				break;
3603			}
3604
3605			bo->delta = now;
3606		}
3607	}
3608	if (expire == 0) {
3609		DBG(("%s: idle? %d\n", __FUNCTION__, idle));
3610		kgem->need_expire = !idle;
3611		return false;
3612	}
3613
3614	idle = true;
3615	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
3616		struct list preserve;
3617
3618		list_init(&preserve);
3619		while (!list_is_empty(&kgem->inactive[i])) {
3620			bo = list_last_entry(&kgem->inactive[i],
3621					     struct kgem_bo, list);
3622
3623			if (bo->delta > expire) {
3624				idle = false;
3625				break;
3626			}
3627
3628			if (bo->map__cpu && bo->delta + MAP_PRESERVE_TIME > expire) {
3629				idle = false;
3630				list_move_tail(&bo->list, &preserve);
3631			} else {
3632				count++;
3633				size += bytes(bo);
3634				kgem_bo_free(kgem, bo);
3635				DBG(("%s: expiring %d\n",
3636				     __FUNCTION__, bo->handle));
3637			}
3638		}
3639		if (!list_is_empty(&preserve)) {
3640			preserve.prev->next = kgem->inactive[i].next;
3641			kgem->inactive[i].next->prev = preserve.prev;
3642			kgem->inactive[i].next = preserve.next;
3643			preserve.next->prev = &kgem->inactive[i];
3644		}
3645	}
3646
3647#ifdef DEBUG_MEMORY
3648	{
3649		long inactive_size = 0;
3650		int inactive_count = 0;
3651		for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
3652			list_for_each_entry(bo, &kgem->inactive[i], list)
3653				inactive_count++, inactive_size += bytes(bo);
3654		DBG(("%s: still allocated %d bo, %ld bytes, in inactive cache\n",
3655		     __FUNCTION__, inactive_count, inactive_size));
3656	}
3657#endif
3658
3659	DBG(("%s: expired %d objects, %d bytes, idle? %d\n",
3660	     __FUNCTION__, count, size, idle));
3661
3662	kgem->need_expire = !idle;
3663	return count;
3664	(void)count;
3665	(void)size;
3666}
3667
3668bool kgem_cleanup_cache(struct kgem *kgem)
3669{
3670	unsigned int i;
3671	int n;
3672
3673	/* sync to the most recent request */
3674	for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
3675		if (!list_is_empty(&kgem->requests[n])) {
3676			struct kgem_request *rq;
3677			struct drm_i915_gem_set_domain set_domain;
3678
3679			rq = list_first_entry(&kgem->requests[n],
3680					      struct kgem_request,
3681					      list);
3682
3683			DBG(("%s: sync on cleanup\n", __FUNCTION__));
3684
3685			VG_CLEAR(set_domain);
3686			set_domain.handle = rq->bo->handle;
3687			set_domain.read_domains = I915_GEM_DOMAIN_GTT;
3688			set_domain.write_domain = I915_GEM_DOMAIN_GTT;
3689			(void)do_ioctl(kgem->fd,
3690				       DRM_IOCTL_I915_GEM_SET_DOMAIN,
3691				       &set_domain);
3692		}
3693	}
3694
3695	kgem_retire(kgem);
3696	kgem_cleanup(kgem);
3697
3698	if (!kgem->need_expire)
3699		return false;
3700
3701	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
3702		while (!list_is_empty(&kgem->inactive[i]))
3703			kgem_bo_free(kgem,
3704				     list_last_entry(&kgem->inactive[i],
3705						     struct kgem_bo, list));
3706	}
3707
3708	kgem_clean_large_cache(kgem);
3709	kgem_clean_scanout_cache(kgem);
3710
3711	while (!list_is_empty(&kgem->snoop))
3712		kgem_bo_free(kgem,
3713			     list_last_entry(&kgem->snoop,
3714					     struct kgem_bo, list));
3715
3716	while (__kgem_freed_bo) {
3717		struct kgem_bo *bo = __kgem_freed_bo;
3718		__kgem_freed_bo = *(struct kgem_bo **)bo;
3719		free(bo);
3720	}
3721
3722	kgem->need_purge = false;
3723	kgem->need_expire = false;
3724	return true;
3725}
3726
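/* Search the active or inactive caches (CREATE_INACTIVE selects the latter)
 * for an untiled bo of at least num_pages.  Requests for CPU/GTT mappable
 * buffers prefer entries that already carry the right mapping; near misses
 * are remembered in "first" and used only if nothing better turns up, and
 * very large requests are served from the separate large lists.
 */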
3727static struct kgem_bo *
3728search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
3729{
3730	struct kgem_bo *bo, *first = NULL;
3731	bool use_active = (flags & CREATE_INACTIVE) == 0;
3732	struct list *cache;
3733
3734	DBG(("%s: num_pages=%d, flags=%x, use_active? %d, use_large=%d [max=%d]\n",
3735	     __FUNCTION__, num_pages, flags, use_active,
3736	     num_pages >= MAX_CACHE_SIZE / PAGE_SIZE,
3737	     MAX_CACHE_SIZE / PAGE_SIZE));
3738
3739	assert(num_pages);
3740
3741	if (num_pages >= MAX_CACHE_SIZE / PAGE_SIZE) {
3742		DBG(("%s: searching large buffers\n", __FUNCTION__));
3743retry_large:
3744		cache = use_active ? &kgem->large : &kgem->large_inactive;
3745		list_for_each_entry_safe(bo, first, cache, list) {
3746			assert(bo->refcnt == 0);
3747			assert(bo->reusable);
3748			assert(!bo->scanout);
3749
3750			if (num_pages > num_pages(bo))
3751				goto discard;
3752
3753			if (bo->tiling != I915_TILING_NONE) {
3754				if (use_active)
3755					goto discard;
3756
3757				if (!gem_set_tiling(kgem->fd, bo->handle,
3758						    I915_TILING_NONE, 0))
3759					goto discard;
3760
3761				bo->tiling = I915_TILING_NONE;
3762				bo->pitch = 0;
3763			}
3764
3765			if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo))
3766				goto discard;
3767
3768			list_del(&bo->list);
3769			if (RQ(bo->rq) == (void *)kgem) {
3770				assert(bo->exec == NULL);
3771				list_del(&bo->request);
3772			}
3773
3774			bo->delta = 0;
3775			assert_tiling(kgem, bo);
3776			return bo;
3777
3778discard:
3779			if (!use_active)
3780				kgem_bo_free(kgem, bo);
3781		}
3782
3783		if (use_active) {
3784			use_active = false;
3785			goto retry_large;
3786		}
3787
3788		if (__kgem_throttle_retire(kgem, flags))
3789			goto retry_large;
3790
3791		return NULL;
3792	}
3793
3794	if (!use_active && list_is_empty(inactive(kgem, num_pages))) {
3795		DBG(("%s: inactive and cache bucket empty\n",
3796		     __FUNCTION__));
3797
3798		if (flags & CREATE_NO_RETIRE) {
3799			DBG(("%s: can not retire\n", __FUNCTION__));
3800			return NULL;
3801		}
3802
3803		if (list_is_empty(active(kgem, num_pages, I915_TILING_NONE))) {
3804			DBG(("%s: active cache bucket empty\n", __FUNCTION__));
3805			return NULL;
3806		}
3807
3808		if (!__kgem_throttle_retire(kgem, flags)) {
3809			DBG(("%s: nothing retired\n", __FUNCTION__));
3810			return NULL;
3811		}
3812
3813		if (list_is_empty(inactive(kgem, num_pages))) {
3814			DBG(("%s: inactive cache bucket still empty after retire\n",
3815			     __FUNCTION__));
3816			return NULL;
3817		}
3818	}
3819
3820	if (!use_active && flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
3821		int for_cpu = !!(flags & CREATE_CPU_MAP);
3822		DBG(("%s: searching for inactive %s map\n",
3823		     __FUNCTION__, for_cpu ? "cpu" : "gtt"));
3824		cache = &kgem->vma[for_cpu].inactive[cache_bucket(num_pages)];
3825		list_for_each_entry(bo, cache, vma) {
3826			assert(for_cpu ? bo->map__cpu : bo->map__gtt);
3827			assert(bucket(bo) == cache_bucket(num_pages));
3828			assert(bo->proxy == NULL);
3829			assert(bo->rq == NULL);
3830			assert(bo->exec == NULL);
3831			assert(!bo->scanout);
3832
3833			if (num_pages > num_pages(bo)) {
3834				DBG(("inactive too small: %d < %d\n",
3835				     num_pages(bo), num_pages));
3836				continue;
3837			}
3838
3839			if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
3840				kgem_bo_free(kgem, bo);
3841				break;
3842			}
3843
3844			if (I915_TILING_NONE != bo->tiling &&
3845			    !gem_set_tiling(kgem->fd, bo->handle,
3846					    I915_TILING_NONE, 0))
3847				continue;
3848
3849			kgem_bo_remove_from_inactive(kgem, bo);
3850			assert(list_is_empty(&bo->vma));
3851			assert(list_is_empty(&bo->list));
3852
3853			bo->tiling = I915_TILING_NONE;
3854			bo->pitch = 0;
3855			bo->delta = 0;
3856			DBG(("  %s: found handle=%d (num_pages=%d) in linear vma cache\n",
3857			     __FUNCTION__, bo->handle, num_pages(bo)));
3858			assert(use_active || bo->domain != DOMAIN_GPU);
3859			assert(!bo->needs_flush);
3860			assert_tiling(kgem, bo);
3861			ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active);
3862			return bo;
3863		}
3864
3865		if (flags & CREATE_EXACT)
3866			return NULL;
3867
3868		if (flags & CREATE_CPU_MAP && !kgem->has_llc)
3869			return NULL;
3870	}
3871
3872	cache = use_active ? active(kgem, num_pages, I915_TILING_NONE) : inactive(kgem, num_pages);
3873	list_for_each_entry(bo, cache, list) {
3874		assert(bo->refcnt == 0);
3875		assert(bo->reusable);
3876		assert(!!bo->rq == !!use_active);
3877		assert(bo->proxy == NULL);
3878		assert(!bo->scanout);
3879
3880		if (num_pages > num_pages(bo))
3881			continue;
3882
3883		if (use_active &&
3884		    kgem->gen <= 040 &&
3885		    bo->tiling != I915_TILING_NONE)
3886			continue;
3887
3888		if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
3889			kgem_bo_free(kgem, bo);
3890			break;
3891		}
3892
3893		if (I915_TILING_NONE != bo->tiling) {
3894			if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP))
3895				continue;
3896
3897			if (first)
3898				continue;
3899
3900			if (!gem_set_tiling(kgem->fd, bo->handle,
3901					    I915_TILING_NONE, 0))
3902				continue;
3903
3904			bo->tiling = I915_TILING_NONE;
3905			bo->pitch = 0;
3906		}
3907
3908		if (bo->map__gtt || bo->map__cpu) {
3909			if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
3910				int for_cpu = !!(flags & CREATE_CPU_MAP);
3911				if (for_cpu ? bo->map__cpu : bo->map__gtt){
3912					if (first != NULL)
3913						break;
3914
3915					first = bo;
3916					continue;
3917				}
3918			} else {
3919				if (first != NULL)
3920					break;
3921
3922				first = bo;
3923				continue;
3924			}
3925		} else {
3926			if (flags & CREATE_GTT_MAP && !kgem_bo_can_map(kgem, bo))
3927				continue;
3928
3929			if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
3930				if (first != NULL)
3931					break;
3932
3933				first = bo;
3934				continue;
3935			}
3936		}
3937
3938		if (use_active)
3939			kgem_bo_remove_from_active(kgem, bo);
3940		else
3941			kgem_bo_remove_from_inactive(kgem, bo);
3942
3943		assert(bo->tiling == I915_TILING_NONE);
3944		bo->pitch = 0;
3945		bo->delta = 0;
3946		DBG(("  %s: found handle=%d (num_pages=%d) in linear %s cache\n",
3947		     __FUNCTION__, bo->handle, num_pages(bo),
3948		     use_active ? "active" : "inactive"));
3949		assert(list_is_empty(&bo->list));
3950		assert(list_is_empty(&bo->vma));
3951		assert(use_active || bo->domain != DOMAIN_GPU);
3952		assert(!bo->needs_flush || use_active);
3953		assert_tiling(kgem, bo);
3954		ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active);
3955		return bo;
3956	}
3957
3958	if (first) {
3959		assert(first->tiling == I915_TILING_NONE);
3960
3961		if (use_active)
3962			kgem_bo_remove_from_active(kgem, first);
3963		else
3964			kgem_bo_remove_from_inactive(kgem, first);
3965
3966		first->pitch = 0;
3967		first->delta = 0;
3968		DBG(("  %s: found handle=%d (near-miss) (num_pages=%d) in linear %s cache\n",
3969		     __FUNCTION__, first->handle, num_pages(first),
3970		     use_active ? "active" : "inactive"));
3971		assert(list_is_empty(&first->list));
3972		assert(list_is_empty(&first->vma));
3973		assert(use_active || first->domain != DOMAIN_GPU);
3974		assert(!first->needs_flush || use_active);
3975		ASSERT_MAYBE_IDLE(kgem, first->handle, !use_active);
3976		return first;
3977	}
3978
3979	return NULL;
3980}
3981
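/* Import a buffer shared via its global (flink) name. The imported bo is
 * marked non-reusable and assumed incoherent (purged) since we have no
 * coherency guarantees for foreign objects.
 */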
3982struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name)
3983{
3984	struct drm_gem_open open_arg;
3985	struct drm_i915_gem_get_tiling tiling;
3986	struct kgem_bo *bo;
3987
3988	DBG(("%s(name=%d)\n", __FUNCTION__, name));
3989
3990	VG_CLEAR(open_arg);
3991	open_arg.name = name;
3992	if (do_ioctl(kgem->fd, DRM_IOCTL_GEM_OPEN, &open_arg))
3993		return NULL;
3994
3995	DBG(("%s: new handle=%d\n", __FUNCTION__, open_arg.handle));
3996
3997	VG_CLEAR(tiling);
3998	tiling.handle = open_arg.handle;
3999	if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling)) {
4000		DBG(("%s(name=%d) get-tiling failed, ret=%d\n", __FUNCTION__, name, errno));
4001		gem_close(kgem->fd, open_arg.handle);
4002		return NULL;
4003	}
4004
4005	DBG(("%s: handle=%d, tiling=%d\n", __FUNCTION__, tiling.handle, tiling.tiling_mode));
4006
4007	bo = __kgem_bo_alloc(open_arg.handle, open_arg.size / PAGE_SIZE);
4008	if (bo == NULL) {
4009		gem_close(kgem->fd, open_arg.handle);
4010		return NULL;
4011	}
4012
4013	bo->unique_id = kgem_get_unique_id(kgem);
4014	bo->tiling = tiling.tiling_mode;
4015	bo->reusable = false;
4016	bo->prime = true;
4017	bo->purged = true; /* no coherency guarantees */
4018
4019	debug_alloc__bo(kgem, bo);
4020	return bo;
4021}
4022
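/* Import a dma-buf file descriptor (PRIME). We query the tiling mode and
 * the actual object size (via lseek on the fd), then inspect the caching
 * level to guess whether the import is a foreign scanout or a snooped CPU
 * buffer.
 */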
4023struct kgem_bo *kgem_create_for_prime(struct kgem *kgem, int name, uint32_t size)
4024{
4025#ifdef DRM_IOCTL_PRIME_FD_TO_HANDLE
4026	struct drm_prime_handle args;
4027	struct drm_i915_gem_get_tiling tiling;
4028	struct local_i915_gem_caching caching;
4029	struct kgem_bo *bo;
4030	off_t seek;
4031
4032	DBG(("%s(name=%d)\n", __FUNCTION__, name));
4033
4034	VG_CLEAR(args);
4035	args.fd = name;
4036	args.flags = 0;
4037	if (do_ioctl(kgem->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args)) {
4038		DBG(("%s(name=%d) fd-to-handle failed, ret=%d\n", __FUNCTION__, name, errno));
4039		return NULL;
4040	}
4041
4042	VG_CLEAR(tiling);
4043	tiling.handle = args.handle;
4044	if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling)) {
4045		DBG(("%s(name=%d) get-tiling failed, ret=%d\n", __FUNCTION__, name, errno));
4046		gem_close(kgem->fd, args.handle);
4047		return NULL;
4048	}
4049
4050	/* Query actual size, overriding specified if available */
4051	seek = lseek(args.fd, 0, SEEK_END);
4052	DBG(("%s: estimated size=%ld, actual=%lld\n",
4053	     __FUNCTION__, (long)size, (long long)seek));
4054	if (seek != -1) {
4055		if (size > seek) {
4056			DBG(("%s(name=%d) estimated required size [%d] is larger than actual [%ld]\n", __FUNCTION__, name, size, (long)seek));
4057			gem_close(kgem->fd, args.handle);
4058			return NULL;
4059		}
4060		size = seek;
4061	}
4062
4063	DBG(("%s: new handle=%d, tiling=%d\n", __FUNCTION__,
4064	     args.handle, tiling.tiling_mode));
4065	bo = __kgem_bo_alloc(args.handle, NUM_PAGES(size));
4066	if (bo == NULL) {
4067		gem_close(kgem->fd, args.handle);
4068		return NULL;
4069	}
4070
4071	bo->unique_id = kgem_get_unique_id(kgem);
4072	bo->tiling = tiling.tiling_mode;
4073	bo->reusable = false;
4074	bo->prime = true;
4075	bo->domain = DOMAIN_NONE;
4076
4077	/* is this a special bo (e.g. scanout or CPU coherent)? */
4078
4079	VG_CLEAR(caching);
4080	caching.handle = args.handle;
4081	caching.caching = kgem->has_llc;
4082	(void)drmIoctl(kgem->fd, LOCAL_IOCTL_I915_GEM_GET_CACHING, &caching);
4083	DBG(("%s: imported handle=%d has caching %d\n", __FUNCTION__, args.handle, caching.caching));
4084	switch (caching.caching) {
4085	case 0:
4086		if (kgem->has_llc) {
4087			DBG(("%s: interpreting handle=%d as a foreign scanout\n",
4088			     __FUNCTION__, args.handle));
4089			bo->scanout = true;
4090		}
4091		break;
4092	case 1:
4093		if (!kgem->has_llc) {
4094			DBG(("%s: interpreting handle=%d as a foreign snooped buffer\n",
4095			     __FUNCTION__, args.handle));
4096			bo->snoop = true;
4097			if (bo->tiling) {
4098				DBG(("%s: illegal snooped tiled buffer\n", __FUNCTION__));
4099				kgem_bo_free(kgem, bo);
4100				return NULL;
4101			}
4102		}
4103		break;
4104	case 2:
4105		DBG(("%s: interpreting handle=%d as a foreign scanout\n",
4106		     __FUNCTION__, args.handle));
4107		bo->scanout = true;
4108		break;
4109	}
4110
4111	debug_alloc__bo(kgem, bo);
4112	return bo;
4113#else
4114	return NULL;
4115#endif
4116}
4117
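/* Export a bo as a dma-buf file descriptor (PRIME, O_CLOEXEC). Once it has
 * been shared, the bo can no longer be returned to the reuse caches.
 */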
4118int kgem_bo_export_to_prime(struct kgem *kgem, struct kgem_bo *bo)
4119{
4120#if defined(DRM_IOCTL_PRIME_HANDLE_TO_FD) && defined(O_CLOEXEC)
4121	struct drm_prime_handle args;
4122
4123	VG_CLEAR(args);
4124	args.handle = bo->handle;
4125	args.flags = O_CLOEXEC;
4126
4127	if (do_ioctl(kgem->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args))
4128		return -1;
4129
4130	bo->reusable = false;
4131	return args.fd;
4132#else
4133	return -1;
4134#endif
4135}
4136
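/* Allocate an untiled bo of at least size bytes, preferring a hit in the
 * inactive linear cache. With CREATE_CACHED we only ever return a cached
 * bo and never allocate a fresh one.
 */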
4137struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags)
4138{
4139	struct kgem_bo *bo;
4140	uint32_t handle;
4141
4142	DBG(("%s(%d)\n", __FUNCTION__, size));
4143	assert(size);
4144
4145	if (flags & CREATE_GTT_MAP && kgem->has_llc) {
4146		flags &= ~CREATE_GTT_MAP;
4147		flags |= CREATE_CPU_MAP;
4148	}
4149
4150	size = NUM_PAGES(size);
4151	bo = search_linear_cache(kgem, size, CREATE_INACTIVE | flags);
4152	if (bo) {
4153		assert(bo->domain != DOMAIN_GPU);
4154		ASSERT_IDLE(kgem, bo->handle);
4155		bo->refcnt = 1;
4156		return bo;
4157	}
4158
4159	if (flags & CREATE_CACHED)
4160		return NULL;
4161
4162	handle = gem_create(kgem->fd, size);
4163	if (handle == 0)
4164		return NULL;
4165
4166	DBG(("%s: new handle=%d, num_pages=%d\n", __FUNCTION__, handle, size));
4167	bo = __kgem_bo_alloc(handle, size);
4168	if (bo == NULL) {
4169		gem_close(kgem->fd, handle);
4170		return NULL;
4171	}
4172
4173	debug_alloc__bo(kgem, bo);
4174	return bo;
4175}
4176
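/* Choose a tiling mode for a width x height surface at bpp bits per pixel,
 * demoting the request to TILING_X or TILING_NONE whenever the pitch or
 * height limits of this generation would be exceeded. A negative return
 * value tells the caller that exactly -tiling must be used.
 */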
4177int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int bpp)
4178{
4179	if (DBG_NO_TILING)
4180		return tiling < 0 ? tiling : I915_TILING_NONE;
4181
4182	if (kgem->gen < 040) {
4183		if (tiling && width * bpp > 8192 * 8) {
4184			DBG(("%s: pitch too large for tiling [%d]\n",
4185			     __FUNCTION__, width*bpp/8));
4186			tiling = I915_TILING_NONE;
4187			goto done;
4188		}
4189	} else {
4190		if (width*bpp > (MAXSHORT-512) * 8) {
4191			if (tiling > 0)
4192				tiling = -tiling;
4193			else if (tiling == 0)
4194				tiling = -I915_TILING_X;
4195			DBG(("%s: large pitch [%d], forcing TILING [%d]\n",
4196			     __FUNCTION__, width*bpp/8, tiling));
4197		} else if (tiling && (width|height) > 8192) {
4198			DBG(("%s: large tiled buffer [%dx%d], forcing TILING_X\n",
4199			     __FUNCTION__, width, height));
4200			tiling = -I915_TILING_X;
4201		}
4202
4203		/* fences limited to 128k (256k on ivb) */
4204		assert(width * bpp <= 128 * 1024 * 8);
4205	}
4206
4207	if (tiling < 0)
4208		return tiling;
4209
4210	if (tiling == I915_TILING_Y && !kgem->can_render_y)
4211		tiling = I915_TILING_X;
4212
4213	if (tiling && (height == 1 || width == 1)) {
4214		DBG(("%s: disabling tiling [%dx%d] for single row/col\n",
4215		     __FUNCTION__, width, height));
4216		tiling = I915_TILING_NONE;
4217		goto done;
4218	}
4219	if (tiling == I915_TILING_Y && height <= 16) {
4220		DBG(("%s: too short [%d] for TILING_Y\n",
4221		     __FUNCTION__, height));
4222		tiling = I915_TILING_X;
4223	}
4224	if (tiling && width * bpp > 8 * (4096 - 64)) {
4225		DBG(("%s: TLB miss between lines %dx%d (pitch=%d), forcing tiling %d\n",
4226		     __FUNCTION__,
4227		     width, height, width*bpp/8,
4228		     tiling));
4229		return -tiling;
4230	}
4231	if (tiling == I915_TILING_X && height < 4) {
4232		DBG(("%s: too short [%d] for TILING_X\n",
4233		     __FUNCTION__, height));
4234		tiling = I915_TILING_NONE;
4235		goto done;
4236	}
4237
4238	if (tiling == I915_TILING_X && width * bpp <= 8*512) {
4239		DBG(("%s: too thin [width %d, %d bpp] for TILING_X\n",
4240		     __FUNCTION__, width, bpp));
4241		tiling = I915_TILING_NONE;
4242		goto done;
4243	}
4244	if (tiling == I915_TILING_Y && width * bpp < 8*128) {
4245		DBG(("%s: too thin [%d] for TILING_Y\n",
4246		     __FUNCTION__, width));
4247		tiling = I915_TILING_NONE;
4248		goto done;
4249	}
4250
4251	if (tiling && ALIGN(height, 2) * ALIGN(width*bpp, 8*64) <= 4096 * 8) {
4252		DBG(("%s: too small [%d bytes] for TILING_%c\n", __FUNCTION__,
4253		     ALIGN(height, 2) * ALIGN(width*bpp, 8*64) / 8,
4254		     tiling == I915_TILING_X ? 'X' : 'Y'));
4255		tiling = I915_TILING_NONE;
4256		goto done;
4257	}
4258
4259	if (tiling && width * bpp >= 8 * 4096 / 2) {
4260		DBG(("%s: TLB near-miss between lines %dx%d (pitch=%d), forcing tiling %d\n",
4261		     __FUNCTION__,
4262		     width, height, width*bpp/8,
4263		     tiling));
4264		return -tiling;
4265	}
4266
4267done:
4268	DBG(("%s: %dx%d -> %d\n", __FUNCTION__, width, height, tiling));
4269	return tiling;
4270}
4271
4272static int bits_per_pixel(int depth)
4273{
4274	switch (depth) {
4275	case 8: return 8;
4276	case 15:
4277	case 16: return 16;
4278	case 24:
4279	case 30:
4280	case 32: return 32;
4281	default: return 0;
4282	}
4283}
4284
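/* Report which placements (CPU, GPU, GTT-mappable, tiled, large) are
 * feasible for the proposed 2D surface, by checking the untiled and tiled
 * surface sizes against the aperture and object-size limits.
 */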
4285unsigned kgem_can_create_2d(struct kgem *kgem,
4286			    int width, int height, int depth)
4287{
4288	uint32_t pitch, size;
4289	unsigned flags = 0;
4290	int tiling;
4291	int bpp;
4292
4293	DBG(("%s: %dx%d @ %d\n", __FUNCTION__, width, height, depth));
4294
4295	bpp = bits_per_pixel(depth);
4296	if (bpp == 0) {
4297		DBG(("%s: unhandled depth %d\n", __FUNCTION__, depth));
4298		return 0;
4299	}
4300
4301	if (width > MAXSHORT || height > MAXSHORT) {
4302		DBG(("%s: unhandled size %dx%d\n",
4303		     __FUNCTION__, width, height));
4304		return 0;
4305	}
4306
4307	size = kgem_surface_size(kgem, false, 0,
4308				 width, height, bpp,
4309				 I915_TILING_NONE, &pitch);
4310	DBG(("%s: untiled size=%d\n", __FUNCTION__, size));
4311	if (size > 0) {
4312		if (size <= kgem->max_cpu_size)
4313			flags |= KGEM_CAN_CREATE_CPU;
4314		if (size > 4096 && size <= kgem->max_gpu_size)
4315			flags |= KGEM_CAN_CREATE_GPU;
4316		if (size <= PAGE_SIZE*kgem->aperture_mappable/4)
4317			flags |= KGEM_CAN_CREATE_GTT;
4318		if (size > kgem->large_object_size)
4319			flags |= KGEM_CAN_CREATE_LARGE;
4320		if (size > kgem->max_object_size) {
4321			DBG(("%s: too large (untiled) %d > %d\n",
4322			     __FUNCTION__, size, kgem->max_object_size));
4323			return 0;
4324		}
4325	}
4326
4327	tiling = kgem_choose_tiling(kgem, I915_TILING_X,
4328				    width, height, bpp);
4329	if (tiling != I915_TILING_NONE) {
4330		size = kgem_surface_size(kgem, false, 0,
4331					 width, height, bpp, tiling,
4332					 &pitch);
4333		DBG(("%s: tiled[%d] size=%d\n", __FUNCTION__, tiling, size));
4334		if (size > 0 && size <= kgem->max_gpu_size)
4335			flags |= KGEM_CAN_CREATE_GPU | KGEM_CAN_CREATE_TILED;
4336		if (size > 0 && size <= PAGE_SIZE*kgem->aperture_mappable/4)
4337			flags |= KGEM_CAN_CREATE_GTT;
4338		if (size > PAGE_SIZE*kgem->aperture_mappable/4)
4339			flags &= ~KGEM_CAN_CREATE_GTT;
4340		if (size > kgem->large_object_size)
4341			flags |= KGEM_CAN_CREATE_LARGE;
4342		if (size > kgem->max_object_size) {
4343			DBG(("%s: too large (tiled) %d > %d\n",
4344			     __FUNCTION__, size, kgem->max_object_size));
4345			return 0;
4346		}
4347		if (kgem->gen < 040) {
4348			int fence_size = 1024 * 1024;
4349			while (fence_size < size)
4350				fence_size <<= 1;
4351			if (fence_size > kgem->max_gpu_size)
4352				flags &= ~(KGEM_CAN_CREATE_GPU | KGEM_CAN_CREATE_TILED);
4353			if (fence_size > PAGE_SIZE*kgem->aperture_fenceable/4)
4354				flags &= ~KGEM_CAN_CREATE_GTT;
4355		}
4356	}
4357
4358	return flags;
4359}
4360
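/* On pre-gen4 hardware a fenced (tiled) object occupies a power-of-two
 * region of at least 512KiB (gen < 030) or 1MiB; return that region size
 * in pages.
 */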
4361inline int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo)
4362{
4363	unsigned int size;
4364
4365	assert(bo->tiling);
4366	assert_tiling(kgem, bo);
4367	assert(kgem->gen < 040);
4368
4369	if (kgem->gen < 030)
4370		size = 512 * 1024 / PAGE_SIZE;
4371	else
4372		size = 1024 * 1024 / PAGE_SIZE;
4373	while (size < num_pages(bo))
4374		size <<= 1;
4375
4376	return size;
4377}
4378
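/* Try to allocate a display-bound bo through the (local) create2 ioctl,
 * preferring stolen memory and falling back to ordinary system pages. The
 * result is marked non-reusable so that unclaimed scanouts are freed.
 */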
4379static struct kgem_bo *
4380__kgem_bo_create_as_display(struct kgem *kgem, int size, int tiling, int pitch)
4381{
4382	struct local_i915_gem_create2 args;
4383	struct kgem_bo *bo;
4384
4385	if (!kgem->has_create2)
4386		return NULL;
4387
4388	memset(&args, 0, sizeof(args));
4389	args.size = size * PAGE_SIZE;
4390	args.placement = LOCAL_I915_CREATE_PLACEMENT_STOLEN;
4391	args.caching = DISPLAY;
4392	args.tiling_mode = tiling;
4393	args.stride = pitch;
4394
4395	if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_CREATE2, &args)) {
4396		args.placement = LOCAL_I915_CREATE_PLACEMENT_SYSTEM;
4397		if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_CREATE2, &args))
4398			return NULL;
4399	}
4400
4401	bo = __kgem_bo_alloc(args.handle, size);
4402	if (bo == NULL) {
4403		gem_close(kgem->fd, args.handle);
4404		return NULL;
4405	}
4406
4407	bo->unique_id = kgem_get_unique_id(kgem);
4408	bo->tiling = tiling;
4409	bo->pitch = pitch;
4410	if (args.placement == LOCAL_I915_CREATE_PLACEMENT_STOLEN) {
4411		bo->purged = true; /* for asserts against CPU access */
4412	}
4413	bo->reusable = false; /* so that unclaimed scanouts are freed */
4414	bo->domain = DOMAIN_NONE;
4415
4416	if (__kgem_busy(kgem, bo->handle)) {
4417		assert(bo->exec == NULL);
4418		list_add(&bo->request, &kgem->flushing);
4419		bo->rq = (void *)kgem;
4420		kgem->need_retire = true;
4421	}
4422
4423	assert_tiling(kgem, bo);
4424	debug_alloc__bo(kgem, bo);
4425
4426	return bo;
4427}
4428
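/* Promote an ordinary bo into a scanout: move it out of cacheable memory
 * (on LLC parts), pre-bind a GTT mapping to avoid rebinding later when
 * busy, and attach a framebuffer, storing the fb id in bo->delta.
 */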
4429static void __kgem_bo_make_scanout(struct kgem *kgem,
4430				   struct kgem_bo *bo,
4431				   int width, int height)
4432{
4433	ScrnInfoPtr scrn =
4434		container_of(kgem, struct sna, kgem)->scrn;
4435	struct drm_mode_fb_cmd arg;
4436
4437	assert(bo->proxy == NULL);
4438
4439	if (!scrn->vtSema)
4440		return;
4441
4442	DBG(("%s: create fb %dx%d@%d/%d\n",
4443	     __FUNCTION__, width, height, scrn->depth, scrn->bitsPerPixel));
4444
4445	VG_CLEAR(arg);
4446	arg.width = width;
4447	arg.height = height;
4448	arg.pitch = bo->pitch;
4449	arg.bpp = scrn->bitsPerPixel;
4450	arg.depth = scrn->depth;
4451	arg.handle = bo->handle;
4452
4453	/* First move the scanout out of cached memory */
4454	if (kgem->has_llc) {
4455		if (!gem_set_caching(kgem->fd, bo->handle, DISPLAY) &&
4456		    !gem_set_caching(kgem->fd, bo->handle, UNCACHED))
4457			return;
4458	}
4459
4460	bo->scanout = true;
4461
4462	/* Then pre-emptively move the object into the mappable
4463	 * portion to avoid rebinding later when busy.
4464	 */
4465	if (bo->map__gtt == NULL)
4466		bo->map__gtt = __kgem_bo_map__gtt(kgem, bo);
4467	if (bo->map__gtt) {
4468		*(uint32_t *)bo->map__gtt = 0;
4469		bo->domain = DOMAIN_GTT;
4470	}
4471
4472	if (do_ioctl(kgem->fd, DRM_IOCTL_MODE_ADDFB, &arg) == 0) {
4473		DBG(("%s: attached fb=%d to handle=%d\n",
4474		     __FUNCTION__, arg.fb_id, arg.handle));
4475		bo->delta = arg.fb_id;
4476	}
4477}
4478
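/* Create (or reuse) a 2D bo. The caches are searched in order: the scanout
 * list (for CREATE_SCANOUT), the large-object lists, the inactive vma cache
 * (for mappable requests), the active lists (retrying near-misses in larger
 * buckets), the inactive lists, and only then do we fall back to a fresh
 * gem_create.
 */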
4479struct kgem_bo *kgem_create_2d(struct kgem *kgem,
4480			       int width,
4481			       int height,
4482			       int bpp,
4483			       int tiling,
4484			       uint32_t flags)
4485{
4486	struct list *cache;
4487	struct kgem_bo *bo;
4488	uint32_t pitch, tiled_height, size;
4489	uint32_t handle;
4490	int i, bucket, retry;
4491	bool exact = flags & (CREATE_EXACT | CREATE_SCANOUT);
4492
4493	if (tiling < 0)
4494		exact = true, tiling = -tiling;
4495
4496	DBG(("%s(%dx%d, bpp=%d, tiling=%d, exact=%d, inactive=%d, cpu-mapping=%d, gtt-mapping=%d, scanout?=%d, prime?=%d, temp?=%d)\n", __FUNCTION__,
4497	     width, height, bpp, tiling, exact,
4498	     !!(flags & CREATE_INACTIVE),
4499	     !!(flags & CREATE_CPU_MAP),
4500	     !!(flags & CREATE_GTT_MAP),
4501	     !!(flags & CREATE_SCANOUT),
4502	     !!(flags & CREATE_PRIME),
4503	     !!(flags & CREATE_TEMPORARY)));
4504
4505	size = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags,
4506				 width, height, bpp, tiling, &pitch);
4507	if (size == 0) {
4508		DBG(("%s: invalid surface size (too large?)\n", __FUNCTION__));
4509		return NULL;
4510	}
4511
4512	size /= PAGE_SIZE;
4513	bucket = cache_bucket(size);
4514
4515	if (flags & CREATE_SCANOUT) {
4516		struct kgem_bo *last = NULL;
4517
4518		list_for_each_entry_reverse(bo, &kgem->scanout, list) {
4519			assert(bo->scanout);
4520			assert(!bo->flush);
4521			assert(!bo->refcnt);
4522			assert_tiling(kgem, bo);
4523
4524			if (size > num_pages(bo) || num_pages(bo) > 2*size)
4525				continue;
4526
4527			if (bo->tiling != tiling || bo->pitch != pitch)
4528				/* Cannot change tiling/pitch without recreating the fb */
4529				continue;
4530
4531			if (bo->delta && !check_scanout_size(kgem, bo, width, height))
4532				continue;
4533
4534			if (flags & CREATE_INACTIVE && bo->rq) {
4535				last = bo;
4536				continue;
4537			}
4538
4539			list_del(&bo->list);
4540
4541			bo->unique_id = kgem_get_unique_id(kgem);
4542			DBG(("  1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
4543			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
4544			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
4545			assert_tiling(kgem, bo);
4546			bo->refcnt = 1;
4547			return bo;
4548		}
4549
4550		if (last) {
4551			list_del(&last->list);
4552
4553			last->unique_id = kgem_get_unique_id(kgem);
4554			DBG(("  1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
4555			     last->pitch, last->tiling, last->handle, last->unique_id));
4556			assert(last->pitch*kgem_aligned_height(kgem, height, last->tiling) <= kgem_bo_size(last));
4557			assert_tiling(kgem, last);
4558			last->refcnt = 1;
4559			return last;
4560		}
4561
4562		if (container_of(kgem, struct sna, kgem)->scrn->vtSema) {
4563			ScrnInfoPtr scrn = container_of(kgem, struct sna, kgem)->scrn;
4564
4565			list_for_each_entry_reverse(bo, &kgem->scanout, list) {
4566				struct drm_mode_fb_cmd arg;
4567
4568				assert(bo->scanout);
4569				assert(!bo->refcnt);
4570
4571				if (size > num_pages(bo) || num_pages(bo) > 2*size)
4572					continue;
4573
4574				if (flags & CREATE_INACTIVE && bo->rq)
4575					continue;
4576
4577				list_del(&bo->list);
4578
4579				if (bo->tiling != tiling || bo->pitch != pitch) {
4580					if (bo->delta) {
4581						kgem_bo_rmfb(kgem, bo);
4582						bo->delta = 0;
4583					}
4584
4585					if (gem_set_tiling(kgem->fd, bo->handle,
4586							   tiling, pitch)) {
4587						bo->tiling = tiling;
4588						bo->pitch = pitch;
4589					} else {
4590						kgem_bo_free(kgem, bo);
4591						break;
4592					}
4593				}
4594
4595				VG_CLEAR(arg);
4596				arg.width = width;
4597				arg.height = height;
4598				arg.pitch = bo->pitch;
4599				arg.bpp = scrn->bitsPerPixel;
4600				arg.depth = scrn->depth;
4601				arg.handle = bo->handle;
4602
4603				if (do_ioctl(kgem->fd, DRM_IOCTL_MODE_ADDFB, &arg)) {
4604					kgem_bo_free(kgem, bo);
4605					break;
4606				}
4607
4608				bo->delta = arg.fb_id;
4609				bo->unique_id = kgem_get_unique_id(kgem);
4610
4611				DBG(("  2:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
4612				     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
4613				assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
4614				assert_tiling(kgem, bo);
4615				bo->refcnt = 1;
4616				return bo;
4617			}
4618		}
4619
4620		bo = __kgem_bo_create_as_display(kgem, size, tiling, pitch);
4621		if (bo)
4622			return bo;
4623
4624		flags |= CREATE_INACTIVE;
4625	}
4626
4627	if (bucket >= NUM_CACHE_BUCKETS) {
4628		DBG(("%s: large bo num pages=%d, bucket=%d\n",
4629		     __FUNCTION__, size, bucket));
4630
4631		if (flags & CREATE_INACTIVE)
4632			goto large_inactive;
4633
4634		tiled_height = kgem_aligned_height(kgem, height, tiling);
4635
4636		list_for_each_entry(bo, &kgem->large, list) {
4637			assert(!bo->purged);
4638			assert(!bo->scanout);
4639			assert(bo->refcnt == 0);
4640			assert(bo->reusable);
4641			assert_tiling(kgem, bo);
4642
4643			if (kgem->gen < 040) {
4644				if (bo->pitch < pitch) {
4645					DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
4646					     bo->tiling, tiling,
4647					     bo->pitch, pitch));
4648					continue;
4649				}
4650
4651				if (bo->pitch * tiled_height > bytes(bo))
4652					continue;
4653			} else {
4654				if (num_pages(bo) < size)
4655					continue;
4656
4657				if (bo->pitch != pitch || bo->tiling != tiling) {
4658					if (!gem_set_tiling(kgem->fd, bo->handle,
4659							    tiling, pitch))
4660						continue;
4661
4662					bo->pitch = pitch;
4663					bo->tiling = tiling;
4664				}
4665			}
4666
4667			kgem_bo_remove_from_active(kgem, bo);
4668
4669			bo->unique_id = kgem_get_unique_id(kgem);
4670			bo->delta = 0;
4671			DBG(("  1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
4672			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
4673			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
4674			assert_tiling(kgem, bo);
4675			bo->refcnt = 1;
4676			return bo;
4677		}
4678
4679large_inactive:
4680		__kgem_throttle_retire(kgem, flags);
4681		list_for_each_entry(bo, &kgem->large_inactive, list) {
4682			assert(bo->refcnt == 0);
4683			assert(bo->reusable);
4684			assert(!bo->scanout);
4685			assert_tiling(kgem, bo);
4686
4687			if (size > num_pages(bo))
4688				continue;
4689
4690			if (bo->tiling != tiling ||
4691			    (tiling != I915_TILING_NONE && bo->pitch != pitch)) {
4692				if (!gem_set_tiling(kgem->fd, bo->handle,
4693						    tiling, pitch))
4694					continue;
4695
4696				bo->tiling = tiling;
4697				bo->pitch = pitch;
4698			}
4699
4700			if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
4701				kgem_bo_free(kgem, bo);
4702				break;
4703			}
4704
4705			list_del(&bo->list);
4706
4707			assert(bo->domain != DOMAIN_GPU);
4708			bo->unique_id = kgem_get_unique_id(kgem);
4709			bo->pitch = pitch;
4710			bo->delta = 0;
4711			DBG(("  1:from large inactive: pitch=%d, tiling=%d, handle=%d, id=%d\n",
4712			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
4713			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
4714			assert_tiling(kgem, bo);
4715			bo->refcnt = 1;
4716
4717			if (flags & CREATE_SCANOUT)
4718				__kgem_bo_make_scanout(kgem, bo, width, height);
4719
4720			return bo;
4721		}
4722
4723		goto create;
4724	}
4725
4726	if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
4727		int for_cpu = !!(flags & CREATE_CPU_MAP);
4728		if (kgem->has_llc && tiling == I915_TILING_NONE)
4729			for_cpu = 1;
4730		/* We presume that we will need to upload to this bo,
4731		 * and so would prefer to have an active VMA.
4732		 */
4733		cache = &kgem->vma[for_cpu].inactive[bucket];
4734		do {
4735			list_for_each_entry(bo, cache, vma) {
4736				assert(bucket(bo) == bucket);
4737				assert(bo->refcnt == 0);
4738				assert(!bo->scanout);
4739				assert(for_cpu ? bo->map__cpu : bo->map__gtt);
4740				assert(bo->rq == NULL);
4741				assert(bo->exec == NULL);
4742				assert(list_is_empty(&bo->request));
4743				assert(bo->flush == false);
4744				assert_tiling(kgem, bo);
4745
4746				if (size > num_pages(bo)) {
4747					DBG(("inactive too small: %d < %d\n",
4748					     num_pages(bo), size));
4749					continue;
4750				}
4751
4752				if (flags & UNCACHED && !kgem->has_llc && bo->domain != DOMAIN_CPU)
4753					continue;
4754
4755				if (bo->tiling != tiling ||
4756				    (tiling != I915_TILING_NONE && bo->pitch != pitch)) {
4757					if (bo->map__gtt ||
4758					    !gem_set_tiling(kgem->fd, bo->handle,
4759							    tiling, pitch)) {
4760						DBG(("inactive GTT vma with wrong tiling: %d < %d\n",
4761						     bo->tiling, tiling));
4762						continue;
4763					}
4764					bo->tiling = tiling;
4765					bo->pitch = pitch;
4766				}
4767
4768				if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
4769					kgem_bo_free(kgem, bo);
4770					break;
4771				}
4772
4773				assert(bo->tiling == tiling);
4774				bo->pitch = pitch;
4775				bo->delta = 0;
4776				bo->unique_id = kgem_get_unique_id(kgem);
4777
4778				kgem_bo_remove_from_inactive(kgem, bo);
4779				assert(list_is_empty(&bo->list));
4780				assert(list_is_empty(&bo->vma));
4781
4782				DBG(("  from inactive vma: pitch=%d, tiling=%d: handle=%d, id=%d\n",
4783				     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
4784				assert(bo->reusable);
4785				assert(bo->domain != DOMAIN_GPU);
4786				ASSERT_IDLE(kgem, bo->handle);
4787				assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
4788				assert_tiling(kgem, bo);
4789				bo->refcnt = 1;
4790				return bo;
4791			}
4792		} while (!list_is_empty(cache) &&
4793			 __kgem_throttle_retire(kgem, flags));
4794
4795		if (flags & CREATE_CPU_MAP && !kgem->has_llc) {
4796			if (list_is_empty(&kgem->active[bucket][tiling]) &&
4797			    list_is_empty(&kgem->inactive[bucket]))
4798				flags &= ~CREATE_CACHED;
4799
4800			goto create;
4801		}
4802	}
4803
4804	if (flags & CREATE_INACTIVE)
4805		goto skip_active_search;
4806
4807	/* Best active match */
4808	retry = NUM_CACHE_BUCKETS - bucket;
4809	if (retry > 3 && (flags & CREATE_TEMPORARY) == 0)
4810		retry = 3;
4811search_active:
4812	assert(bucket < NUM_CACHE_BUCKETS);
4813	cache = &kgem->active[bucket][tiling];
4814	if (tiling) {
4815		tiled_height = kgem_aligned_height(kgem, height, tiling);
4816		list_for_each_entry(bo, cache, list) {
4817			assert(!bo->purged);
4818			assert(bo->refcnt == 0);
4819			assert(bucket(bo) == bucket);
4820			assert(bo->reusable);
4821			assert(bo->tiling == tiling);
4822			assert(bo->flush == false);
4823			assert(!bo->scanout);
4824			assert_tiling(kgem, bo);
4825
4826			if (kgem->gen < 040) {
4827				if (bo->pitch < pitch) {
4828					DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
4829					     bo->tiling, tiling,
4830					     bo->pitch, pitch));
4831					continue;
4832				}
4833
4834				if (bo->pitch * tiled_height > bytes(bo))
4835					continue;
4836			} else {
4837				if (num_pages(bo) < size)
4838					continue;
4839
4840				if (bo->pitch != pitch) {
4841					if (!gem_set_tiling(kgem->fd,
4842							    bo->handle,
4843							    tiling, pitch))
4844						continue;
4845
4846					bo->pitch = pitch;
4847				}
4848			}
4849
4850			kgem_bo_remove_from_active(kgem, bo);
4851
4852			bo->unique_id = kgem_get_unique_id(kgem);
4853			bo->delta = 0;
4854			DBG(("  1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
4855			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
4856			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
4857			assert_tiling(kgem, bo);
4858			bo->refcnt = 1;
4859			return bo;
4860		}
4861	} else {
4862		list_for_each_entry(bo, cache, list) {
4863			assert(bucket(bo) == bucket);
4864			assert(!bo->purged);
4865			assert(bo->refcnt == 0);
4866			assert(bo->reusable);
4867			assert(!bo->scanout);
4868			assert(bo->tiling == tiling);
4869			assert(bo->flush == false);
4870			assert_tiling(kgem, bo);
4871
4872			if (num_pages(bo) < size)
4873				continue;
4874
4875			kgem_bo_remove_from_active(kgem, bo);
4876
4877			bo->pitch = pitch;
4878			bo->unique_id = kgem_get_unique_id(kgem);
4879			bo->delta = 0;
4880			DBG(("  1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
4881			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
4882			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
4883			assert_tiling(kgem, bo);
4884			bo->refcnt = 1;
4885			return bo;
4886		}
4887	}
4888
4889	if (kgem->gen >= 040) {
4890		for (i = I915_TILING_Y; i >= I915_TILING_NONE; i--) {
4891			cache = &kgem->active[bucket][i];
4892			list_for_each_entry(bo, cache, list) {
4893				assert(!bo->purged);
4894				assert(bo->refcnt == 0);
4895				assert(bo->reusable);
4896				assert(!bo->scanout);
4897				assert(bo->flush == false);
4898				assert_tiling(kgem, bo);
4899
4900				if (num_pages(bo) < size)
4901					continue;
4902
4903				if (bo->tiling != tiling ||
4904				    (tiling != I915_TILING_NONE && bo->pitch != pitch)) {
4905					if (!gem_set_tiling(kgem->fd,
4906							    bo->handle,
4907							    tiling, pitch))
4908						continue;
4909				}
4910
4911				kgem_bo_remove_from_active(kgem, bo);
4912
4913				bo->unique_id = kgem_get_unique_id(kgem);
4914				bo->pitch = pitch;
4915				bo->tiling = tiling;
4916				bo->delta = 0;
4917				DBG(("  1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
4918				     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
4919				assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
4920				assert_tiling(kgem, bo);
4921				bo->refcnt = 1;
4922				return bo;
4923			}
4924		}
4925	} else if (!exact) { /* allow an active near-miss? */
4926		for (i = tiling; i >= I915_TILING_NONE; i--) {
4927			tiled_height = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags,
4928							 width, height, bpp, tiling, &pitch);
4929			cache = active(kgem, tiled_height / PAGE_SIZE, i);
4930			tiled_height = kgem_aligned_height(kgem, height, i);
4931			list_for_each_entry(bo, cache, list) {
4932				assert(!bo->purged);
4933				assert(bo->refcnt == 0);
4934				assert(bo->reusable);
4935				assert(!bo->scanout);
4936				assert(bo->flush == false);
4937				assert_tiling(kgem, bo);
4938
4939				if (bo->tiling) {
4940					if (bo->pitch < pitch) {
4941						DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
4942						     bo->tiling, tiling,
4943						     bo->pitch, pitch));
4944						continue;
4945					}
4946				} else
4947					bo->pitch = pitch;
4948
4949				if (bo->pitch * tiled_height > bytes(bo))
4950					continue;
4951
4952				kgem_bo_remove_from_active(kgem, bo);
4953
4954				bo->unique_id = kgem_get_unique_id(kgem);
4955				bo->delta = 0;
4956				DBG(("  1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
4957				     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
4958				assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
4959				assert_tiling(kgem, bo);
4960				bo->refcnt = 1;
4961				return bo;
4962			}
4963		}
4964	}
4965
4966	if (--retry) {
4967		bucket++;
4968		goto search_active;
4969	}
4970
4971skip_active_search:
4972	bucket = cache_bucket(size);
4973	retry = NUM_CACHE_BUCKETS - bucket;
4974	if (retry > 3)
4975		retry = 3;
4976search_inactive:
4977	/* Now just look for a close match and prefer any currently active */
4978	assert(bucket < NUM_CACHE_BUCKETS);
4979	cache = &kgem->inactive[bucket];
4980	list_for_each_entry(bo, cache, list) {
4981		assert(bucket(bo) == bucket);
4982		assert(bo->reusable);
4983		assert(!bo->scanout);
4984		assert(bo->flush == false);
4985		assert_tiling(kgem, bo);
4986
4987		if (size > num_pages(bo)) {
4988			DBG(("inactive too small: %d < %d\n",
4989			     num_pages(bo), size));
4990			continue;
4991		}
4992
4993		if (bo->tiling != tiling ||
4994		    (tiling != I915_TILING_NONE && bo->pitch != pitch)) {
4995			if (!gem_set_tiling(kgem->fd, bo->handle,
4996					    tiling, pitch))
4997				continue;
4998		}
4999
5000		if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
5001			kgem_bo_free(kgem, bo);
5002			break;
5003		}
5004
5005		kgem_bo_remove_from_inactive(kgem, bo);
5006		assert(list_is_empty(&bo->list));
5007		assert(list_is_empty(&bo->vma));
5008
5009		bo->pitch = pitch;
5010		bo->tiling = tiling;
5011
5012		bo->delta = 0;
5013		bo->unique_id = kgem_get_unique_id(kgem);
5014		assert(bo->pitch);
5015		DBG(("  from inactive: pitch=%d, tiling=%d: handle=%d, id=%d\n",
5016		     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
5017		assert(bo->refcnt == 0);
5018		assert(bo->reusable);
5019		assert((flags & CREATE_INACTIVE) == 0 || bo->domain != DOMAIN_GPU);
5020		ASSERT_MAYBE_IDLE(kgem, bo->handle, flags & CREATE_INACTIVE);
5021		assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
5022		assert_tiling(kgem, bo);
5023		bo->refcnt = 1;
5024
5025		if (flags & CREATE_SCANOUT)
5026			__kgem_bo_make_scanout(kgem, bo, width, height);
5027
5028		return bo;
5029	}
5030
5031	if ((flags & CREATE_NO_RETIRE) == 0) {
5032		list_for_each_entry_reverse(bo, &kgem->active[bucket][tiling], list) {
5033			if (bo->exec)
5034				break;
5035
5036			if (size > num_pages(bo))
5037				continue;
5038
5039			if (__kgem_busy(kgem, bo->handle)) {
5040				if (flags & CREATE_NO_THROTTLE)
5041					goto no_retire;
5042
5043				do {
5044					if (!kgem->need_throttle) {
5045						DBG(("%s: not throttling for active handle=%d\n", __FUNCTION__, bo->handle));
5046						goto no_retire;
5047					}
5048
5049					__kgem_throttle(kgem, false);
5050				} while (__kgem_busy(kgem, bo->handle));
5051			}
5052
5053			DBG(("%s: flushed active handle=%d\n", __FUNCTION__, bo->handle));
5054
5055			kgem_bo_remove_from_active(kgem, bo);
5056			__kgem_bo_clear_busy(bo);
5057
5058			if (tiling != I915_TILING_NONE && bo->pitch != pitch) {
5059				if (!gem_set_tiling(kgem->fd, bo->handle, tiling, pitch)) {
5060					kgem_bo_free(kgem, bo);
5061					goto no_retire;
5062				}
5063			}
5064
5065			bo->pitch = pitch;
5066			bo->unique_id = kgem_get_unique_id(kgem);
5067			bo->delta = 0;
5068			DBG(("  2:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
5069			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
5070			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
5071			assert_tiling(kgem, bo);
5072			bo->refcnt = 1;
5073
5074			if (flags & CREATE_SCANOUT)
5075				__kgem_bo_make_scanout(kgem, bo, width, height);
5076
5077			return bo;
5078		}
5079no_retire:
5080		flags |= CREATE_NO_RETIRE;
5081	}
5082
5083	if (--retry) {
5084		bucket++;
5085		goto search_inactive;
5086	}
5087
5088create:
5089	if (flags & CREATE_CACHED) {
5090		DBG(("%s: no cached bo found and caller requested not to create a new bo\n", __FUNCTION__));
5091		return NULL;
5092	}
5093
5094	if (bucket >= NUM_CACHE_BUCKETS)
5095		size = ALIGN(size, 1024);
5096	handle = gem_create(kgem->fd, size);
5097	if (handle == 0) {
5098		DBG(("%s: kernel allocation (gem_create) failure\n", __FUNCTION__));
5099		return NULL;
5100	}
5101
5102	bo = __kgem_bo_alloc(handle, size);
5103	if (!bo) {
5104		DBG(("%s: malloc failed\n", __FUNCTION__));
5105		gem_close(kgem->fd, handle);
5106		return NULL;
5107	}
5108
5109	bo->unique_id = kgem_get_unique_id(kgem);
5110	if (tiling == I915_TILING_NONE ||
5111	    gem_set_tiling(kgem->fd, handle, tiling, pitch)) {
5112		bo->tiling = tiling;
5113		bo->pitch = pitch;
5114		if (flags & CREATE_SCANOUT)
5115			__kgem_bo_make_scanout(kgem, bo, width, height);
5116	} else {
5117		if (flags & CREATE_EXACT) {
5118			DBG(("%s: failed to set exact tiling (gem_set_tiling)\n", __FUNCTION__));
5119			gem_close(kgem->fd, handle);
5120			free(bo);
5121			return NULL;
5122		}
5123	}
5124
5125	assert(bytes(bo) >= bo->pitch * kgem_aligned_height(kgem, height, bo->tiling));
5126	assert_tiling(kgem, bo);
5127
5128	debug_alloc__bo(kgem, bo);
5129
5130	DBG(("  new pitch=%d, tiling=%d, handle=%d, id=%d, num_pages=%d [%d], bucket=%d\n",
5131	     bo->pitch, bo->tiling, bo->handle, bo->unique_id,
5132	     size, num_pages(bo), bucket(bo)));
5133	return bo;
5134}
5135
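/* Create a 2D bo intended for CPU access: on LLC a regular bo with a CPU
 * mapping suffices; otherwise we try the snoop cache, then a snooped
 * (set-caching) linear bo, and finally userptr around page-aligned malloced
 * memory.
 */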
5136struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem,
5137				   int width,
5138				   int height,
5139				   int bpp,
5140				   uint32_t flags)
5141{
5142	struct kgem_bo *bo;
5143	int stride, size;
5144
5145	if (DBG_NO_CPU)
5146		return NULL;
5147
5148	DBG(("%s(%dx%d, bpp=%d)\n", __FUNCTION__, width, height, bpp));
5149
5150	if (kgem->has_llc) {
5151		bo = kgem_create_2d(kgem, width, height, bpp,
5152				    I915_TILING_NONE, flags);
5153		if (bo == NULL)
5154			return bo;
5155
5156		assert(bo->tiling == I915_TILING_NONE);
5157		assert_tiling(kgem, bo);
5158
5159		if (kgem_bo_map__cpu(kgem, bo) == NULL) {
5160			kgem_bo_destroy(kgem, bo);
5161			return NULL;
5162		}
5163
5164		return bo;
5165	}
5166
5167	assert(width > 0 && height > 0);
5168	stride = ALIGN(width, 2) * bpp >> 3;
5169	stride = ALIGN(stride, 4);
5170	size = stride * ALIGN(height, 2);
5171	assert(size >= PAGE_SIZE);
5172
5173	DBG(("%s: %dx%d, %d bpp, stride=%d\n",
5174	     __FUNCTION__, width, height, bpp, stride));
5175
5176	bo = search_snoop_cache(kgem, NUM_PAGES(size), 0);
5177	if (bo) {
5178		assert(bo->tiling == I915_TILING_NONE);
5179		assert_tiling(kgem, bo);
5180		assert(bo->snoop);
5181		bo->refcnt = 1;
5182		bo->pitch = stride;
5183		bo->unique_id = kgem_get_unique_id(kgem);
5184		return bo;
5185	}
5186
5187	if (kgem->has_caching) {
5188		bo = kgem_create_linear(kgem, size, flags);
5189		if (bo == NULL)
5190			return NULL;
5191
5192		assert(bo->tiling == I915_TILING_NONE);
5193		assert_tiling(kgem, bo);
5194
5195		if (!gem_set_caching(kgem->fd, bo->handle, SNOOPED)) {
5196			kgem_bo_destroy(kgem, bo);
5197			return NULL;
5198		}
5199		bo->snoop = true;
5200
5201		if (kgem_bo_map__cpu(kgem, bo) == NULL) {
5202			kgem_bo_destroy(kgem, bo);
5203			return NULL;
5204		}
5205
5206		bo->pitch = stride;
5207		bo->unique_id = kgem_get_unique_id(kgem);
5208		return bo;
5209	}
5210
5211	if (kgem->has_userptr) {
5212		void *ptr;
5213
5214		/* XXX */
5215		//if (posix_memalign(&ptr, 64, ALIGN(size, 64)))
5216		if (posix_memalign(&ptr, PAGE_SIZE, ALIGN(size, PAGE_SIZE)))
5217			return NULL;
5218
5219		bo = kgem_create_map(kgem, ptr, size, false);
5220		if (bo == NULL) {
5221			free(ptr);
5222			return NULL;
5223		}
5224
5225		bo->pitch = stride;
5226		bo->unique_id = kgem_get_unique_id(kgem);
5227		return bo;
5228	}
5229
5230	return NULL;
5231}
5232
5233void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
5234{
5235	DBG(("%s: handle=%d, proxy? %d\n",
5236	     __FUNCTION__, bo->handle, bo->proxy != NULL));
5237
5238	if (bo->proxy) {
5239		assert(!bo->reusable);
5240		kgem_bo_binding_free(kgem, bo);
5241
5242		assert(list_is_empty(&bo->list));
5243		_list_del(&bo->vma);
5244		_list_del(&bo->request);
5245
5246		if (bo->io && bo->domain == DOMAIN_CPU)
5247			_kgem_bo_delete_buffer(kgem, bo);
5248
5249		kgem_bo_unref(kgem, bo->proxy);
5250
5251		if (DBG_NO_MALLOC_CACHE) {
5252			free(bo);
5253		} else {
5254			*(struct kgem_bo **)bo = __kgem_freed_bo;
5255			__kgem_freed_bo = bo;
5256		}
5257	} else
5258		__kgem_bo_destroy(kgem, bo);
5259}
5260
5261static void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo)
5262{
5263	assert(bo->rq);
5264	assert(bo->exec == NULL);
5265	assert(bo->needs_flush);
5266
5267	/* The kernel will emit a flush *and* update its own flushing lists. */
5268	if (!__kgem_busy(kgem, bo->handle))
5269		__kgem_bo_clear_busy(bo);
5270
5271	DBG(("%s: handle=%d, busy?=%d\n",
5272	     __FUNCTION__, bo->handle, bo->rq != NULL));
5273}
5274
5275void kgem_scanout_flush(struct kgem *kgem, struct kgem_bo *bo)
5276{
5277	if (!bo->needs_flush)
5278		return;
5279
5280	kgem_bo_submit(kgem, bo);
5281
5282	/* If the kernel fails to emit the flush, then it will be forced when
5283	 * we assume direct access. And as the usual failure is EIO, we do
5284	 * not actually care.
5285	 */
5286	assert(bo->exec == NULL);
5287	if (bo->rq)
5288		__kgem_flush(kgem, bo);
5289
5290	/* Whatever actually happens, we can regard the GTT write domain
5291	 * as being flushed.
5292	 */
5293	bo->gtt_dirty = false;
5294	bo->needs_flush = false;
5295	bo->domain = DOMAIN_NONE;
5296}
5297
5298inline static bool nearly_idle(struct kgem *kgem)
5299{
5300	int ring = kgem->ring == KGEM_BLT;
5301
5302	if (list_is_singular(&kgem->requests[ring]))
5303		return true;
5304
5305	return __kgem_ring_is_idle(kgem, ring);
5306}
5307
5308inline static bool needs_semaphore(struct kgem *kgem, struct kgem_bo *bo)
5309{
5310	if (kgem->needs_semaphore)
5311		return false;
5312
5313	if (bo->rq == NULL || RQ_RING(bo->rq) == kgem->ring)
5314		return false;
5315
5316	kgem->needs_semaphore = true;
5317	return true;
5318}
5319
5320inline static bool needs_reservation(struct kgem *kgem, struct kgem_bo *bo)
5321{
5322	if (kgem->needs_reservation)
5323		return false;
5324
5325	if (bo->presumed_offset)
5326		return false;
5327
5328	kgem->needs_reservation = true;
5329	return nearly_idle(kgem);
5330}
5331
5332inline static bool needs_batch_flush(struct kgem *kgem, struct kgem_bo *bo)
5333{
5334	bool flush = false;
5335
5336	if (needs_semaphore(kgem, bo)) {
5337		DBG(("%s: flushing before handle=%d for required semaphore\n", __FUNCTION__, bo->handle));
5338		flush = true;
5339	}
5340
5341	if (needs_reservation(kgem, bo)) {
5342		DBG(("%s: flushing before handle=%d for new reservation\n", __FUNCTION__, bo->handle));
5343		flush = true;
5344	}
5345
5346	return kgem->nreloc ? flush : false;
5347}
5348
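/* Re-query the available aperture from the kernel, keeping some pages in
 * reserve for fence and relocation slack, to decide whether num_pages more
 * pages can still fit into this batch.
 */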
5349static bool aperture_check(struct kgem *kgem, unsigned num_pages)
5350{
5351	struct drm_i915_gem_get_aperture aperture;
5352	int reserve;
5353
5354	if (kgem->aperture)
5355		return false;
5356
5357	/* Leave some space in case of alignment issues */
5358	reserve = kgem->aperture_mappable / 2;
5359	if (kgem->gen < 033 && reserve < kgem->aperture_max_fence)
5360		reserve = kgem->aperture_max_fence;
5361	if (!kgem->has_llc)
5362		reserve += kgem->nexec * PAGE_SIZE * 2;
5363
5364	DBG(("%s: num_pages=%d, holding %d pages in reserve, total aperture %d\n",
5365	     __FUNCTION__, num_pages, reserve, kgem->aperture_total));
5366	num_pages += reserve;
5367
5368	VG_CLEAR(aperture);
5369	aperture.aper_available_size = kgem->aperture_total;
5370	aperture.aper_available_size *= PAGE_SIZE;
5371	(void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
5372
5373	DBG(("%s: aperture required %ld bytes, available %ld bytes\n",
5374	     __FUNCTION__,
5375	     (long)num_pages * PAGE_SIZE,
5376	     (long)aperture.aper_available_size));
5377
5378	return num_pages <= aperture.aper_available_size / PAGE_SIZE;
5379}
5380
5381static inline bool kgem_flush(struct kgem *kgem, bool flush)
5382{
5383	if (unlikely(kgem->wedged))
5384		return false;
5385
5386	if (kgem->nreloc == 0)
5387		return true;
5388
5389	if (container_of(kgem, struct sna, kgem)->flags & SNA_POWERSAVE)
5390		return true;
5391
5392	if (kgem->flush == flush && kgem->aperture < kgem->aperture_low)
5393		return true;
5394
5395	DBG(("%s: opportunistic flushing? flush=%d,%d, aperture=%d/%d, idle?=%d\n",
5396	     __FUNCTION__, kgem->flush, flush, kgem->aperture, kgem->aperture_low, kgem_ring_is_idle(kgem, kgem->ring)));
5397	return !kgem_ring_is_idle(kgem, kgem->ring);
5398}
5399
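/* Check whether the NULL-terminated list of bo can be added to the current
 * batch without overflowing the exec list or the aperture; a false return
 * asks the caller to submit the batch first.
 */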
5400bool kgem_check_bo(struct kgem *kgem, ...)
5401{
5402	va_list ap;
5403	struct kgem_bo *bo;
5404	int num_exec = 0;
5405	int num_pages = 0;
5406	bool flush = false;
5407	bool busy = true;
5408
5409	va_start(ap, kgem);
5410	while ((bo = va_arg(ap, struct kgem_bo *))) {
5411		while (bo->proxy)
5412			bo = bo->proxy;
5413		if (bo->exec)
5414			continue;
5415
5416		if (needs_batch_flush(kgem, bo)) {
5417			va_end(ap);
5418			return false;
5419		}
5420
5421		num_pages += num_pages(bo);
5422		num_exec++;
5423
5424		flush |= bo->flush;
5425		busy &= bo->rq != NULL;
5426	}
5427	va_end(ap);
5428
5429	DBG(("%s: num_pages=+%d, num_exec=+%d\n",
5430	     __FUNCTION__, num_pages, num_exec));
5431
5432	if (!num_pages)
5433		return true;
5434
5435	if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem)) {
5436		DBG(("%s: out of exec slots (%d + %d / %d)\n", __FUNCTION__,
5437		     kgem->nexec, num_exec, KGEM_EXEC_SIZE(kgem)));
5438		return false;
5439	}
5440
5441	if (num_pages + kgem->aperture > kgem->aperture_high) {
5442		DBG(("%s: final aperture usage (%d + %d) is greater than high water mark (%d)\n",
5443		     __FUNCTION__, kgem->aperture, num_pages, kgem->aperture_high));
5444		return aperture_check(kgem, num_pages);
5445	}
5446
5447	if (busy)
5448		return true;
5449
5450	return kgem_flush(kgem, flush);
5451}
5452
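/* As kgem_check_bo(), but for a single bo that may also require a fence
 * register on pre-gen4; the estimated fence space is checked against the
 * fenceable aperture.
 */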
5453bool kgem_check_bo_fenced(struct kgem *kgem, struct kgem_bo *bo)
5454{
5455	assert(bo->refcnt);
5456	while (bo->proxy)
5457		bo = bo->proxy;
5458	assert(bo->refcnt);
5459
5460	if (bo->exec) {
5461		if (kgem->gen < 040 &&
5462		    bo->tiling != I915_TILING_NONE &&
5463		    (bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
5464			uint32_t size;
5465
5466			assert(bo->tiling == I915_TILING_X);
5467
5468			if (kgem->nfence >= kgem->fence_max)
5469				return false;
5470
5471			if (kgem->aperture_fenced) {
5472				size = 3*kgem->aperture_fenced;
5473				if (kgem->aperture_total == kgem->aperture_mappable)
5474					size += kgem->aperture;
5475				if (size > kgem->aperture_fenceable &&
5476				    kgem_ring_is_idle(kgem, kgem->ring)) {
5477					DBG(("%s: opportunistic fence flush\n", __FUNCTION__));
5478					return false;
5479				}
5480			}
5481
5482			size = kgem_bo_fenced_size(kgem, bo);
5483			if (size > kgem->aperture_max_fence)
5484				kgem->aperture_max_fence = size;
5485			size += kgem->aperture_fenced;
5486			if (kgem->gen < 033 && size < 2 * kgem->aperture_max_fence)
5487				size = 2 * kgem->aperture_max_fence;
5488			if (kgem->aperture_total == kgem->aperture_mappable)
5489				size += kgem->aperture;
5490			if (size > kgem->aperture_fenceable) {
5491				DBG(("%s: estimated fence space required %d (fenced=%d, max_fence=%d, aperture=%d) exceeds fenceable aperture %d\n",
5492				     __FUNCTION__, size, kgem->aperture_fenced, kgem->aperture_max_fence, kgem->aperture, kgem->aperture_fenceable));
5493				return false;
5494			}
5495		}
5496
5497		return true;
5498	}
5499
5500	if (kgem->nexec >= KGEM_EXEC_SIZE(kgem) - 1)
5501		return false;
5502
5503	if (needs_batch_flush(kgem, bo))
5504		return false;
5505
5506	assert_tiling(kgem, bo);
5507	if (kgem->gen < 040 && bo->tiling != I915_TILING_NONE) {
5508		uint32_t size;
5509
5510		assert(bo->tiling == I915_TILING_X);
5511
5512		if (kgem->nfence >= kgem->fence_max)
5513			return false;
5514
5515		if (kgem->aperture_fenced) {
5516			size = 3*kgem->aperture_fenced;
5517			if (kgem->aperture_total == kgem->aperture_mappable)
5518				size += kgem->aperture;
5519			if (size > kgem->aperture_fenceable &&
5520			    kgem_ring_is_idle(kgem, kgem->ring)) {
5521				DBG(("%s: opportunistic fence flush\n", __FUNCTION__));
5522				return false;
5523			}
5524		}
5525
5526		size = kgem_bo_fenced_size(kgem, bo);
5527		if (size > kgem->aperture_max_fence)
5528			kgem->aperture_max_fence = size;
5529		size += kgem->aperture_fenced;
5530		if (kgem->gen < 033 && size < 2 * kgem->aperture_max_fence)
5531			size = 2 * kgem->aperture_max_fence;
5532		if (kgem->aperture_total == kgem->aperture_mappable)
5533			size += kgem->aperture;
5534		if (size > kgem->aperture_fenceable) {
5535			DBG(("%s: estimated fence space required %d (fenced=%d, max_fence=%d, aperture=%d) exceeds fenceable aperture %d\n",
5536			     __FUNCTION__, size, kgem->aperture_fenced, kgem->aperture_max_fence, kgem->aperture, kgem->aperture_fenceable));
5537			return false;
5538		}
5539	}
5540
5541	if (kgem->aperture + kgem->aperture_fenced + num_pages(bo) > kgem->aperture_high) {
5542		DBG(("%s: final aperture usage (%d + %d) is greater than high water mark (%d)\n",
5543		     __FUNCTION__, kgem->aperture, num_pages(bo), kgem->aperture_high));
5544		return aperture_check(kgem, num_pages(bo));
5545	}
5546
5547	if (bo->rq)
5548		return true;
5549
5550	return kgem_flush(kgem, bo->flush);
5551}
5552
5553bool kgem_check_many_bo_fenced(struct kgem *kgem, ...)
5554{
5555	va_list ap;
5556	struct kgem_bo *bo;
5557	int num_fence = 0;
5558	int num_exec = 0;
5559	int num_pages = 0;
5560	int fenced_size = 0;
5561	bool flush = false;
5562	bool busy = true;
5563
5564	va_start(ap, kgem);
5565	while ((bo = va_arg(ap, struct kgem_bo *))) {
5566		assert(bo->refcnt);
5567		while (bo->proxy)
5568			bo = bo->proxy;
5569		assert(bo->refcnt);
5570		if (bo->exec) {
5571			if (kgem->gen >= 040 || bo->tiling == I915_TILING_NONE)
5572				continue;
5573
5574			if ((bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
5575				fenced_size += kgem_bo_fenced_size(kgem, bo);
5576				num_fence++;
5577			}
5578
5579			continue;
5580		}
5581
5582		if (needs_batch_flush(kgem, bo)) {
5583			va_end(ap);
5584			return false;
5585		}
5586
5587		assert_tiling(kgem, bo);
5588		num_pages += num_pages(bo);
5589		num_exec++;
5590		if (kgem->gen < 040 && bo->tiling) {
5591			uint32_t size = kgem_bo_fenced_size(kgem, bo);
5592			if (size > kgem->aperture_max_fence)
5593				kgem->aperture_max_fence = size;
5594			fenced_size += size;
5595			num_fence++;
5596		}
5597
5598		flush |= bo->flush;
5599		busy &= bo->rq != NULL;
5600	}
5601	va_end(ap);
5602
5603	if (num_fence) {
5604		uint32_t size;
5605
5606		if (kgem->nfence + num_fence > kgem->fence_max)
5607			return false;
5608
5609		if (kgem->aperture_fenced) {
5610			size = 3*kgem->aperture_fenced;
5611			if (kgem->aperture_total == kgem->aperture_mappable)
5612				size += kgem->aperture;
5613			if (size > kgem->aperture_fenceable &&
5614			    kgem_ring_is_idle(kgem, kgem->ring)) {
5615				DBG(("%s: opportunistic fence flush\n", __FUNCTION__));
5616				return false;
5617			}
5618		}
5619
5620		size = kgem->aperture_fenced;
5621		size += fenced_size;
5622		if (kgem->gen < 033 && size < 2 * kgem->aperture_max_fence)
5623			size = 2 * kgem->aperture_max_fence;
5624		if (kgem->aperture_total == kgem->aperture_mappable)
5625			size += kgem->aperture;
5626		if (size > kgem->aperture_fenceable) {
5627			DBG(("%s: estimated fence space required %d (fenced=%d, max_fence=%d, aperture=%d) exceeds fenceable aperture %d\n",
5628			     __FUNCTION__, size, kgem->aperture_fenced, kgem->aperture_max_fence, kgem->aperture, kgem->aperture_fenceable));
5629			return false;
5630		}
5631	}
5632
5633	if (num_pages == 0)
5634		return true;
5635
5636	if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem))
5637		return false;
5638
5639	if (num_pages + kgem->aperture > kgem->aperture_high - kgem->aperture_fenced) {
5640		DBG(("%s: final aperture usage (%d + %d + %d) is greater than high water mark (%d)\n",
5641		     __FUNCTION__, kgem->aperture, kgem->aperture_fenced, num_pages, kgem->aperture_high));
5642		return aperture_check(kgem, num_pages);
5643	}
5644
5645	if (busy)
5646		return true;
5647
5648	return kgem_flush(kgem, flush);
5649}
5650
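/* Record a relocation for batch position pos against bo (resolving proxies
 * and, on pre-gen4, fence requirements) and return the value to write into
 * the batch, i.e. the presumed offset plus delta.
 */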
5651uint32_t kgem_add_reloc(struct kgem *kgem,
5652			uint32_t pos,
5653			struct kgem_bo *bo,
5654			uint32_t read_write_domain,
5655			uint32_t delta)
5656{
5657	int index;
5658
5659	DBG(("%s: handle=%d, pos=%d, delta=%d, domains=%08x\n",
5660	     __FUNCTION__, bo ? bo->handle : 0, pos, delta, read_write_domain));
5661
5662	assert(kgem->gen < 0100);
5663	assert((read_write_domain & 0x7fff) == 0 || bo != NULL);
5664
5665	index = kgem->nreloc++;
5666	assert(index < ARRAY_SIZE(kgem->reloc));
5667	kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]);
5668	if (bo) {
5669		assert(kgem->mode != KGEM_NONE);
5670		assert(bo->refcnt);
5671		while (bo->proxy) {
5672			DBG(("%s: adding proxy [delta=%d] for handle=%d\n",
5673			     __FUNCTION__, bo->delta, bo->handle));
5674			delta += bo->delta;
5675			assert(bo->handle == bo->proxy->handle);
5676			/* need to release the cache upon batch submit */
5677			if (bo->exec == NULL) {
5678				list_move_tail(&bo->request,
5679					       &kgem->next_request->buffers);
5680				bo->rq = MAKE_REQUEST(kgem->next_request,
5681						      kgem->ring);
5682				bo->exec = &_kgem_dummy_exec;
5683				bo->domain = DOMAIN_GPU;
5684			}
5685
5686			if (read_write_domain & 0x7fff && !bo->gpu_dirty)
5687				__kgem_bo_mark_dirty(bo);
5688
5689			bo = bo->proxy;
5690			assert(bo->refcnt);
5691		}
5692		assert(bo->refcnt);
5693
5694		if (bo->exec == NULL)
5695			kgem_add_bo(kgem, bo);
5696		assert(bo->rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
5697		assert(RQ_RING(bo->rq) == kgem->ring);
5698
5699		if (kgem->gen < 040 && read_write_domain & KGEM_RELOC_FENCED) {
5700			if (bo->tiling &&
5701			    (bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
5702				assert(bo->tiling == I915_TILING_X);
5703				assert(kgem->nfence < kgem->fence_max);
5704				kgem->aperture_fenced +=
5705					kgem_bo_fenced_size(kgem, bo);
5706				kgem->nfence++;
5707			}
5708			bo->exec->flags |= EXEC_OBJECT_NEEDS_FENCE;
5709		}
5710
5711		kgem->reloc[index].delta = delta;
5712		kgem->reloc[index].target_handle = bo->target_handle;
5713		kgem->reloc[index].presumed_offset = bo->presumed_offset;
5714
5715		if (read_write_domain & 0x7fff && !bo->gpu_dirty) {
5716			assert(!bo->snoop || kgem->can_blt_cpu);
5717			__kgem_bo_mark_dirty(bo);
5718		}
5719
5720		delta += bo->presumed_offset;
5721	} else {
5722		kgem->reloc[index].delta = delta;
5723		kgem->reloc[index].target_handle = ~0U;
5724		kgem->reloc[index].presumed_offset = 0;
5725		if (kgem->nreloc__self < 256)
5726			kgem->reloc__self[kgem->nreloc__self++] = index;
5727	}
5728	kgem->reloc[index].read_domains = read_write_domain >> 16;
5729	kgem->reloc[index].write_domain = read_write_domain & 0x7fff;
5730
5731	return delta;
5732}
5733
5734uint64_t kgem_add_reloc64(struct kgem *kgem,
5735			  uint32_t pos,
5736			  struct kgem_bo *bo,
5737			  uint32_t read_write_domain,
5738			  uint64_t delta)
5739{
5740	int index;
5741
5742	DBG(("%s: handle=%d, pos=%d, delta=%ld, domains=%08x\n",
5743	     __FUNCTION__, bo ? bo->handle : 0, pos, (long)delta, read_write_domain));
5744
5745	assert(kgem->gen >= 0100);
5746	assert((read_write_domain & 0x7fff) == 0 || bo != NULL);
5747
5748	index = kgem->nreloc++;
5749	assert(index < ARRAY_SIZE(kgem->reloc));
5750	kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]);
5751	if (bo) {
5752		assert(kgem->mode != KGEM_NONE);
5753		assert(bo->refcnt);
5754		while (bo->proxy) {
5755			DBG(("%s: adding proxy [delta=%ld] for handle=%d\n",
5756			     __FUNCTION__, (long)bo->delta, bo->handle));
5757			delta += bo->delta;
5758			assert(bo->handle == bo->proxy->handle);
5759			/* need to release the cache upon batch submit */
5760			if (bo->exec == NULL) {
5761				list_move_tail(&bo->request,
5762					       &kgem->next_request->buffers);
5763				bo->rq = MAKE_REQUEST(kgem->next_request,
5764						      kgem->ring);
5765				bo->exec = &_kgem_dummy_exec;
5766				bo->domain = DOMAIN_GPU;
5767			}
5768
5769			if (read_write_domain & 0x7fff && !bo->gpu_dirty)
5770				__kgem_bo_mark_dirty(bo);
5771
5772			bo = bo->proxy;
5773			assert(bo->refcnt);
5774		}
5775		assert(bo->refcnt);
5776
5777		if (bo->exec == NULL)
5778			kgem_add_bo(kgem, bo);
5779		assert(bo->rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
5780		assert(RQ_RING(bo->rq) == kgem->ring);
5781
5782		DBG(("%s[%d] = (delta=%ld, target handle=%d, presumed=%llx)\n",
5783					__FUNCTION__, index, (long)delta, bo->target_handle, (long long)bo->presumed_offset));
5784		kgem->reloc[index].delta = delta;
5785		kgem->reloc[index].target_handle = bo->target_handle;
5786		kgem->reloc[index].presumed_offset = bo->presumed_offset;
5787
5788		if (read_write_domain & 0x7fff && !bo->gpu_dirty) {
5789			assert(!bo->snoop || kgem->can_blt_cpu);
5790			__kgem_bo_mark_dirty(bo);
5791		}
5792
5793		delta += bo->presumed_offset;
5794	} else {
5795		DBG(("%s[%d] = (delta=%ld, target handle=batch)\n",
5796					__FUNCTION__, index, (long)delta));
5797		kgem->reloc[index].delta = delta;
5798		kgem->reloc[index].target_handle = ~0U;
5799		kgem->reloc[index].presumed_offset = 0;
5800		if (kgem->nreloc__self < 256)
5801			kgem->reloc__self[kgem->nreloc__self++] = index;
5802	}
5803	kgem->reloc[index].read_domains = read_write_domain >> 16;
5804	kgem->reloc[index].write_domain = read_write_domain & 0x7fff;
5805
5806	return delta;
5807}
5808
5809static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket)
5810{
5811	int i, j;
5812
5813	DBG(("%s: type=%d, count=%d (bucket: %d)\n",
5814	     __FUNCTION__, type, kgem->vma[type].count, bucket));
5815	if (kgem->vma[type].count <= 0)
5816	       return;
5817
5818	if (kgem->need_purge)
5819		kgem_purge_cache(kgem);
5820
5821	/* vma are limited on a per-process basis to around 64k.
5822	 * This includes all malloc arenas as well as other file
5823	 * mappings. In order to be fair, not hog the cache, and,
5824	 * more importantly, not to exhaust that limit and start
5825	 * failing mappings, we keep our own number of open vma
5826	 * within a conservative value.
5827	 */
5828	i = 0;
5829	while (kgem->vma[type].count > 0) {
5830		struct kgem_bo *bo = NULL;
5831		void **ptr;
5832
5833		for (j = 0;
5834		     bo == NULL && j < ARRAY_SIZE(kgem->vma[type].inactive);
5835		     j++) {
5836			struct list *head = &kgem->vma[type].inactive[i++%ARRAY_SIZE(kgem->vma[type].inactive)];
5837			if (!list_is_empty(head))
5838				bo = list_last_entry(head, struct kgem_bo, vma);
5839		}
5840		if (bo == NULL)
5841			break;
5842
5843		DBG(("%s: discarding inactive %s vma cache for %d\n",
5844		     __FUNCTION__, type ? "CPU" : "GTT", bo->handle));
5845
5846		ptr = type ? &bo->map__cpu : &bo->map__gtt;
5847		assert(bo->rq == NULL);
5848
5849		VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(*ptr), bytes(bo)));
5850		munmap(MAP(*ptr), bytes(bo));
5851		*ptr = NULL;
5852		list_del(&bo->vma);
5853		kgem->vma[type].count--;
5854
5855		if (!bo->purged && !kgem_bo_set_purgeable(kgem, bo)) {
5856			DBG(("%s: freeing unpurgeable old mapping\n",
5857			     __FUNCTION__));
5858			kgem_bo_free(kgem, bo);
5859		}
5860	}
5861}
5862
5863void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo)
5864{
5865	void *ptr;
5866
5867	DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
5868	     bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));
5869
5870	assert(bo->proxy == NULL);
5871	assert(list_is_empty(&bo->list));
5872	assert_tiling(kgem, bo);
5873	assert(!bo->purged || bo->reusable);
5874
5875	if (bo->tiling == I915_TILING_NONE && !bo->scanout && kgem->has_llc) {
5876		DBG(("%s: converting request for GTT map into CPU map\n",
5877		     __FUNCTION__));
5878		return kgem_bo_map__cpu(kgem, bo);
5879	}
5880
5881	ptr = MAP(bo->map__gtt);
5882	if (ptr == NULL) {
5883		assert(num_pages(bo) <= kgem->aperture_mappable / 2);
5884
5885		kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
5886
5887		ptr = __kgem_bo_map__gtt(kgem, bo);
5888		if (ptr == NULL)
5889			return NULL;
5890
5891		/* Cache this mapping to avoid the overhead of an
5892		 * excruciatingly slow GTT pagefault. This is more an
5893		 * issue with compositing managers which need to frequently
5894		 * flush CPU damage to their GPU bo.
5895		 */
5896		bo->map__gtt = ptr;
5897		DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
5898	}
5899
5900	return ptr;
5901}
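
/* Note: unlike kgem_bo_map() below, the __async variant issues no
 * set-domain ioctl and so never stalls waiting for the GPU; callers are
 * expected to manage coherency themselves (kgem_bo_map__debug() relies on
 * this to peek at a buffer without perturbing its domain).
 */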
5902
5903void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
5904{
5905	void *ptr;
5906
5907	DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
5908	     bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));
5909
5910	assert(bo->proxy == NULL);
5911	assert(list_is_empty(&bo->list));
5912	assert(bo->exec == NULL);
5913	assert_tiling(kgem, bo);
5914	assert(!bo->purged || bo->reusable);
5915
5916	if (bo->tiling == I915_TILING_NONE && !bo->scanout &&
5917	    (kgem->has_llc || bo->domain == DOMAIN_CPU)) {
5918		DBG(("%s: converting request for GTT map into CPU map\n",
5919		     __FUNCTION__));
5920		ptr = kgem_bo_map__cpu(kgem, bo);
5921		if (ptr)
5922			kgem_bo_sync__cpu(kgem, bo);
5923		return ptr;
5924	}
5925
5926	ptr = MAP(bo->map__gtt);
5927	if (ptr == NULL) {
5928		assert(num_pages(bo) <= kgem->aperture_mappable / 2);
5929		assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y);
5930
5931		kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
5932
5933		ptr = __kgem_bo_map__gtt(kgem, bo);
5934		if (ptr == NULL)
5935			return NULL;
5936
5937		/* Cache this mapping to avoid the overhead of an
5938		 * excruciatingly slow GTT pagefault. This is more an
5939		 * issue with compositing managers which need to frequently
5940		 * flush CPU damage to their GPU bo.
5941		 */
5942		bo->map__gtt = ptr;
5943		DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
5944	}
5945
5946	if (bo->domain != DOMAIN_GTT || FORCE_MMAP_SYNC & (1 << DOMAIN_GTT)) {
5947		struct drm_i915_gem_set_domain set_domain;
5948
5949		DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
5950		     bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle)));
5951
5952		/* XXX use PROT_READ to avoid the write flush? */
5953
5954		VG_CLEAR(set_domain);
5955		set_domain.handle = bo->handle;
5956		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
5957		set_domain.write_domain = I915_GEM_DOMAIN_GTT;
5958		if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
5959			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
5960			kgem_throttle(kgem);
5961		}
5962		kgem_bo_retire(kgem, bo);
5963		bo->domain = DOMAIN_GTT;
5964		bo->gtt_dirty = true;
5965	}
5966
5967	return ptr;
5968}
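
/* Illustrative sketch (not part of the driver's call paths): writing to a bo
 * through kgem_bo_map(). "kgem" and "bo" are whatever the caller already
 * holds; on LLC hardware the returned pointer may actually be a CPU mmap
 * rather than a GTT mmap, but it is synchronized for access either way.
 *
 *	uint8_t *dst = kgem_bo_map(kgem, bo);
 *	if (dst != NULL)
 *		memset(dst, 0, bytes(bo));
 */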
5969
5970void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
5971{
5972	void *ptr;
5973
5974	DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
5975	     bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));
5976
5977	assert(bo->proxy == NULL);
5978	assert(bo->exec == NULL);
5979	assert(list_is_empty(&bo->list));
5980	assert_tiling(kgem, bo);
5981	assert(!bo->purged || bo->reusable);
5982
5983	ptr = MAP(bo->map__gtt);
5984	if (ptr == NULL) {
5985		assert(num_pages(bo) <= kgem->aperture_mappable / 4);
5986
5987		kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
5988
5989		ptr = __kgem_bo_map__gtt(kgem, bo);
5990		if (ptr == NULL)
5991			return NULL;
5992
5993		/* Cache this mapping to avoid the overhead of an
5994		 * excruciatingly slow GTT pagefault. This is more an
5995		 * issue with compositing managers which need to frequently
5996		 * flush CPU damage to their GPU bo.
5997		 */
5998		bo->map__gtt = ptr;
5999		DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
6000	}
6001
6002	return ptr;
6003}
6004
6005void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo)
6006{
6007	return kgem_bo_map__async(kgem, bo);
6008}
6009
6010void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
6011{
6012	struct drm_i915_gem_mmap mmap_arg;
6013	int err;
6014
6015	DBG(("%s(handle=%d, size=%d, map=%p:%p)\n",
6016	     __FUNCTION__, bo->handle, bytes(bo), bo->map__gtt, bo->map__cpu));
6017	assert(!bo->purged);
6018	assert(list_is_empty(&bo->list));
6019	assert(bo->proxy == NULL);
6020	assert_tiling(kgem, bo);
6021
6022	if (bo->map__cpu)
6023		return MAP(bo->map__cpu);
6024
6025	kgem_trim_vma_cache(kgem, MAP_CPU, bucket(bo));
6026
6027retry:
6028	VG_CLEAR(mmap_arg);
6029	mmap_arg.handle = bo->handle;
6030	mmap_arg.offset = 0;
6031	mmap_arg.size = bytes(bo);
6032	if ((err = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg))) {
6033		assert(err != -EINVAL); /* do_ioctl() reports failure as -errno */
6034
6035		if (__kgem_throttle_retire(kgem, 0))
6036			goto retry;
6037
6038		if (kgem_cleanup_cache(kgem))
6039			goto retry;
6040
6041		ERR(("%s: failed to mmap handle=%d, %d bytes, into CPU domain: %d\n",
6042		     __FUNCTION__, bo->handle, bytes(bo), -err));
6043		return NULL;
6044	}
6045
6046	VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bytes(bo)));
6047
6048	DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle));
6049	return bo->map__cpu = (void *)(uintptr_t)mmap_arg.addr_ptr;
6050}
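
/* Illustrative sketch (not part of the driver's call paths): a CPU mmap is
 * only a mapping; callers pair it with kgem_bo_sync__cpu() before touching
 * the pages, exactly as kgem_bo_map() does on its LLC path above. "data" and
 * "len" are hypothetical.
 *
 *	void *ptr = kgem_bo_map__cpu(kgem, bo);
 *	if (ptr != NULL) {
 *		kgem_bo_sync__cpu(kgem, bo);
 *		memcpy(ptr, data, len);
 *	}
 */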
6051
6052uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo)
6053{
6054	struct drm_gem_flink flink;
6055
6056	VG_CLEAR(flink);
6057	flink.handle = bo->handle;
6058	if (do_ioctl(kgem->fd, DRM_IOCTL_GEM_FLINK, &flink))
6059		return 0;
6060
6061	DBG(("%s: flinked handle=%d to name=%d, marking non-reusable\n",
6062	     __FUNCTION__, flink.handle, flink.name));
6063
6064	/* Ordinarily giving the name away makes the buffer non-reusable.
6065	 * However, we track the lifetime of all clients and their hold
6066	 * on the buffer, and *presuming* they do not pass it on to a third
6067	 * party, we can track its lifetime accurately.
6068	 */
6069	bo->reusable = false;
6070
6071	kgem_bo_unclean(kgem, bo);
6072
6073	return flink.name;
6074}
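
/* Illustrative sketch (not part of the driver's call paths): exporting a bo
 * to another process, e.g. for DRI2 buffer sharing. The peer opens the name
 * with DRM_IOCTL_GEM_OPEN; a zero return means the flink ioctl failed.
 *
 *	uint32_t name = kgem_bo_flink(kgem, bo);
 *	if (name == 0)
 *		... handle the export failure ...
 */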
6075
6076struct kgem_bo *kgem_create_map(struct kgem *kgem,
6077				void *ptr, uint32_t size,
6078				bool read_only)
6079{
6080	struct kgem_bo *bo;
6081	uintptr_t first_page, last_page;
6082	uint32_t handle;
6083
6084	assert(MAP(ptr) == ptr);
6085
6086	DBG(("%s(%p size=%d, read-only?=%d) - has_userptr?=%d\n", __FUNCTION__,
6087	     ptr, size, read_only, kgem->has_userptr));
6088	if (!kgem->has_userptr)
6089		return NULL;
6090
6091	first_page = (uintptr_t)ptr;
6092	last_page = first_page + size + PAGE_SIZE - 1;
6093
6094	first_page &= ~(PAGE_SIZE-1);
6095	last_page &= ~(PAGE_SIZE-1);
6096	assert(last_page > first_page);
6097
6098	handle = gem_userptr(kgem->fd,
6099			     (void *)first_page, last_page-first_page,
6100			     read_only);
6101	if (handle == 0) {
6102		DBG(("%s: import failed, errno=%d\n", __FUNCTION__, errno));
6103		return NULL;
6104	}
6105
6106	bo = __kgem_bo_alloc(handle, (last_page - first_page) / PAGE_SIZE);
6107	if (bo == NULL) {
6108		gem_close(kgem->fd, handle);
6109		return NULL;
6110	}
6111
6112	bo->unique_id = kgem_get_unique_id(kgem);
6113	bo->snoop = !kgem->has_llc;
6114	debug_alloc__bo(kgem, bo);
6115
6116	if (first_page != (uintptr_t)ptr) {
6117		struct kgem_bo *proxy;
6118
6119		proxy = kgem_create_proxy(kgem, bo,
6120					  (uintptr_t)ptr - first_page, size);
6121		kgem_bo_destroy(kgem, bo);
6122		if (proxy == NULL)
6123			return NULL;
6124
6125		bo = proxy;
6126	}
6127
6128	bo->map__cpu = MAKE_USER_MAP(ptr);
6129
6130	DBG(("%s(ptr=%p, size=%d, pages=%d, read_only=%d) => handle=%d (proxy? %d)\n",
6131	     __FUNCTION__, ptr, size, NUM_PAGES(size), read_only, handle, bo->proxy != NULL));
6132	return bo;
6133}
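
/* Illustrative sketch (not part of the driver's call paths): wrapping client
 * memory (for example an SHM segment) so the GPU can access it directly via
 * userptr. "shm_addr" and "shm_size" are hypothetical; if the pointer is not
 * page aligned, a proxy covering the requested range is returned instead.
 *
 *	struct kgem_bo *bo = kgem_create_map(kgem, shm_addr, shm_size, false);
 *	if (bo == NULL) {
 *		... fall back to copying through kgem_create_buffer() ...
 *	}
 */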
6134
6135void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo)
6136{
6137	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
6138	assert(!bo->scanout);
6139	assert_tiling(kgem, bo);
6140
6141	kgem_bo_submit(kgem, bo);
6142
6143	/* SHM pixmaps use proxies for subpage offsets */
6144	assert(!bo->purged);
6145	while (bo->proxy)
6146		bo = bo->proxy;
6147	assert(!bo->purged);
6148
6149	if (bo->domain != DOMAIN_CPU || FORCE_MMAP_SYNC & (1 << DOMAIN_CPU)) {
6150		struct drm_i915_gem_set_domain set_domain;
6151
6152		DBG(("%s: SYNC: handle=%d, needs_flush? %d, domain? %d, busy? %d\n",
6153		     __FUNCTION__, bo->handle,
6154		     bo->needs_flush, bo->domain,
6155		     __kgem_busy(kgem, bo->handle)));
6156
6157		VG_CLEAR(set_domain);
6158		set_domain.handle = bo->handle;
6159		set_domain.read_domains = I915_GEM_DOMAIN_CPU;
6160		set_domain.write_domain = I915_GEM_DOMAIN_CPU;
6161
6162		if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
6163			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
6164			kgem_throttle(kgem);
6165		}
6166		kgem_bo_retire(kgem, bo);
6167		bo->domain = DOMAIN_CPU;
6168	}
6169}
6170
6171void kgem_bo_sync__cpu_full(struct kgem *kgem, struct kgem_bo *bo, bool write)
6172{
6173	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
6174	assert(!bo->scanout || !write);
6175	assert_tiling(kgem, bo);
6176
6177	if (write || bo->needs_flush)
6178		kgem_bo_submit(kgem, bo);
6179
6180	/* SHM pixmaps use proxies for subpage offsets */
6181	assert(!bo->purged);
6182	assert(bo->refcnt);
6183	while (bo->proxy)
6184		bo = bo->proxy;
6185	assert(bo->refcnt);
6186	assert(!bo->purged);
6187
6188	if (bo->domain != DOMAIN_CPU || FORCE_MMAP_SYNC & (1 << DOMAIN_CPU)) {
6189		struct drm_i915_gem_set_domain set_domain;
6190
6191		DBG(("%s: SYNC: handle=%d, needs_flush? %d, domain? %d, busy? %d\n",
6192		     __FUNCTION__, bo->handle,
6193		     bo->needs_flush, bo->domain,
6194		     __kgem_busy(kgem, bo->handle)));
6195
6196		VG_CLEAR(set_domain);
6197		set_domain.handle = bo->handle;
6198		set_domain.read_domains = I915_GEM_DOMAIN_CPU;
6199		set_domain.write_domain = write ? I915_GEM_DOMAIN_CPU : 0;
6200
6201		if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
6202			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
6203			kgem_throttle(kgem);
6204		}
6205		if (write) {
6206			kgem_bo_retire(kgem, bo);
6207			bo->domain = DOMAIN_CPU;
6208		} else {
6209			if (bo->exec == NULL)
6210				kgem_bo_maybe_retire(kgem, bo);
6211			bo->domain = DOMAIN_NONE;
6212		}
6213	}
6214}
6215
6216void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo)
6217{
6218	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
6219	assert(bo->refcnt);
6220	assert(bo->proxy == NULL);
6221	assert_tiling(kgem, bo);
6222
6223	kgem_bo_submit(kgem, bo);
6224
6225	if (bo->domain != DOMAIN_GTT || FORCE_MMAP_SYNC & (1 << DOMAIN_GTT)) {
6226		struct drm_i915_gem_set_domain set_domain;
6227
6228		DBG(("%s: SYNC: handle=%d, needs_flush? %d, domain? %d, busy? %d\n",
6229		     __FUNCTION__, bo->handle,
6230		     bo->needs_flush, bo->domain,
6231		     __kgem_busy(kgem, bo->handle)));
6232
6233		VG_CLEAR(set_domain);
6234		set_domain.handle = bo->handle;
6235		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
6236		set_domain.write_domain = I915_GEM_DOMAIN_GTT;
6237
6238		if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
6239			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
6240			kgem_throttle(kgem);
6241		}
6242		kgem_bo_retire(kgem, bo);
6243		bo->domain = DOMAIN_GTT;
6244		bo->gtt_dirty = true;
6245	}
6246}
6247
6248void kgem_clear_dirty(struct kgem *kgem)
6249{
6250	struct list * const buffers = &kgem->next_request->buffers;
6251	struct kgem_bo *bo;
6252
6253	list_for_each_entry(bo, buffers, request) {
6254		if (!bo->gpu_dirty)
6255			break;
6256
6257		bo->gpu_dirty = false;
6258	}
6259}
6260
6261struct kgem_bo *kgem_create_proxy(struct kgem *kgem,
6262				  struct kgem_bo *target,
6263				  int offset, int length)
6264{
6265	struct kgem_bo *bo;
6266
6267	DBG(("%s: target handle=%d [proxy? %d], offset=%d, length=%d, io=%d\n",
6268	     __FUNCTION__, target->handle, target->proxy ? target->proxy->delta : -1,
6269	     offset, length, target->io));
6270
6271	bo = __kgem_bo_alloc(target->handle, length);
6272	if (bo == NULL)
6273		return NULL;
6274
6275	bo->unique_id = kgem_get_unique_id(kgem);
6276	bo->reusable = false;
6277	bo->size.bytes = length;
6278
6279	bo->io = target->io && target->proxy == NULL;
6280	bo->gpu_dirty = target->gpu_dirty;
6281	bo->tiling = target->tiling;
6282	bo->pitch = target->pitch;
6283	bo->flush = target->flush;
6284	bo->snoop = target->snoop;
6285
6286	assert(!bo->scanout);
6287	bo->proxy = kgem_bo_reference(target);
6288	bo->delta = offset;
6289
6290	/* Proxies are only tracked for busyness on the current rq */
6291	if (target->exec && !bo->io) {
6292		assert(RQ(target->rq) == kgem->next_request);
6293		list_move_tail(&bo->request, &kgem->next_request->buffers);
6294		bo->exec = &_kgem_dummy_exec;
6295		bo->rq = target->rq;
6296	}
6297
6298	return bo;
6299}
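
/* Illustrative sketch (not part of the driver's call paths): a proxy is a
 * window into its target; it shares the target's handle and pages, and its
 * delta is folded into any relocation emitted against it (see
 * kgem_add_reloc()). "target", "offset" and "length" are hypothetical.
 *
 *	struct kgem_bo *view = kgem_create_proxy(kgem, target, offset, length);
 *	if (view != NULL) {
 *		... emit relocations against view ...
 *		kgem_bo_destroy(kgem, view);
 *	}
 */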
6300
6301static struct kgem_buffer *
6302buffer_alloc(void)
6303{
6304	struct kgem_buffer *bo;
6305
6306	bo = malloc(sizeof(*bo));
6307	if (bo == NULL)
6308		return NULL;
6309
6310	bo->mem = NULL;
6311	bo->need_io = false;
6312	bo->mmapped = MMAPPED_CPU;
6313
6314	return bo;
6315}
6316
6317static struct kgem_buffer *
6318buffer_alloc_with_data(int num_pages)
6319{
6320	struct kgem_buffer *bo;
6321
6322	bo = malloc(sizeof(*bo) + 2*UPLOAD_ALIGNMENT + num_pages * PAGE_SIZE);
6323	if (bo == NULL)
6324		return NULL;
6325
6326	bo->mem = (void *)ALIGN((uintptr_t)bo + sizeof(*bo), UPLOAD_ALIGNMENT);
6327	bo->mmapped = MMAPPED_NONE;
6328	return bo;
6329}
6330
6331static inline bool
6332use_snoopable_buffer(struct kgem *kgem, uint32_t flags)
6333{
6334	if ((flags & KGEM_BUFFER_WRITE) == 0)
6335		return kgem->gen >= 030;
6336
6337	return true;
6338}
6339
6340static void
6341init_buffer_from_bo(struct kgem_buffer *bo, struct kgem_bo *old)
6342{
6343	DBG(("%s: reusing handle=%d for buffer\n",
6344	     __FUNCTION__, old->handle));
6345
6346	assert(old->proxy == NULL);
6347
6348	memcpy(&bo->base, old, sizeof(*old));
6349	if (old->rq)
6350		list_replace(&old->request, &bo->base.request);
6351	else
6352		list_init(&bo->base.request);
6353	list_replace(&old->vma, &bo->base.vma);
6354	list_init(&bo->base.list);
6355	free(old);
6356
6357	assert(bo->base.tiling == I915_TILING_NONE);
6358
6359	bo->base.refcnt = 1;
6360}
6361
6362static struct kgem_buffer *
6363search_snoopable_buffer(struct kgem *kgem, unsigned alloc)
6364{
6365	struct kgem_buffer *bo;
6366	struct kgem_bo *old;
6367
6368	old = search_snoop_cache(kgem, alloc, 0);
6369	if (old) {
6370		if (!old->io) {
6371			bo = buffer_alloc();
6372			if (bo == NULL)
6373				return NULL;
6374
6375			init_buffer_from_bo(bo, old);
6376		} else {
6377			bo = (struct kgem_buffer *)old;
6378			bo->base.refcnt = 1;
6379		}
6380
6381		DBG(("%s: created CPU handle=%d for buffer, size %d\n",
6382		     __FUNCTION__, bo->base.handle, num_pages(&bo->base)));
6383
6384		assert(bo->base.snoop);
6385		assert(bo->base.tiling == I915_TILING_NONE);
6386		assert(num_pages(&bo->base) >= alloc);
6387		assert(bo->mmapped == MMAPPED_CPU);
6388		assert(bo->need_io == false);
6389
6390		bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
6391		if (bo->mem == NULL) {
6392			bo->base.refcnt = 0;
6393			kgem_bo_free(kgem, &bo->base);
6394			bo = NULL;
6395		}
6396
6397		return bo;
6398	}
6399
6400	return NULL;
6401}
6402
6403static struct kgem_buffer *
6404create_snoopable_buffer(struct kgem *kgem, unsigned alloc)
6405{
6406	struct kgem_buffer *bo;
6407	uint32_t handle;
6408
6409	if (kgem->has_llc) {
6410		struct kgem_bo *old;
6411
6412		bo = buffer_alloc();
6413		if (bo == NULL)
6414			return NULL;
6415
6416		old = search_linear_cache(kgem, alloc,
6417					 CREATE_INACTIVE | CREATE_CPU_MAP | CREATE_EXACT);
6418		if (old) {
6419			init_buffer_from_bo(bo, old);
6420		} else {
6421			handle = gem_create(kgem->fd, alloc);
6422			if (handle == 0) {
6423				free(bo);
6424				return NULL;
6425			}
6426
6427			__kgem_bo_init(&bo->base, handle, alloc);
6428			debug_alloc__bo(kgem, &bo->base);
6429			DBG(("%s: created CPU (LLC) handle=%d for buffer, size %d\n",
6430			     __FUNCTION__, bo->base.handle, alloc));
6431		}
6432
6433		assert(bo->base.refcnt == 1);
6434		assert(bo->mmapped == MMAPPED_CPU);
6435		assert(bo->need_io == false);
6436
6437		bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
6438		if (bo->mem != NULL)
6439			return bo;
6440
6441		bo->base.refcnt = 0; /* for valgrind */
6442		kgem_bo_free(kgem, &bo->base);
6443	}
6444
6445	if (kgem->has_caching) {
6446		struct kgem_bo *old;
6447
6448		bo = buffer_alloc();
6449		if (bo == NULL)
6450			return NULL;
6451
6452		old = search_linear_cache(kgem, alloc,
6453					 CREATE_INACTIVE | CREATE_CPU_MAP | CREATE_EXACT);
6454		if (old) {
6455			init_buffer_from_bo(bo, old);
6456		} else {
6457			handle = gem_create(kgem->fd, alloc);
6458			if (handle == 0) {
6459				free(bo);
6460				return NULL;
6461			}
6462
6463			__kgem_bo_init(&bo->base, handle, alloc);
6464			debug_alloc__bo(kgem, &bo->base);
6465			DBG(("%s: created CPU handle=%d for buffer, size %d\n",
6466			     __FUNCTION__, bo->base.handle, alloc));
6467		}
6468
6469		assert(bo->base.refcnt == 1);
6470		assert(bo->mmapped == MMAPPED_CPU);
6471		assert(bo->need_io == false);
6472
6473		if (!gem_set_caching(kgem->fd, bo->base.handle, SNOOPED))
6474			goto free_caching;
6475
6476		bo->base.snoop = true;
6477
6478		bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
6479		if (bo->mem == NULL)
6480			goto free_caching;
6481
6482		return bo;
6483
6484free_caching:
6485		bo->base.refcnt = 0; /* for valgrind */
6486		kgem_bo_free(kgem, &bo->base);
6487	}
6488
6489	if (kgem->has_userptr) {
6490		bo = buffer_alloc();
6491		if (bo == NULL)
6492			return NULL;
6493
6494		/* Whole, page-aligned pages, as required for wrapping with userptr */
6495		if (posix_memalign(&bo->mem, PAGE_SIZE, alloc * PAGE_SIZE)) {
6496			free(bo);
6497			return NULL;
6498		}
6499
6500		handle = gem_userptr(kgem->fd, bo->mem, alloc * PAGE_SIZE, false);
6501		if (handle == 0) {
6502			free(bo->mem);
6503			free(bo);
6504			return NULL;
6505		}
6506
6507		__kgem_bo_init(&bo->base, handle, alloc);
6508		debug_alloc__bo(kgem, &bo->base);
6509		DBG(("%s: created snoop handle=%d for buffer\n",
6510		     __FUNCTION__, bo->base.handle));
6511
6512		assert(bo->mmapped == MMAPPED_CPU);
6513		assert(bo->need_io == false);
6514
6515		bo->base.refcnt = 1;
6516		bo->base.snoop = true;
6517		bo->base.map__cpu = MAKE_USER_MAP(bo->mem);
6518
6519		return bo;
6520	}
6521
6522	return NULL;
6523}
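
/* create_snoopable_buffer() above tries, in order: an ordinary bo that can be
 * mapped coherently through the CPU on LLC hardware, a bo explicitly marked
 * SNOOPED via the set-caching ioctl, and finally page-aligned malloc memory
 * wrapped with userptr; it returns NULL only when none of these paths is
 * available.
 */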
6524
6525struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
6526				   uint32_t size, uint32_t flags,
6527				   void **ret)
6528{
6529	struct kgem_buffer *bo;
6530	unsigned offset, alloc;
6531	struct kgem_bo *old;
6532
6533	DBG(("%s: size=%d, flags=%x [write?=%d, inplace?=%d, last?=%d]\n",
6534	     __FUNCTION__, size, flags,
6535	     !!(flags & KGEM_BUFFER_WRITE),
6536	     !!(flags & KGEM_BUFFER_INPLACE),
6537	     !!(flags & KGEM_BUFFER_LAST)));
6538	assert(size);
6539	/* we should never be asked to create anything TOO large */
6540	assert(size <= kgem->max_object_size);
6541
6542#if !DBG_NO_UPLOAD_CACHE
6543	list_for_each_entry(bo, &kgem->batch_buffers, base.list) {
6544		assert(bo->base.io);
6545		assert(bo->base.refcnt >= 1);
6546
6547		/* We can reuse any write buffer into which the read request fits */
6548		if (flags == KGEM_BUFFER_LAST &&
6549		    bo->write == KGEM_BUFFER_WRITE &&
6550		    bo->base.refcnt == 1 &&
6551		    bo->mmapped == MMAPPED_NONE &&
6552		    size <= bytes(&bo->base)) {
6553			DBG(("%s: reusing write buffer for read of %d bytes? used=%d, total=%d\n",
6554			     __FUNCTION__, size, bo->used, bytes(&bo->base)));
6555			gem_write__cachealigned(kgem->fd, bo->base.handle,
6556						0, bo->used, bo->mem);
6557			assert(list_is_empty(&bo->base.vma));
6558			bo->need_io = 0;
6559			bo->write = 0;
6560			offset = 0;
6561			bo->used = size;
6562			goto done;
6563		}
6564
6565		if (flags & KGEM_BUFFER_WRITE) {
6566			if ((bo->write & KGEM_BUFFER_WRITE) == 0 ||
6567			    (((bo->write & ~flags) & KGEM_BUFFER_INPLACE) &&
6568			     !bo->base.snoop)) {
6569				DBG(("%s: skip write %x buffer, need %x\n",
6570				     __FUNCTION__, bo->write, flags));
6571				continue;
6572			}
6573			assert(bo->mmapped || bo->need_io);
6574		} else {
6575			if (bo->write & KGEM_BUFFER_WRITE) {
6576				DBG(("%s: skip write %x buffer, need %x\n",
6577				     __FUNCTION__, bo->write, flags));
6578				continue;
6579			}
6580		}
6581
6582		if (bo->used + size <= bytes(&bo->base)) {
6583			DBG(("%s: reusing buffer? used=%d + size=%d, total=%d\n",
6584			     __FUNCTION__, bo->used, size, bytes(&bo->base)));
6585			offset = bo->used;
6586			bo->used += size;
6587			goto done;
6588		}
6589	}
6590
6591	if (flags & KGEM_BUFFER_WRITE) {
6592		list_for_each_entry(bo, &kgem->active_buffers, base.list) {
6593			assert(bo->base.io);
6594			assert(bo->base.refcnt >= 1);
6595			assert(bo->base.exec == NULL);
6596			assert(bo->mmapped);
6597			assert(bo->mmapped == MMAPPED_GTT || kgem->has_llc || bo->base.snoop);
6598
6599			if ((bo->write & ~flags) & KGEM_BUFFER_INPLACE && !bo->base.snoop) {
6600				DBG(("%s: skip write %x buffer, need %x\n",
6601				     __FUNCTION__, bo->write, flags));
6602				continue;
6603			}
6604
6605			if (bo->used + size <= bytes(&bo->base)) {
6606				DBG(("%s: reusing buffer? used=%d + size=%d, total=%d\n",
6607				     __FUNCTION__, bo->used, size, bytes(&bo->base)));
6608				offset = bo->used;
6609				bo->used += size;
6610				list_move(&bo->base.list, &kgem->batch_buffers);
6611				goto done;
6612			}
6613
6614			if (bo->base.refcnt == 1 &&
6615			    size <= bytes(&bo->base) &&
6616			    (bo->base.rq == NULL ||
6617			     !__kgem_busy(kgem, bo->base.handle))) {
6618				DBG(("%s: reusing whole buffer? size=%d, total=%d\n",
6619				     __FUNCTION__, size, bytes(&bo->base)));
6620				__kgem_bo_clear_busy(&bo->base);
6621				assert(list_is_empty(&bo->base.vma));
6622
6623				switch (bo->mmapped) {
6624				case MMAPPED_CPU:
6625					kgem_bo_sync__cpu(kgem, &bo->base);
6626					break;
6627				case MMAPPED_GTT:
6628					kgem_bo_sync__gtt(kgem, &bo->base);
6629					break;
6630				}
6631
6632				offset = 0;
6633				bo->used = size;
6634				list_move(&bo->base.list, &kgem->batch_buffers);
6635				goto done;
6636			}
6637		}
6638	}
6639#endif
6640
6641#if !DBG_NO_MAP_UPLOAD
6642	/* Be a little more generous and hope to hold fewer mmappings */
6643	alloc = ALIGN(2*size, kgem->buffer_size);
6644	if (alloc > MAX_CACHE_SIZE)
6645		alloc = ALIGN(size, kgem->buffer_size);
6646	if (alloc > MAX_CACHE_SIZE)
6647		alloc = PAGE_ALIGN(size);
6648	assert(alloc);
6649
6650	alloc /= PAGE_SIZE;
6651	if (alloc > kgem->aperture_mappable / 4)
6652		flags &= ~KGEM_BUFFER_INPLACE;
6653
6654	if (kgem->has_llc &&
6655	    (flags & KGEM_BUFFER_WRITE_INPLACE) != KGEM_BUFFER_WRITE_INPLACE) {
6656		bo = buffer_alloc();
6657		if (bo == NULL)
6658			goto skip_llc;
6659
6660		old = NULL;
6661		if ((flags & KGEM_BUFFER_WRITE) == 0)
6662			old = search_linear_cache(kgem, alloc, CREATE_CPU_MAP);
6663		if (old == NULL)
6664			old = search_linear_cache(kgem, alloc, CREATE_INACTIVE | CREATE_CPU_MAP);
6665		if (old == NULL)
6666			old = search_linear_cache(kgem, NUM_PAGES(size), CREATE_INACTIVE | CREATE_CPU_MAP);
6667		if (old) {
6668			DBG(("%s: found LLC handle=%d for buffer\n",
6669			     __FUNCTION__, old->handle));
6670
6671			init_buffer_from_bo(bo, old);
6672		} else {
6673			uint32_t handle = gem_create(kgem->fd, alloc);
6674			if (handle == 0) {
6675				free(bo);
6676				goto skip_llc;
6677			}
6678			__kgem_bo_init(&bo->base, handle, alloc);
6679			debug_alloc__bo(kgem, &bo->base);
6680			DBG(("%s: created LLC handle=%d for buffer\n",
6681			     __FUNCTION__, bo->base.handle));
6682		}
6683
6684		assert(bo->mmapped);
6685		assert(!bo->need_io);
6686
6687		bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
6688		if (bo->mem) {
6689			if (flags & KGEM_BUFFER_WRITE)
6690				kgem_bo_sync__cpu(kgem, &bo->base);
6691			flags &= ~KGEM_BUFFER_INPLACE;
6692			goto init;
6693		} else {
6694			bo->base.refcnt = 0; /* for valgrind */
6695			kgem_bo_free(kgem, &bo->base);
6696		}
6697	}
6698skip_llc:
6699
6700	if ((flags & KGEM_BUFFER_WRITE_INPLACE) == KGEM_BUFFER_WRITE_INPLACE) {
6701		/* The issue with using a GTT upload buffer is that we may
6702		 * cause eviction-stalls in order to free up some GTT space.
6703		 * An is-mappable? ioctl, or some per-page magic in the
6704		 * kernel, could help us detect when we are about to block.
6705		 *
6706		 * XXX This is especially noticeable on memory-constrained
6707		 * devices like gen2, or with a relatively slow GPU like i3.
6708		 */
6709		DBG(("%s: searching for an inactive GTT map for upload\n",
6710		     __FUNCTION__));
6711		old = search_linear_cache(kgem, alloc,
6712					  CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP);
6713#if HAVE_I915_GEM_BUFFER_INFO
6714		if (old) {
6715			struct drm_i915_gem_buffer_info info;
6716
6717			/* An example of such a non-blocking ioctl might work */
6718
6719			VG_CLEAR(info);
6720			info.handle = old->handle;
6721			if (do_ioctl(kgem->fd,
6722				     DRM_IOCTL_I915_GEM_BUFFER_INFO,
6723				     &info) == 0) {
6724				old->presumed_offset = info.addr;
6725				if ((info.flags & I915_GEM_MAPPABLE) == 0) {
6726					kgem_bo_move_to_inactive(kgem, old);
6727					old = NULL;
6728				}
6729			}
6730		}
6731#endif
6732		if (old == NULL)
6733			old = search_linear_cache(kgem, NUM_PAGES(size),
6734						  CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP);
6735		if (old == NULL) {
6736			old = search_linear_cache(kgem, alloc, CREATE_INACTIVE);
6737			if (old && !kgem_bo_can_map(kgem, old)) {
6738				_kgem_bo_destroy(kgem, old);
6739				old = NULL;
6740			}
6741		}
6742		if (old) {
6743			DBG(("%s: reusing handle=%d for buffer\n",
6744			     __FUNCTION__, old->handle));
6745			assert(kgem_bo_can_map(kgem, old));
6746			assert(!old->snoop);
6747			assert(old->rq == NULL);
6748
6749			bo = buffer_alloc();
6750			if (bo == NULL)
6751				return NULL;
6752
6753			init_buffer_from_bo(bo, old);
6754			assert(num_pages(&bo->base) >= NUM_PAGES(size));
6755
6756			assert(bo->mmapped);
6757			assert(bo->base.refcnt == 1);
6758
6759			bo->mem = kgem_bo_map(kgem, &bo->base);
6760			if (bo->mem) {
6761				if (bo->mem == MAP(bo->base.map__cpu))
6762					flags &= ~KGEM_BUFFER_INPLACE;
6763				else
6764					bo->mmapped = MMAPPED_GTT;
6765				goto init;
6766			} else {
6767				bo->base.refcnt = 0;
6768				kgem_bo_free(kgem, &bo->base);
6769			}
6770		}
6771	}
6772#else
6773	flags &= ~KGEM_BUFFER_INPLACE;
6774#endif
6775	/* Be more parsimonious with pwrite/pread/cacheable buffers */
6776	if ((flags & KGEM_BUFFER_INPLACE) == 0)
6777		alloc = NUM_PAGES(size);
6778
6779	if (use_snoopable_buffer(kgem, flags)) {
6780		bo = search_snoopable_buffer(kgem, alloc);
6781		if (bo) {
6782			if (flags & KGEM_BUFFER_WRITE)
6783				kgem_bo_sync__cpu(kgem, &bo->base);
6784			flags &= ~KGEM_BUFFER_INPLACE;
6785			goto init;
6786		}
6787
6788		if ((flags & KGEM_BUFFER_INPLACE) == 0) {
6789			bo = create_snoopable_buffer(kgem, alloc);
6790			if (bo)
6791				goto init;
6792		}
6793	}
6794
6795	flags &= ~KGEM_BUFFER_INPLACE;
6796
6797	old = NULL;
6798	if ((flags & KGEM_BUFFER_WRITE) == 0)
6799		old = search_linear_cache(kgem, alloc, 0);
6800	if (old == NULL)
6801		old = search_linear_cache(kgem, alloc, CREATE_INACTIVE);
6802	if (old) {
6803		DBG(("%s: reusing ordinary handle %d for io\n",
6804		     __FUNCTION__, old->handle));
6805		bo = buffer_alloc_with_data(num_pages(old));
6806		if (bo == NULL)
6807			return NULL;
6808
6809		init_buffer_from_bo(bo, old);
6810		bo->need_io = flags & KGEM_BUFFER_WRITE;
6811	} else {
6812		unsigned hint;
6813
6814		if (use_snoopable_buffer(kgem, flags)) {
6815			bo = create_snoopable_buffer(kgem, alloc);
6816			if (bo)
6817				goto init;
6818		}
6819
6820		bo = buffer_alloc();
6821		if (bo == NULL)
6822			return NULL;
6823
6824		hint = CREATE_INACTIVE;
6825		if (flags & KGEM_BUFFER_WRITE)
6826			hint |= CREATE_CPU_MAP;
6827		old = search_linear_cache(kgem, alloc, hint);
6828		if (old) {
6829			DBG(("%s: reusing handle=%d for buffer\n",
6830			     __FUNCTION__, old->handle));
6831
6832			init_buffer_from_bo(bo, old);
6833		} else {
6834			uint32_t handle = gem_create(kgem->fd, alloc);
6835			if (handle == 0) {
6836				free(bo);
6837				return NULL;
6838			}
6839
6840			DBG(("%s: created handle=%d for buffer\n",
6841			     __FUNCTION__, handle));
6842
6843			__kgem_bo_init(&bo->base, handle, alloc);
6844			debug_alloc__bo(kgem, &bo->base);
6845		}
6846
6847		assert(bo->mmapped);
6848		assert(!bo->need_io);
6849		assert(bo->base.refcnt == 1);
6850
6851		if (flags & KGEM_BUFFER_WRITE) {
6852			bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
6853			if (bo->mem != NULL) {
6854				kgem_bo_sync__cpu(kgem, &bo->base);
6855				goto init;
6856			}
6857		}
6858
6859		DBG(("%s: falling back to new pwrite buffer\n", __FUNCTION__));
6860		old = &bo->base;
6861		bo = buffer_alloc_with_data(num_pages(old));
6862		if (bo == NULL) {
6863			old->refcnt = 0;
6864			kgem_bo_free(kgem, old);
6865			return NULL;
6866		}
6867
6868		init_buffer_from_bo(bo, old);
6869
6870		assert(bo->mem);
6871		assert(!bo->mmapped);
6872		assert(bo->base.refcnt == 1);
6873
6874		bo->need_io = flags & KGEM_BUFFER_WRITE;
6875	}
6876init:
6877	bo->base.io = true;
6878	assert(bo->base.refcnt == 1);
6879	assert(num_pages(&bo->base) >= NUM_PAGES(size));
6880	assert(!bo->need_io || !bo->base.needs_flush);
6881	assert(!bo->need_io || bo->base.domain != DOMAIN_GPU);
6882	assert(bo->mem);
6883	assert(bo->mmapped != MMAPPED_GTT || MAP(bo->base.map__gtt) == bo->mem);
6884	assert(bo->mmapped != MMAPPED_CPU || MAP(bo->base.map__cpu) == bo->mem);
6885
6886	bo->used = size;
6887	bo->write = flags & KGEM_BUFFER_WRITE_INPLACE;
6888	offset = 0;
6889
6890	assert(list_is_empty(&bo->base.list));
6891	list_add(&bo->base.list, &kgem->batch_buffers);
6892
6893	DBG(("%s(pages=%d [%d]) new handle=%d, used=%d, write=%d\n",
6894	     __FUNCTION__, num_pages(&bo->base), alloc, bo->base.handle, bo->used, bo->write));
6895
6896done:
6897	bo->used = ALIGN(bo->used, UPLOAD_ALIGNMENT);
6898	assert(bo->used && bo->used <= bytes(&bo->base));
6899	assert(bo->mem);
6900	*ret = (char *)bo->mem + offset;
6901	return kgem_create_proxy(kgem, &bo->base, offset, size);
6902}
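
/* Illustrative sketch (not part of the driver's call paths): the common
 * upload pattern, mirroring kgem_upload_source_image() below. The returned bo
 * is a proxy into a shared upload buffer and *ret points at its portion of
 * the mapping; "data" and "len" are hypothetical.
 *
 *	void *ptr;
 *	struct kgem_bo *bo = kgem_create_buffer(kgem, len, KGEM_BUFFER_WRITE, &ptr);
 *	if (bo != NULL) {
 *		memcpy(ptr, data, len);
 *		... use bo as a source for the GPU ...
 *		kgem_bo_destroy(kgem, bo);
 *	}
 */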
6903
6904bool kgem_buffer_is_inplace(struct kgem_bo *_bo)
6905{
6906	struct kgem_buffer *bo = (struct kgem_buffer *)_bo->proxy;
6907	return bo->write & KGEM_BUFFER_WRITE_INPLACE;
6908}
6909
6910struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem,
6911				      int width, int height, int bpp,
6912				      uint32_t flags,
6913				      void **ret)
6914{
6915	struct kgem_bo *bo;
6916	int stride;
6917
6918	assert(width > 0 && height > 0);
6919	assert(ret != NULL);
6920	stride = ALIGN(width, 2) * bpp >> 3;
6921	stride = ALIGN(stride, 4);
6922
6923	DBG(("%s: %dx%d, %d bpp, stride=%d\n",
6924	     __FUNCTION__, width, height, bpp, stride));
6925
6926	bo = kgem_create_buffer(kgem, stride * ALIGN(height, 2), flags, ret);
6927	if (bo == NULL) {
6928		DBG(("%s: allocation failure for upload buffer\n",
6929		     __FUNCTION__));
6930		return NULL;
6931	}
6932	assert(*ret != NULL);
6933	assert(bo->proxy != NULL);
6934
6935	if (height & 1) {
6936		struct kgem_buffer *io = (struct kgem_buffer *)bo->proxy;
6937		int min;
6938
6939		assert(io->used);
6940
6941		/* Having padded this surface to ensure that accesses to
6942		 * the last pair of rows are valid, remove the padding so
6943		 * that the space can be reused by other pixmaps.
6944		 */
6945		min = bo->delta + height * stride;
6946		min = ALIGN(min, UPLOAD_ALIGNMENT);
6947		if (io->used != min) {
6948			DBG(("%s: trimming buffer from %d to %d\n",
6949			     __FUNCTION__, io->used, min));
6950			io->used = min;
6951		}
6952		bo->size.bytes -= stride;
6953	}
6954
6955	bo->map__cpu = *ret;
6956	bo->pitch = stride;
6957	bo->unique_id = kgem_get_unique_id(kgem);
6958	return bo;
6959}
6960
6961struct kgem_bo *kgem_upload_source_image(struct kgem *kgem,
6962					 const void *data,
6963					 const BoxRec *box,
6964					 int stride, int bpp)
6965{
6966	int width  = box->x2 - box->x1;
6967	int height = box->y2 - box->y1;
6968	struct kgem_bo *bo;
6969	void *dst;
6970
6971	if (!kgem_can_create_2d(kgem, width, height, bpp))
6972		return NULL;
6973
6974	DBG(("%s : (%d, %d), (%d, %d), stride=%d, bpp=%d\n",
6975	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2, stride, bpp));
6976
6977	assert(data);
6978	assert(width > 0);
6979	assert(height > 0);
6980	assert(stride);
6981	assert(bpp);
6982
6983	bo = kgem_create_buffer_2d(kgem,
6984				   width, height, bpp,
6985				   KGEM_BUFFER_WRITE_INPLACE, &dst);
6986	if (bo == NULL)
6987		return NULL;
6988
6989	if (sigtrap_get()) {
6990		kgem_bo_destroy(kgem, bo);
6991		return NULL;
6992	}
6993
6994	memcpy_blt(data, dst, bpp,
6995		   stride, bo->pitch,
6996		   box->x1, box->y1,
6997		   0, 0,
6998		   width, height);
6999
7000	sigtrap_put();
7001	return bo;
7002}
7003
7004void kgem_proxy_bo_attach(struct kgem_bo *bo,
7005			  struct kgem_bo **ptr)
7006{
7007	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
7008	assert(bo->map__gtt == NULL);
7009	assert(bo->proxy);
7010	list_add(&bo->vma, &bo->proxy->vma);
7011	bo->map__gtt = ptr;
7012	*ptr = kgem_bo_reference(bo);
7013}
7014
7015void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo)
7016{
7017	struct kgem_buffer *bo;
7018	uint32_t offset = _bo->delta, length = _bo->size.bytes;
7019
7020	/* We expect the caller to have already submitted the batch */
7021	assert(_bo->io);
7022	assert(_bo->exec == NULL);
7023	assert(_bo->rq == NULL);
7024	assert(_bo->proxy);
7025
7026	_bo = _bo->proxy;
7027	assert(_bo->proxy == NULL);
7028	assert(_bo->exec == NULL);
7029
7030	bo = (struct kgem_buffer *)_bo;
7031
7032	DBG(("%s(offset=%d, length=%d, snooped=%d)\n", __FUNCTION__,
7033	     offset, length, bo->base.snoop));
7034
7035	if (bo->mmapped) {
7036		struct drm_i915_gem_set_domain set_domain;
7037
7038		DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n",
7039		     __FUNCTION__,
7040		     bo->base.needs_flush,
7041		     bo->base.domain,
7042		     __kgem_busy(kgem, bo->base.handle)));
7043
7044		assert(bo->mmapped == MMAPPED_GTT || bo->base.snoop || kgem->has_llc);
7045
7046		VG_CLEAR(set_domain);
7047		set_domain.handle = bo->base.handle;
7048		set_domain.write_domain = 0;
7049		set_domain.read_domains =
7050			bo->mmapped == MMAPPED_CPU ? I915_GEM_DOMAIN_CPU : I915_GEM_DOMAIN_GTT;
7051
7052		if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
7053			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
7054			kgem_throttle(kgem);
7055		}
7056	} else {
7057		if (gem_read(kgem->fd,
7058			     bo->base.handle, (char *)bo->mem+offset,
7059			     offset, length))
7060			return;
7061	}
7062	kgem_bo_maybe_retire(kgem, &bo->base);
7063	bo->base.domain = DOMAIN_NONE;
7064}
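
/* Illustrative sketch (not part of the driver's call paths): reading results
 * back through a download buffer after its batch has been submitted. "bo" is
 * the proxy returned by kgem_create_buffer() and "ptr" the pointer it handed
 * back; "result" and "len" are hypothetical.
 *
 *	kgem_buffer_read_sync(kgem, bo);
 *	memcpy(result, ptr, len);
 */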
7065
7066uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format)
7067{
7068	struct kgem_bo_binding *b;
7069
7070	assert(bo->refcnt);
7071
7072	for (b = &bo->binding; b && b->offset; b = b->next)
7073		if (format == b->format)
7074			return b->offset;
7075
7076	return 0;
7077}
7078
7079void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset)
7080{
7081	struct kgem_bo_binding *b;
7082
7083	assert(bo->refcnt);
7084
7085	for (b = &bo->binding; b; b = b->next) {
7086		if (b->offset)
7087			continue;
7088
7089		b->offset = offset;
7090		b->format = format;
7091
7092		if (b->next)
7093			b->next->offset = 0;
7094
7095		return;
7096	}
7097
7098	b = malloc(sizeof(*b));
7099	if (b) {
7100		b->next = bo->binding.next;
7101		b->format = format;
7102		b->offset = offset;
7103		bo->binding.next = b;
7104	}
7105}
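
/* Illustrative sketch (not part of the driver's call paths): the binding list
 * caches one offset per format so state already emitted for a bo can be
 * reused instead of re-emitted. "format" is a hypothetical format code and
 * emit_state() a hypothetical helper returning the new offset.
 *
 *	uint32_t offset = kgem_bo_get_binding(bo, format);
 *	if (offset == 0) {
 *		offset = emit_state(bo, format);
 *		kgem_bo_set_binding(bo, format, offset);
 *	}
 */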
7106
7107struct kgem_bo *
7108kgem_replace_bo(struct kgem *kgem,
7109		struct kgem_bo *src,
7110		uint32_t width,
7111		uint32_t height,
7112		uint32_t pitch,
7113		uint32_t bpp)
7114{
7115	struct kgem_bo *dst;
7116	uint32_t br00, br13;
7117	uint32_t handle;
7118	uint32_t size;
7119	uint32_t *b;
7120
7121	DBG(("%s: replacing bo handle=%d, size=%dx%d pitch=%d, with pitch=%d\n",
7122	     __FUNCTION__, src->handle,  width, height, src->pitch, pitch));
7123
7124	/* We only expect to be called to fix up small buffers, hence
7125	 * we only attempt to allocate a linear bo.
7126	 */
7127	assert(src->tiling == I915_TILING_NONE);
7128	assert(kgem_bo_can_blt(kgem, src));
7129
7130	size = height * pitch;
7131	size = NUM_PAGES(size);
7132
7133	dst = search_linear_cache(kgem, size, 0);
7134	if (dst == NULL)
7135		dst = search_linear_cache(kgem, size, CREATE_INACTIVE);
7136	if (dst == NULL) {
7137		handle = gem_create(kgem->fd, size);
7138		if (handle == 0)
7139			return NULL;
7140
7141		dst = __kgem_bo_alloc(handle, size);
7142		if (dst == NULL) {
7143			gem_close(kgem->fd, handle);
7144			return NULL;
7145		}
7146
7147		debug_alloc__bo(kgem, dst);
7148	}
7149	dst->pitch = pitch;
7150	dst->unique_id = kgem_get_unique_id(kgem);
7151	dst->refcnt = 1;
7152	assert(dst->tiling == I915_TILING_NONE);
7153	assert(kgem_bo_can_blt(kgem, dst));
7154
7155	kgem_set_mode(kgem, KGEM_BLT, dst);
7156	if (!kgem_check_batch(kgem, 10) ||
7157	    !kgem_check_reloc(kgem, 2) ||
7158	    !kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
7159		kgem_submit(kgem);
7160		if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
7161			kgem_bo_destroy(kgem, dst);
7162			return NULL;
7163		}
7164		_kgem_set_mode(kgem, KGEM_BLT);
7165	}
7166
7167	br00 = XY_SRC_COPY_BLT_CMD;
7168	br13 = pitch;
7169	pitch = src->pitch;
7170	if (kgem->gen >= 040 && src->tiling) {
7171		br00 |= BLT_SRC_TILED;
7172		pitch >>= 2;
7173	}
7174
7175	br13 |= 0xcc << 16;
7176	switch (bpp) {
7177	default:
7178	case 32: br00 |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
7179		 br13 |= 1 << 25; /* RGB8888; falls through to also set bit 24 */
7180	case 16: br13 |= 1 << 24; /* RGB565 */
7181	case 8: break;
7182	}
7183
7184	b = kgem->batch + kgem->nbatch;
7185	if (kgem->gen >= 0100) {
7186		b[0] = br00 | 8;
7187		b[1] = br13;
7188		b[2] = 0;
7189		b[3] = height << 16 | width;
7190		*(uint64_t *)(b+4) =
7191			kgem_add_reloc64(kgem, kgem->nbatch + 4, dst,
7192					 I915_GEM_DOMAIN_RENDER << 16 |
7193					 I915_GEM_DOMAIN_RENDER |
7194					 KGEM_RELOC_FENCED,
7195					 0);
7196		b[6] = 0;
7197		b[7] = pitch;
7198		*(uint64_t *)(b+8) =
7199			kgem_add_reloc64(kgem, kgem->nbatch + 8, src,
7200					 I915_GEM_DOMAIN_RENDER << 16 |
7201					 KGEM_RELOC_FENCED,
7202					 0);
7203		kgem->nbatch += 10;
7204	} else {
7205		b[0] = br00 | 6;
7206		b[1] = br13;
7207		b[2] = 0;
7208		b[3] = height << 16 | width;
7209		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst,
7210				      I915_GEM_DOMAIN_RENDER << 16 |
7211				      I915_GEM_DOMAIN_RENDER |
7212				      KGEM_RELOC_FENCED,
7213				      0);
7214		b[5] = 0;
7215		b[6] = pitch;
7216		b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src,
7217				      I915_GEM_DOMAIN_RENDER << 16 |
7218				      KGEM_RELOC_FENCED,
7219				      0);
7220		kgem->nbatch += 8;
7221	}
7222
7223	return dst;
7224}
7225
7226bool kgem_bo_convert_to_gpu(struct kgem *kgem,
7227			    struct kgem_bo *bo,
7228			    unsigned flags)
7229{
7230	DBG(("%s: converting handle=%d from CPU to GPU, flags=%x\n", __FUNCTION__, bo->handle, flags));
7231	assert(bo->tiling == I915_TILING_NONE);
7232
7233	if (kgem->has_llc)
7234		return true;
7235
7236	if (flags & MOVE_ASYNC_HINT && __kgem_bo_is_busy(kgem, bo))
7237		return false;
7238
7239	assert(bo->snoop);
7240
7241	kgem_bo_submit(kgem, bo);
7242
7243	if (!gem_set_caching(kgem->fd, bo->handle, UNCACHED))
7244		return false;
7245
7246	bo->snoop = false;
7247	return true;
7248}
7249