intel_bufmgr_gem.c revision 3b115362
1/**************************************************************************
2 *
3 * Copyright © 2007 Red Hat Inc.
4 * Copyright © 2007-2012 Intel Corporation
5 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
6 * All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sub license, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * The above copyright notice and this permission notice (including the
25 * next paragraph) shall be included in all copies or substantial portions
26 * of the Software.
27 *
28 *
29 **************************************************************************/
30/*
31 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
32 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
33 *	    Eric Anholt <eric@anholt.net>
34 *	    Dave Airlie <airlied@linux.ie>
35 */
36
37#include <xf86drm.h>
38#include <xf86atomic.h>
39#include <fcntl.h>
40#include <stdio.h>
41#include <stdlib.h>
42#include <string.h>
43#include <unistd.h>
44#include <assert.h>
45#include <pthread.h>
46#include <sys/ioctl.h>
47#include <sys/stat.h>
48#include <sys/types.h>
49#include <stdbool.h>
50
51#include "errno.h"
52#ifndef ETIME
53#define ETIME ETIMEDOUT
54#endif
55#include "libdrm_macros.h"
56#include "libdrm_lists.h"
57#include "intel_bufmgr.h"
58#include "intel_bufmgr_priv.h"
59#include "intel_chipset.h"
60#include "string.h"
61
62#include "i915_drm.h"
63#include "uthash.h"
64
65#if HAVE_VALGRIND
66#include <valgrind.h>
67#include <memcheck.h>
68#define VG(x) x
69#else
70#define VG(x)
71#endif
72
73#define memclear(s) memset(&s, 0, sizeof(s))
74
75#define DBG(...) do {					\
76	if (bufmgr_gem->bufmgr.debug)			\
77		fprintf(stderr, __VA_ARGS__);		\
78} while (0)
79
80#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
81#define MAX2(A, B) ((A) > (B) ? (A) : (B))
82
83/**
84 * upper_32_bits - return bits 32-63 of a number
85 * @n: the number we're accessing
86 *
87 * A basic shift-right of a 64- or 32-bit quantity.  Use this to suppress
88 * the "right shift count >= width of type" warning when that quantity is
89 * 32-bits.
90 */
91#define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16))
92
93/**
94 * lower_32_bits - return bits 0-31 of a number
95 * @n: the number we're accessing
96 */
97#define lower_32_bits(n) ((__u32)(n))
98
99typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
100
101struct drm_intel_gem_bo_bucket {
102	drmMMListHead head;
103	unsigned long size;
104};
105
106typedef struct _drm_intel_bufmgr_gem {
107	drm_intel_bufmgr bufmgr;
108
109	atomic_t refcount;
110
111	int fd;
112
113	int max_relocs;
114
115	pthread_mutex_t lock;
116
117	struct drm_i915_gem_exec_object2 *exec2_objects;
118	drm_intel_bo **exec_bos;
119	int exec_size;
120	int exec_count;
121
122	/** Array of lists of cached gem objects of power-of-two sizes */
123	struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
124	int num_buckets;
125	time_t time;
126
127	drmMMListHead managers;
128
129	drm_intel_bo_gem *name_table;
130	drm_intel_bo_gem *handle_table;
131
132	drmMMListHead vma_cache;
133	int vma_count, vma_open, vma_max;
134
135	uint64_t gtt_size;
136	int available_fences;
137	int pci_device;
138	int gen;
139	unsigned int has_bsd : 1;
140	unsigned int has_blt : 1;
141	unsigned int has_relaxed_fencing : 1;
142	unsigned int has_llc : 1;
143	unsigned int has_wait_timeout : 1;
144	unsigned int bo_reuse : 1;
145	unsigned int no_exec : 1;
146	unsigned int has_vebox : 1;
147	unsigned int has_exec_async : 1;
148	bool fenced_relocs;
149
150	struct {
151		void *ptr;
152		uint32_t handle;
153	} userptr_active;
154
155} drm_intel_bufmgr_gem;
156
157#define DRM_INTEL_RELOC_FENCE (1<<0)
158
159typedef struct _drm_intel_reloc_target_info {
160	drm_intel_bo *bo;
161	int flags;
162} drm_intel_reloc_target;
163
164struct _drm_intel_bo_gem {
165	drm_intel_bo bo;
166
167	atomic_t refcount;
168	uint32_t gem_handle;
169	const char *name;
170
171	/**
172	 * Kenel-assigned global name for this object
173         *
174         * List contains both flink named and prime fd'd objects
175	 */
176	unsigned int global_name;
177
178	UT_hash_handle handle_hh;
179	UT_hash_handle name_hh;
180
181	/**
182	 * Index of the buffer within the validation list while preparing a
183	 * batchbuffer execution.
184	 */
185	int validate_index;
186
187	/**
188	 * Current tiling mode
189	 */
190	uint32_t tiling_mode;
191	uint32_t swizzle_mode;
192	unsigned long stride;
193
194	unsigned long kflags;
195
196	time_t free_time;
197
198	/** Array passed to the DRM containing relocation information. */
199	struct drm_i915_gem_relocation_entry *relocs;
200	/**
201	 * Array of info structs corresponding to relocs[i].target_handle etc
202	 */
203	drm_intel_reloc_target *reloc_target_info;
204	/** Number of entries in relocs */
205	int reloc_count;
206	/** Array of BOs that are referenced by this buffer and will be softpinned */
207	drm_intel_bo **softpin_target;
208	/** Number softpinned BOs that are referenced by this buffer */
209	int softpin_target_count;
210	/** Maximum amount of softpinned BOs that are referenced by this buffer */
211	int softpin_target_size;
212
213	/** Mapped address for the buffer, saved across map/unmap cycles */
214	void *mem_virtual;
215	/** GTT virtual address for the buffer, saved across map/unmap cycles */
216	void *gtt_virtual;
217	/** WC CPU address for the buffer, saved across map/unmap cycles */
218	void *wc_virtual;
219	/**
220	 * Virtual address of the buffer allocated by user, used for userptr
221	 * objects only.
222	 */
223	void *user_virtual;
224	int map_count;
225	drmMMListHead vma_list;
226
227	/** BO cache list */
228	drmMMListHead head;
229
230	/**
231	 * Boolean of whether this BO and its children have been included in
232	 * the current drm_intel_bufmgr_check_aperture_space() total.
233	 */
234	bool included_in_check_aperture;
235
236	/**
237	 * Boolean of whether this buffer has been used as a relocation
238	 * target and had its size accounted for, and thus can't have any
239	 * further relocations added to it.
240	 */
241	bool used_as_reloc_target;
242
243	/**
244	 * Boolean of whether we have encountered an error whilst building the relocation tree.
245	 */
246	bool has_error;
247
248	/**
249	 * Boolean of whether this buffer can be re-used
250	 */
251	bool reusable;
252
253	/**
254	 * Boolean of whether the GPU is definitely not accessing the buffer.
255	 *
256	 * This is only valid when reusable, since non-reusable
257	 * buffers are those that have been shared with other
258	 * processes, so we don't know their state.
259	 */
260	bool idle;
261
262	/**
263	 * Boolean of whether this buffer was allocated with userptr
264	 */
265	bool is_userptr;
266
267	/**
268	 * Size in bytes of this buffer and its relocation descendents.
269	 *
270	 * Used to avoid costly tree walking in
271	 * drm_intel_bufmgr_check_aperture in the common case.
272	 */
273	int reloc_tree_size;
274
275	/**
276	 * Number of potential fence registers required by this buffer and its
277	 * relocations.
278	 */
279	int reloc_tree_fences;
280
281	/** Flags that we may need to do the SW_FINISH ioctl on unmap. */
282	bool mapped_cpu_write;
283};
284
285static unsigned int
286drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);
287
288static unsigned int
289drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count);
290
291static int
292drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
293			    uint32_t * swizzle_mode);
294
295static int
296drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
297				     uint32_t tiling_mode,
298				     uint32_t stride);
299
300static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
301						      time_t time);
302
303static void drm_intel_gem_bo_unreference(drm_intel_bo *bo);
304
305static void drm_intel_gem_bo_free(drm_intel_bo *bo);
306
307static inline drm_intel_bo_gem *to_bo_gem(drm_intel_bo *bo)
308{
309        return (drm_intel_bo_gem *)bo;
310}
311
312static unsigned long
313drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
314			   uint32_t *tiling_mode)
315{
316	unsigned long min_size, max_size;
317	unsigned long i;
318
319	if (*tiling_mode == I915_TILING_NONE)
320		return size;
321
322	/* 965+ just need multiples of page size for tiling */
323	if (bufmgr_gem->gen >= 4)
324		return ROUND_UP_TO(size, 4096);
325
326	/* Older chips need powers of two, of at least 512k or 1M */
327	if (bufmgr_gem->gen == 3) {
328		min_size = 1024*1024;
329		max_size = 128*1024*1024;
330	} else {
331		min_size = 512*1024;
332		max_size = 64*1024*1024;
333	}
334
335	if (size > max_size) {
336		*tiling_mode = I915_TILING_NONE;
337		return size;
338	}
339
340	/* Do we need to allocate every page for the fence? */
341	if (bufmgr_gem->has_relaxed_fencing)
342		return ROUND_UP_TO(size, 4096);
343
344	for (i = min_size; i < size; i <<= 1)
345		;
346
347	return i;
348}
349
350/*
351 * Round a given pitch up to the minimum required for X tiling on a
352 * given chip.  We use 512 as the minimum to allow for a later tiling
353 * change.
354 */
355static unsigned long
356drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
357			    unsigned long pitch, uint32_t *tiling_mode)
358{
359	unsigned long tile_width;
360	unsigned long i;
361
362	/* If untiled, then just align it so that we can do rendering
363	 * to it with the 3D engine.
364	 */
365	if (*tiling_mode == I915_TILING_NONE)
366		return ALIGN(pitch, 64);
367
368	if (*tiling_mode == I915_TILING_X
369			|| (IS_915(bufmgr_gem->pci_device)
370			    && *tiling_mode == I915_TILING_Y))
371		tile_width = 512;
372	else
373		tile_width = 128;
374
375	/* 965 is flexible */
376	if (bufmgr_gem->gen >= 4)
377		return ROUND_UP_TO(pitch, tile_width);
378
379	/* The older hardware has a maximum pitch of 8192 with tiled
380	 * surfaces, so fallback to untiled if it's too large.
381	 */
382	if (pitch > 8192) {
383		*tiling_mode = I915_TILING_NONE;
384		return ALIGN(pitch, 64);
385	}
386
387	/* Pre-965 needs power of two tile width */
388	for (i = tile_width; i < pitch; i <<= 1)
389		;
390
391	return i;
392}
393
394static struct drm_intel_gem_bo_bucket *
395drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
396				 unsigned long size)
397{
398	int i;
399
400	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
401		struct drm_intel_gem_bo_bucket *bucket =
402		    &bufmgr_gem->cache_bucket[i];
403		if (bucket->size >= size) {
404			return bucket;
405		}
406	}
407
408	return NULL;
409}
410
411static void
412drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
413{
414	int i, j;
415
416	for (i = 0; i < bufmgr_gem->exec_count; i++) {
417		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
418		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
419
420		if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) {
421			DBG("%2d: %d %s(%s)\n", i, bo_gem->gem_handle,
422			    bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
423			    bo_gem->name);
424			continue;
425		}
426
427		for (j = 0; j < bo_gem->reloc_count; j++) {
428			drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo;
429			drm_intel_bo_gem *target_gem =
430			    (drm_intel_bo_gem *) target_bo;
431
432			DBG("%2d: %d %s(%s)@0x%08x %08x -> "
433			    "%d (%s)@0x%08x %08x + 0x%08x\n",
434			    i,
435			    bo_gem->gem_handle,
436			    bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
437			    bo_gem->name,
438			    upper_32_bits(bo_gem->relocs[j].offset),
439			    lower_32_bits(bo_gem->relocs[j].offset),
440			    target_gem->gem_handle,
441			    target_gem->name,
442			    upper_32_bits(target_bo->offset64),
443			    lower_32_bits(target_bo->offset64),
444			    bo_gem->relocs[j].delta);
445		}
446
447		for (j = 0; j < bo_gem->softpin_target_count; j++) {
448			drm_intel_bo *target_bo = bo_gem->softpin_target[j];
449			drm_intel_bo_gem *target_gem =
450			    (drm_intel_bo_gem *) target_bo;
451			DBG("%2d: %d %s(%s) -> "
452			    "%d *(%s)@0x%08x %08x\n",
453			    i,
454			    bo_gem->gem_handle,
455			    bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
456			    bo_gem->name,
457			    target_gem->gem_handle,
458			    target_gem->name,
459			    upper_32_bits(target_bo->offset64),
460			    lower_32_bits(target_bo->offset64));
461		}
462	}
463}
464
465static inline void
466drm_intel_gem_bo_reference(drm_intel_bo *bo)
467{
468	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
469
470	atomic_inc(&bo_gem->refcount);
471}
472
473/**
474 * Adds the given buffer to the list of buffers to be validated (moved into the
475 * appropriate memory type) with the next batch submission.
476 *
477 * If a buffer is validated multiple times in a batch submission, it ends up
478 * with the intersection of the memory type flags and the union of the
479 * access flags.
480 */
481static void
482drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
483{
484	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
485	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
486	int index;
487	unsigned long flags;
488
489	flags = 0;
490	if (need_fence)
491		flags |= EXEC_OBJECT_NEEDS_FENCE;
492
493	if (bo_gem->validate_index != -1) {
494		bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= flags;
495		return;
496	}
497
498	/* Extend the array of validation entries as necessary. */
499	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
500		int new_size = bufmgr_gem->exec_size * 2;
501
502		if (new_size == 0)
503			new_size = 5;
504
505		bufmgr_gem->exec2_objects =
506			realloc(bufmgr_gem->exec2_objects,
507				sizeof(*bufmgr_gem->exec2_objects) * new_size);
508		bufmgr_gem->exec_bos =
509			realloc(bufmgr_gem->exec_bos,
510				sizeof(*bufmgr_gem->exec_bos) * new_size);
511		bufmgr_gem->exec_size = new_size;
512	}
513
514	index = bufmgr_gem->exec_count;
515	bo_gem->validate_index = index;
516	/* Fill in array entry */
517	bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
518	bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
519	bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
520	bufmgr_gem->exec2_objects[index].alignment = bo->align;
521	bufmgr_gem->exec2_objects[index].offset = bo->offset64;
522	bufmgr_gem->exec2_objects[index].flags = bo_gem->kflags | flags;
523	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
524	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
525	bufmgr_gem->exec_bos[index] = bo;
526	bufmgr_gem->exec_count++;
527}
528
529#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
530	sizeof(uint32_t))
531
532static void
533drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem,
534				      drm_intel_bo_gem *bo_gem,
535				      unsigned int alignment)
536{
537	unsigned int size;
538
539	assert(!bo_gem->used_as_reloc_target);
540
541	/* The older chipsets are far-less flexible in terms of tiling,
542	 * and require tiled buffer to be size aligned in the aperture.
543	 * This means that in the worst possible case we will need a hole
544	 * twice as large as the object in order for it to fit into the
545	 * aperture. Optimal packing is for wimps.
546	 */
547	size = bo_gem->bo.size;
548	if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) {
549		unsigned int min_size;
550
551		if (bufmgr_gem->has_relaxed_fencing) {
552			if (bufmgr_gem->gen == 3)
553				min_size = 1024*1024;
554			else
555				min_size = 512*1024;
556
557			while (min_size < size)
558				min_size *= 2;
559		} else
560			min_size = size;
561
562		/* Account for worst-case alignment. */
563		alignment = MAX2(alignment, min_size);
564	}
565
566	bo_gem->reloc_tree_size = size + alignment;
567}
568
569static int
570drm_intel_setup_reloc_list(drm_intel_bo *bo)
571{
572	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
573	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
574	unsigned int max_relocs = bufmgr_gem->max_relocs;
575
576	if (bo->size / 4 < max_relocs)
577		max_relocs = bo->size / 4;
578
579	bo_gem->relocs = malloc(max_relocs *
580				sizeof(struct drm_i915_gem_relocation_entry));
581	bo_gem->reloc_target_info = malloc(max_relocs *
582					   sizeof(drm_intel_reloc_target));
583	if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) {
584		bo_gem->has_error = true;
585
586		free (bo_gem->relocs);
587		bo_gem->relocs = NULL;
588
589		free (bo_gem->reloc_target_info);
590		bo_gem->reloc_target_info = NULL;
591
592		return 1;
593	}
594
595	return 0;
596}
597
598static int
599drm_intel_gem_bo_busy(drm_intel_bo *bo)
600{
601	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
602	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
603	struct drm_i915_gem_busy busy;
604	int ret;
605
606	if (bo_gem->reusable && bo_gem->idle)
607		return false;
608
609	memclear(busy);
610	busy.handle = bo_gem->gem_handle;
611
612	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
613	if (ret == 0) {
614		bo_gem->idle = !busy.busy;
615		return busy.busy;
616	} else {
617		return false;
618	}
619}
620
621static int
622drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem,
623				  drm_intel_bo_gem *bo_gem, int state)
624{
625	struct drm_i915_gem_madvise madv;
626
627	memclear(madv);
628	madv.handle = bo_gem->gem_handle;
629	madv.madv = state;
630	madv.retained = 1;
631	drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
632
633	return madv.retained;
634}
635
636static int
637drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv)
638{
639	return drm_intel_gem_bo_madvise_internal
640		((drm_intel_bufmgr_gem *) bo->bufmgr,
641		 (drm_intel_bo_gem *) bo,
642		 madv);
643}
644
645/* drop the oldest entries that have been purged by the kernel */
646static void
647drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
648				    struct drm_intel_gem_bo_bucket *bucket)
649{
650	while (!DRMLISTEMPTY(&bucket->head)) {
651		drm_intel_bo_gem *bo_gem;
652
653		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
654				      bucket->head.next, head);
655		if (drm_intel_gem_bo_madvise_internal
656		    (bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
657			break;
658
659		DRMLISTDEL(&bo_gem->head);
660		drm_intel_gem_bo_free(&bo_gem->bo);
661	}
662}
663
664static drm_intel_bo *
665drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
666				const char *name,
667				unsigned long size,
668				unsigned long flags,
669				uint32_t tiling_mode,
670				unsigned long stride,
671				unsigned int alignment)
672{
673	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
674	drm_intel_bo_gem *bo_gem;
675	unsigned int page_size = getpagesize();
676	int ret;
677	struct drm_intel_gem_bo_bucket *bucket;
678	bool alloc_from_cache;
679	unsigned long bo_size;
680	bool for_render = false;
681
682	if (flags & BO_ALLOC_FOR_RENDER)
683		for_render = true;
684
685	/* Round the allocated size up to a power of two number of pages. */
686	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
687
688	/* If we don't have caching at this size, don't actually round the
689	 * allocation up.
690	 */
691	if (bucket == NULL) {
692		bo_size = size;
693		if (bo_size < page_size)
694			bo_size = page_size;
695	} else {
696		bo_size = bucket->size;
697	}
698
699	pthread_mutex_lock(&bufmgr_gem->lock);
700	/* Get a buffer out of the cache if available */
701retry:
702	alloc_from_cache = false;
703	if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
704		if (for_render) {
705			/* Allocate new render-target BOs from the tail (MRU)
706			 * of the list, as it will likely be hot in the GPU
707			 * cache and in the aperture for us.
708			 */
709			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
710					      bucket->head.prev, head);
711			DRMLISTDEL(&bo_gem->head);
712			alloc_from_cache = true;
713			bo_gem->bo.align = alignment;
714		} else {
715			assert(alignment == 0);
716			/* For non-render-target BOs (where we're probably
717			 * going to map it first thing in order to fill it
718			 * with data), check if the last BO in the cache is
719			 * unbusy, and only reuse in that case. Otherwise,
720			 * allocating a new buffer is probably faster than
721			 * waiting for the GPU to finish.
722			 */
723			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
724					      bucket->head.next, head);
725			if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
726				alloc_from_cache = true;
727				DRMLISTDEL(&bo_gem->head);
728			}
729		}
730
731		if (alloc_from_cache) {
732			if (!drm_intel_gem_bo_madvise_internal
733			    (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
734				drm_intel_gem_bo_free(&bo_gem->bo);
735				drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
736								    bucket);
737				goto retry;
738			}
739
740			if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
741								 tiling_mode,
742								 stride)) {
743				drm_intel_gem_bo_free(&bo_gem->bo);
744				goto retry;
745			}
746		}
747	}
748
749	if (!alloc_from_cache) {
750		struct drm_i915_gem_create create;
751
752		bo_gem = calloc(1, sizeof(*bo_gem));
753		if (!bo_gem)
754			goto err;
755
756		/* drm_intel_gem_bo_free calls DRMLISTDEL() for an uninitialized
757		   list (vma_list), so better set the list head here */
758		DRMINITLISTHEAD(&bo_gem->vma_list);
759
760		bo_gem->bo.size = bo_size;
761
762		memclear(create);
763		create.size = bo_size;
764
765		ret = drmIoctl(bufmgr_gem->fd,
766			       DRM_IOCTL_I915_GEM_CREATE,
767			       &create);
768		if (ret != 0) {
769			free(bo_gem);
770			goto err;
771		}
772
773		bo_gem->gem_handle = create.handle;
774		HASH_ADD(handle_hh, bufmgr_gem->handle_table,
775			 gem_handle, sizeof(bo_gem->gem_handle),
776			 bo_gem);
777
778		bo_gem->bo.handle = bo_gem->gem_handle;
779		bo_gem->bo.bufmgr = bufmgr;
780		bo_gem->bo.align = alignment;
781
782		bo_gem->tiling_mode = I915_TILING_NONE;
783		bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
784		bo_gem->stride = 0;
785
786		if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
787							 tiling_mode,
788							 stride))
789			goto err_free;
790	}
791
792	bo_gem->name = name;
793	atomic_set(&bo_gem->refcount, 1);
794	bo_gem->validate_index = -1;
795	bo_gem->reloc_tree_fences = 0;
796	bo_gem->used_as_reloc_target = false;
797	bo_gem->has_error = false;
798	bo_gem->reusable = true;
799
800	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, alignment);
801	pthread_mutex_unlock(&bufmgr_gem->lock);
802
803	DBG("bo_create: buf %d (%s) %ldb\n",
804	    bo_gem->gem_handle, bo_gem->name, size);
805
806	return &bo_gem->bo;
807
808err_free:
809	drm_intel_gem_bo_free(&bo_gem->bo);
810err:
811	pthread_mutex_unlock(&bufmgr_gem->lock);
812	return NULL;
813}
814
815static drm_intel_bo *
816drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
817				  const char *name,
818				  unsigned long size,
819				  unsigned int alignment)
820{
821	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
822					       BO_ALLOC_FOR_RENDER,
823					       I915_TILING_NONE, 0,
824					       alignment);
825}
826
827static drm_intel_bo *
828drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr,
829		       const char *name,
830		       unsigned long size,
831		       unsigned int alignment)
832{
833	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0,
834					       I915_TILING_NONE, 0, 0);
835}
836
837static drm_intel_bo *
838drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
839			     int x, int y, int cpp, uint32_t *tiling_mode,
840			     unsigned long *pitch, unsigned long flags)
841{
842	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
843	unsigned long size, stride;
844	uint32_t tiling;
845
846	do {
847		unsigned long aligned_y, height_alignment;
848
849		tiling = *tiling_mode;
850
851		/* If we're tiled, our allocations are in 8 or 32-row blocks,
852		 * so failure to align our height means that we won't allocate
853		 * enough pages.
854		 *
855		 * If we're untiled, we still have to align to 2 rows high
856		 * because the data port accesses 2x2 blocks even if the
857		 * bottom row isn't to be rendered, so failure to align means
858		 * we could walk off the end of the GTT and fault.  This is
859		 * documented on 965, and may be the case on older chipsets
860		 * too so we try to be careful.
861		 */
862		aligned_y = y;
863		height_alignment = 2;
864
865		if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE)
866			height_alignment = 16;
867		else if (tiling == I915_TILING_X
868			|| (IS_915(bufmgr_gem->pci_device)
869			    && tiling == I915_TILING_Y))
870			height_alignment = 8;
871		else if (tiling == I915_TILING_Y)
872			height_alignment = 32;
873		aligned_y = ALIGN(y, height_alignment);
874
875		stride = x * cpp;
876		stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode);
877		size = stride * aligned_y;
878		size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
879	} while (*tiling_mode != tiling);
880	*pitch = stride;
881
882	if (tiling == I915_TILING_NONE)
883		stride = 0;
884
885	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags,
886					       tiling, stride, 0);
887}
888
889static drm_intel_bo *
890drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
891				const char *name,
892				void *addr,
893				uint32_t tiling_mode,
894				uint32_t stride,
895				unsigned long size,
896				unsigned long flags)
897{
898	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
899	drm_intel_bo_gem *bo_gem;
900	int ret;
901	struct drm_i915_gem_userptr userptr;
902
903	/* Tiling with userptr surfaces is not supported
904	 * on all hardware so refuse it for time being.
905	 */
906	if (tiling_mode != I915_TILING_NONE)
907		return NULL;
908
909	bo_gem = calloc(1, sizeof(*bo_gem));
910	if (!bo_gem)
911		return NULL;
912
913	atomic_set(&bo_gem->refcount, 1);
914	DRMINITLISTHEAD(&bo_gem->vma_list);
915
916	bo_gem->bo.size = size;
917
918	memclear(userptr);
919	userptr.user_ptr = (__u64)((unsigned long)addr);
920	userptr.user_size = size;
921	userptr.flags = flags;
922
923	ret = drmIoctl(bufmgr_gem->fd,
924			DRM_IOCTL_I915_GEM_USERPTR,
925			&userptr);
926	if (ret != 0) {
927		DBG("bo_create_userptr: "
928		    "ioctl failed with user ptr %p size 0x%lx, "
929		    "user flags 0x%lx\n", addr, size, flags);
930		free(bo_gem);
931		return NULL;
932	}
933
934	pthread_mutex_lock(&bufmgr_gem->lock);
935
936	bo_gem->gem_handle = userptr.handle;
937	bo_gem->bo.handle = bo_gem->gem_handle;
938	bo_gem->bo.bufmgr    = bufmgr;
939	bo_gem->is_userptr   = true;
940	bo_gem->bo.virtual   = addr;
941	/* Save the address provided by user */
942	bo_gem->user_virtual = addr;
943	bo_gem->tiling_mode  = I915_TILING_NONE;
944	bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
945	bo_gem->stride       = 0;
946
947	HASH_ADD(handle_hh, bufmgr_gem->handle_table,
948		 gem_handle, sizeof(bo_gem->gem_handle),
949		 bo_gem);
950
951	bo_gem->name = name;
952	bo_gem->validate_index = -1;
953	bo_gem->reloc_tree_fences = 0;
954	bo_gem->used_as_reloc_target = false;
955	bo_gem->has_error = false;
956	bo_gem->reusable = false;
957
958	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
959	pthread_mutex_unlock(&bufmgr_gem->lock);
960
961	DBG("bo_create_userptr: "
962	    "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n",
963		addr, bo_gem->gem_handle, bo_gem->name,
964		size, stride, tiling_mode);
965
966	return &bo_gem->bo;
967}
968
969static bool
970has_userptr(drm_intel_bufmgr_gem *bufmgr_gem)
971{
972	int ret;
973	void *ptr;
974	long pgsz;
975	struct drm_i915_gem_userptr userptr;
976
977	pgsz = sysconf(_SC_PAGESIZE);
978	assert(pgsz > 0);
979
980	ret = posix_memalign(&ptr, pgsz, pgsz);
981	if (ret) {
982		DBG("Failed to get a page (%ld) for userptr detection!\n",
983			pgsz);
984		return false;
985	}
986
987	memclear(userptr);
988	userptr.user_ptr = (__u64)(unsigned long)ptr;
989	userptr.user_size = pgsz;
990
991retry:
992	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
993	if (ret) {
994		if (errno == ENODEV && userptr.flags == 0) {
995			userptr.flags = I915_USERPTR_UNSYNCHRONIZED;
996			goto retry;
997		}
998		free(ptr);
999		return false;
1000	}
1001
1002	/* We don't release the userptr bo here as we want to keep the
1003	 * kernel mm tracking alive for our lifetime. The first time we
1004	 * create a userptr object the kernel has to install a mmu_notifer
1005	 * which is a heavyweight operation (e.g. it requires taking all
1006	 * mm_locks and stop_machine()).
1007	 */
1008
1009	bufmgr_gem->userptr_active.ptr = ptr;
1010	bufmgr_gem->userptr_active.handle = userptr.handle;
1011
1012	return true;
1013}
1014
1015static drm_intel_bo *
1016check_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
1017		       const char *name,
1018		       void *addr,
1019		       uint32_t tiling_mode,
1020		       uint32_t stride,
1021		       unsigned long size,
1022		       unsigned long flags)
1023{
1024	if (has_userptr((drm_intel_bufmgr_gem *)bufmgr))
1025		bufmgr->bo_alloc_userptr = drm_intel_gem_bo_alloc_userptr;
1026	else
1027		bufmgr->bo_alloc_userptr = NULL;
1028
1029	return drm_intel_bo_alloc_userptr(bufmgr, name, addr,
1030					  tiling_mode, stride, size, flags);
1031}
1032
1033static int get_tiling_mode(drm_intel_bufmgr_gem *bufmgr_gem,
1034			   uint32_t gem_handle,
1035			   uint32_t *tiling_mode,
1036			   uint32_t *swizzle_mode)
1037{
1038	struct drm_i915_gem_get_tiling get_tiling = {
1039		.handle = gem_handle,
1040	};
1041	int ret;
1042
1043	ret = drmIoctl(bufmgr_gem->fd,
1044		       DRM_IOCTL_I915_GEM_GET_TILING,
1045		       &get_tiling);
1046	if (ret != 0 && errno != EOPNOTSUPP)
1047		return ret;
1048
1049	*tiling_mode = get_tiling.tiling_mode;
1050	*swizzle_mode = get_tiling.swizzle_mode;
1051
1052	return 0;
1053}
1054
1055/**
1056 * Returns a drm_intel_bo wrapping the given buffer object handle.
1057 *
1058 * This can be used when one application needs to pass a buffer object
1059 * to another.
1060 */
1061drm_public drm_intel_bo *
1062drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr,
1063				  const char *name,
1064				  unsigned int handle)
1065{
1066	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1067	drm_intel_bo_gem *bo_gem;
1068	int ret;
1069	struct drm_gem_open open_arg;
1070
1071	/* At the moment most applications only have a few named bo.
1072	 * For instance, in a DRI client only the render buffers passed
1073	 * between X and the client are named. And since X returns the
1074	 * alternating names for the front/back buffer a linear search
1075	 * provides a sufficiently fast match.
1076	 */
1077	pthread_mutex_lock(&bufmgr_gem->lock);
1078	HASH_FIND(name_hh, bufmgr_gem->name_table,
1079		  &handle, sizeof(handle), bo_gem);
1080	if (bo_gem) {
1081		drm_intel_gem_bo_reference(&bo_gem->bo);
1082		goto out;
1083	}
1084
1085	memclear(open_arg);
1086	open_arg.name = handle;
1087	ret = drmIoctl(bufmgr_gem->fd,
1088		       DRM_IOCTL_GEM_OPEN,
1089		       &open_arg);
1090	if (ret != 0) {
1091		DBG("Couldn't reference %s handle 0x%08x: %s\n",
1092		    name, handle, strerror(errno));
1093		bo_gem = NULL;
1094		goto out;
1095	}
1096        /* Now see if someone has used a prime handle to get this
1097         * object from the kernel before by looking through the list
1098         * again for a matching gem_handle
1099         */
1100	HASH_FIND(handle_hh, bufmgr_gem->handle_table,
1101		  &open_arg.handle, sizeof(open_arg.handle), bo_gem);
1102	if (bo_gem) {
1103		drm_intel_gem_bo_reference(&bo_gem->bo);
1104		goto out;
1105	}
1106
1107	bo_gem = calloc(1, sizeof(*bo_gem));
1108	if (!bo_gem)
1109		goto out;
1110
1111	atomic_set(&bo_gem->refcount, 1);
1112	DRMINITLISTHEAD(&bo_gem->vma_list);
1113
1114	bo_gem->bo.size = open_arg.size;
1115	bo_gem->bo.offset = 0;
1116	bo_gem->bo.offset64 = 0;
1117	bo_gem->bo.virtual = NULL;
1118	bo_gem->bo.bufmgr = bufmgr;
1119	bo_gem->name = name;
1120	bo_gem->validate_index = -1;
1121	bo_gem->gem_handle = open_arg.handle;
1122	bo_gem->bo.handle = open_arg.handle;
1123	bo_gem->global_name = handle;
1124	bo_gem->reusable = false;
1125
1126	HASH_ADD(handle_hh, bufmgr_gem->handle_table,
1127		 gem_handle, sizeof(bo_gem->gem_handle), bo_gem);
1128	HASH_ADD(name_hh, bufmgr_gem->name_table,
1129		 global_name, sizeof(bo_gem->global_name), bo_gem);
1130
1131	ret = get_tiling_mode(bufmgr_gem, bo_gem->gem_handle,
1132			      &bo_gem->tiling_mode, &bo_gem->swizzle_mode);
1133	if (ret != 0)
1134		goto err_unref;
1135
1136	/* XXX stride is unknown */
1137	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
1138	DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
1139
1140out:
1141	pthread_mutex_unlock(&bufmgr_gem->lock);
1142	return &bo_gem->bo;
1143
1144err_unref:
1145	drm_intel_gem_bo_free(&bo_gem->bo);
1146	pthread_mutex_unlock(&bufmgr_gem->lock);
1147	return NULL;
1148}
1149
1150static void
1151drm_intel_gem_bo_free(drm_intel_bo *bo)
1152{
1153	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1154	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1155	int ret;
1156
1157	DRMLISTDEL(&bo_gem->vma_list);
1158	if (bo_gem->mem_virtual) {
1159		VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0));
1160		drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
1161		bufmgr_gem->vma_count--;
1162	}
1163	if (bo_gem->wc_virtual) {
1164		VG(VALGRIND_FREELIKE_BLOCK(bo_gem->wc_virtual, 0));
1165		drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
1166		bufmgr_gem->vma_count--;
1167	}
1168	if (bo_gem->gtt_virtual) {
1169		drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
1170		bufmgr_gem->vma_count--;
1171	}
1172
1173	if (bo_gem->global_name)
1174		HASH_DELETE(name_hh, bufmgr_gem->name_table, bo_gem);
1175	HASH_DELETE(handle_hh, bufmgr_gem->handle_table, bo_gem);
1176
1177	/* Close this object */
1178	ret = drmCloseBufferHandle(bufmgr_gem->fd, bo_gem->gem_handle);
1179	if (ret != 0) {
1180		DBG("drmCloseBufferHandle %d failed (%s): %s\n",
1181		    bo_gem->gem_handle, bo_gem->name, strerror(errno));
1182	}
1183	free(bo);
1184}
1185
1186static void
1187drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo)
1188{
1189#if HAVE_VALGRIND
1190	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1191
1192	if (bo_gem->mem_virtual)
1193		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size);
1194
1195	if (bo_gem->wc_virtual)
1196		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->wc_virtual, bo->size);
1197
1198	if (bo_gem->gtt_virtual)
1199		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size);
1200#endif
1201}
1202
1203/** Frees all cached buffers significantly older than @time. */
1204static void
1205drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
1206{
1207	int i;
1208
1209	if (bufmgr_gem->time == time)
1210		return;
1211
1212	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
1213		struct drm_intel_gem_bo_bucket *bucket =
1214		    &bufmgr_gem->cache_bucket[i];
1215
1216		while (!DRMLISTEMPTY(&bucket->head)) {
1217			drm_intel_bo_gem *bo_gem;
1218
1219			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1220					      bucket->head.next, head);
1221			if (time - bo_gem->free_time <= 1)
1222				break;
1223
1224			DRMLISTDEL(&bo_gem->head);
1225
1226			drm_intel_gem_bo_free(&bo_gem->bo);
1227		}
1228	}
1229
1230	bufmgr_gem->time = time;
1231}
1232
1233static void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem)
1234{
1235	int limit;
1236
1237	DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__,
1238	    bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max);
1239
1240	if (bufmgr_gem->vma_max < 0)
1241		return;
1242
1243	/* We may need to evict a few entries in order to create new mmaps */
1244	limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open;
1245	if (limit < 0)
1246		limit = 0;
1247
1248	while (bufmgr_gem->vma_count > limit) {
1249		drm_intel_bo_gem *bo_gem;
1250
1251		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1252				      bufmgr_gem->vma_cache.next,
1253				      vma_list);
1254		assert(bo_gem->map_count == 0);
1255		DRMLISTDELINIT(&bo_gem->vma_list);
1256
1257		if (bo_gem->mem_virtual) {
1258			drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
1259			bo_gem->mem_virtual = NULL;
1260			bufmgr_gem->vma_count--;
1261		}
1262		if (bo_gem->wc_virtual) {
1263			drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
1264			bo_gem->wc_virtual = NULL;
1265			bufmgr_gem->vma_count--;
1266		}
1267		if (bo_gem->gtt_virtual) {
1268			drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
1269			bo_gem->gtt_virtual = NULL;
1270			bufmgr_gem->vma_count--;
1271		}
1272	}
1273}
1274
1275static void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem,
1276				       drm_intel_bo_gem *bo_gem)
1277{
1278	bufmgr_gem->vma_open--;
1279	DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache);
1280	if (bo_gem->mem_virtual)
1281		bufmgr_gem->vma_count++;
1282	if (bo_gem->wc_virtual)
1283		bufmgr_gem->vma_count++;
1284	if (bo_gem->gtt_virtual)
1285		bufmgr_gem->vma_count++;
1286	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
1287}
1288
1289static void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem,
1290				      drm_intel_bo_gem *bo_gem)
1291{
1292	bufmgr_gem->vma_open++;
1293	DRMLISTDEL(&bo_gem->vma_list);
1294	if (bo_gem->mem_virtual)
1295		bufmgr_gem->vma_count--;
1296	if (bo_gem->wc_virtual)
1297		bufmgr_gem->vma_count--;
1298	if (bo_gem->gtt_virtual)
1299		bufmgr_gem->vma_count--;
1300	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
1301}
1302
1303static void
1304drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
1305{
1306	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1307	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1308	struct drm_intel_gem_bo_bucket *bucket;
1309	int i;
1310
1311	/* Unreference all the target buffers */
1312	for (i = 0; i < bo_gem->reloc_count; i++) {
1313		if (bo_gem->reloc_target_info[i].bo != bo) {
1314			drm_intel_gem_bo_unreference_locked_timed(bo_gem->
1315								  reloc_target_info[i].bo,
1316								  time);
1317		}
1318	}
1319	for (i = 0; i < bo_gem->softpin_target_count; i++)
1320		drm_intel_gem_bo_unreference_locked_timed(bo_gem->softpin_target[i],
1321								  time);
1322	bo_gem->kflags = 0;
1323	bo_gem->reloc_count = 0;
1324	bo_gem->used_as_reloc_target = false;
1325	bo_gem->softpin_target_count = 0;
1326
1327	DBG("bo_unreference final: %d (%s)\n",
1328	    bo_gem->gem_handle, bo_gem->name);
1329
1330	/* release memory associated with this object */
1331	if (bo_gem->reloc_target_info) {
1332		free(bo_gem->reloc_target_info);
1333		bo_gem->reloc_target_info = NULL;
1334	}
1335	if (bo_gem->relocs) {
1336		free(bo_gem->relocs);
1337		bo_gem->relocs = NULL;
1338	}
1339	if (bo_gem->softpin_target) {
1340		free(bo_gem->softpin_target);
1341		bo_gem->softpin_target = NULL;
1342		bo_gem->softpin_target_size = 0;
1343	}
1344
1345	/* Clear any left-over mappings */
1346	if (bo_gem->map_count) {
1347		DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count);
1348		bo_gem->map_count = 0;
1349		drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1350		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1351	}
1352
1353	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
1354	/* Put the buffer into our internal cache for reuse if we can. */
1355	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
1356	    drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
1357					      I915_MADV_DONTNEED)) {
1358		bo_gem->free_time = time;
1359
1360		bo_gem->name = NULL;
1361		bo_gem->validate_index = -1;
1362
1363		DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
1364	} else {
1365		drm_intel_gem_bo_free(bo);
1366	}
1367}
1368
1369static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
1370						      time_t time)
1371{
1372	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1373
1374	assert(atomic_read(&bo_gem->refcount) > 0);
1375	if (atomic_dec_and_test(&bo_gem->refcount))
1376		drm_intel_gem_bo_unreference_final(bo, time);
1377}
1378
1379static void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
1380{
1381	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1382	drm_intel_bufmgr_gem *bufmgr_gem;
1383	struct timespec time;
1384
1385	assert(atomic_read(&bo_gem->refcount) > 0);
1386
1387	if (atomic_add_unless(&bo_gem->refcount, -1, 1))
1388		return;
1389
1390	bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1391
1392	clock_gettime(CLOCK_MONOTONIC, &time);
1393
1394	pthread_mutex_lock(&bufmgr_gem->lock);
1395
1396	if (atomic_dec_and_test(&bo_gem->refcount)) {
1397		drm_intel_gem_bo_unreference_final(bo, time.tv_sec);
1398		drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec);
1399	}
1400
1401	pthread_mutex_unlock(&bufmgr_gem->lock);
1402}
1403
1404static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
1405{
1406	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1407	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1408	struct drm_i915_gem_set_domain set_domain;
1409	int ret;
1410
1411	if (bo_gem->is_userptr) {
1412		/* Return the same user ptr */
1413		bo->virtual = bo_gem->user_virtual;
1414		return 0;
1415	}
1416
1417	pthread_mutex_lock(&bufmgr_gem->lock);
1418
1419	if (bo_gem->map_count++ == 0)
1420		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
1421
1422	if (!bo_gem->mem_virtual) {
1423		struct drm_i915_gem_mmap mmap_arg;
1424
1425		DBG("bo_map: %d (%s), map_count=%d\n",
1426		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1427
1428		memclear(mmap_arg);
1429		mmap_arg.handle = bo_gem->gem_handle;
1430		mmap_arg.size = bo->size;
1431		ret = drmIoctl(bufmgr_gem->fd,
1432			       DRM_IOCTL_I915_GEM_MMAP,
1433			       &mmap_arg);
1434		if (ret != 0) {
1435			ret = -errno;
1436			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1437			    __FILE__, __LINE__, bo_gem->gem_handle,
1438			    bo_gem->name, strerror(errno));
1439			if (--bo_gem->map_count == 0)
1440				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1441			pthread_mutex_unlock(&bufmgr_gem->lock);
1442			return ret;
1443		}
1444		VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
1445		bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
1446	}
1447	DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1448	    bo_gem->mem_virtual);
1449	bo->virtual = bo_gem->mem_virtual;
1450
1451	memclear(set_domain);
1452	set_domain.handle = bo_gem->gem_handle;
1453	set_domain.read_domains = I915_GEM_DOMAIN_CPU;
1454	if (write_enable)
1455		set_domain.write_domain = I915_GEM_DOMAIN_CPU;
1456	else
1457		set_domain.write_domain = 0;
1458	ret = drmIoctl(bufmgr_gem->fd,
1459		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1460		       &set_domain);
1461	if (ret != 0) {
1462		DBG("%s:%d: Error setting to CPU domain %d: %s\n",
1463		    __FILE__, __LINE__, bo_gem->gem_handle,
1464		    strerror(errno));
1465	}
1466
1467	if (write_enable)
1468		bo_gem->mapped_cpu_write = true;
1469
1470	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1471	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size));
1472	pthread_mutex_unlock(&bufmgr_gem->lock);
1473
1474	return 0;
1475}
1476
1477static int
1478map_gtt(drm_intel_bo *bo)
1479{
1480	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1481	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1482	int ret;
1483
1484	if (bo_gem->is_userptr)
1485		return -EINVAL;
1486
1487	if (bo_gem->map_count++ == 0)
1488		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
1489
1490	/* Get a mapping of the buffer if we haven't before. */
1491	if (bo_gem->gtt_virtual == NULL) {
1492		struct drm_i915_gem_mmap_gtt mmap_arg;
1493
1494		DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
1495		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1496
1497		memclear(mmap_arg);
1498		mmap_arg.handle = bo_gem->gem_handle;
1499
1500		/* Get the fake offset back... */
1501		ret = drmIoctl(bufmgr_gem->fd,
1502			       DRM_IOCTL_I915_GEM_MMAP_GTT,
1503			       &mmap_arg);
1504		if (ret != 0) {
1505			ret = -errno;
1506			DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
1507			    __FILE__, __LINE__,
1508			    bo_gem->gem_handle, bo_gem->name,
1509			    strerror(errno));
1510			if (--bo_gem->map_count == 0)
1511				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1512			return ret;
1513		}
1514
1515		/* and mmap it */
1516		bo_gem->gtt_virtual = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
1517					       MAP_SHARED, bufmgr_gem->fd,
1518					       mmap_arg.offset);
1519		if (bo_gem->gtt_virtual == MAP_FAILED) {
1520			bo_gem->gtt_virtual = NULL;
1521			ret = -errno;
1522			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1523			    __FILE__, __LINE__,
1524			    bo_gem->gem_handle, bo_gem->name,
1525			    strerror(errno));
1526			if (--bo_gem->map_count == 0)
1527				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1528			return ret;
1529		}
1530	}
1531
1532	bo->virtual = bo_gem->gtt_virtual;
1533
1534	DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1535	    bo_gem->gtt_virtual);
1536
1537	return 0;
1538}
1539
1540drm_public int
1541drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
1542{
1543	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1544	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1545	struct drm_i915_gem_set_domain set_domain;
1546	int ret;
1547
1548	pthread_mutex_lock(&bufmgr_gem->lock);
1549
1550	ret = map_gtt(bo);
1551	if (ret) {
1552		pthread_mutex_unlock(&bufmgr_gem->lock);
1553		return ret;
1554	}
1555
1556	/* Now move it to the GTT domain so that the GPU and CPU
1557	 * caches are flushed and the GPU isn't actively using the
1558	 * buffer.
1559	 *
1560	 * The pagefault handler does this domain change for us when
1561	 * it has unbound the BO from the GTT, but it's up to us to
1562	 * tell it when we're about to use things if we had done
1563	 * rendering and it still happens to be bound to the GTT.
1564	 */
1565	memclear(set_domain);
1566	set_domain.handle = bo_gem->gem_handle;
1567	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1568	set_domain.write_domain = I915_GEM_DOMAIN_GTT;
1569	ret = drmIoctl(bufmgr_gem->fd,
1570		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1571		       &set_domain);
1572	if (ret != 0) {
1573		DBG("%s:%d: Error setting domain %d: %s\n",
1574		    __FILE__, __LINE__, bo_gem->gem_handle,
1575		    strerror(errno));
1576	}
1577
1578	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1579	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1580	pthread_mutex_unlock(&bufmgr_gem->lock);
1581
1582	return 0;
1583}
1584
1585/**
1586 * Performs a mapping of the buffer object like the normal GTT
1587 * mapping, but avoids waiting for the GPU to be done reading from or
1588 * rendering to the buffer.
1589 *
1590 * This is used in the implementation of GL_ARB_map_buffer_range: The
1591 * user asks to create a buffer, then does a mapping, fills some
1592 * space, runs a drawing command, then asks to map it again without
1593 * synchronizing because it guarantees that it won't write over the
1594 * data that the GPU is busy using (or, more specifically, that if it
1595 * does write over the data, it acknowledges that rendering is
1596 * undefined).
1597 */
1598
1599drm_public int
1600drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
1601{
1602	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1603#if HAVE_VALGRIND
1604	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1605#endif
1606	int ret;
1607
1608	/* If the CPU cache isn't coherent with the GTT, then use a
1609	 * regular synchronized mapping.  The problem is that we don't
1610	 * track where the buffer was last used on the CPU side in
1611	 * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so
1612	 * we would potentially corrupt the buffer even when the user
1613	 * does reasonable things.
1614	 */
1615	if (!bufmgr_gem->has_llc)
1616		return drm_intel_gem_bo_map_gtt(bo);
1617
1618	pthread_mutex_lock(&bufmgr_gem->lock);
1619
1620	ret = map_gtt(bo);
1621	if (ret == 0) {
1622		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1623		VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1624	}
1625
1626	pthread_mutex_unlock(&bufmgr_gem->lock);
1627
1628	return ret;
1629}
1630
1631static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
1632{
1633	drm_intel_bufmgr_gem *bufmgr_gem;
1634	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1635	int ret = 0;
1636
1637	if (bo == NULL)
1638		return 0;
1639
1640	if (bo_gem->is_userptr)
1641		return 0;
1642
1643	bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1644
1645	pthread_mutex_lock(&bufmgr_gem->lock);
1646
1647	if (bo_gem->map_count <= 0) {
1648		DBG("attempted to unmap an unmapped bo\n");
1649		pthread_mutex_unlock(&bufmgr_gem->lock);
1650		/* Preserve the old behaviour of just treating this as a
1651		 * no-op rather than reporting the error.
1652		 */
1653		return 0;
1654	}
1655
1656	if (bo_gem->mapped_cpu_write) {
1657		struct drm_i915_gem_sw_finish sw_finish;
1658
1659		/* Cause a flush to happen if the buffer's pinned for
1660		 * scanout, so the results show up in a timely manner.
1661		 * Unlike GTT set domains, this only does work if the
1662		 * buffer should be scanout-related.
1663		 */
1664		memclear(sw_finish);
1665		sw_finish.handle = bo_gem->gem_handle;
1666		ret = drmIoctl(bufmgr_gem->fd,
1667			       DRM_IOCTL_I915_GEM_SW_FINISH,
1668			       &sw_finish);
1669		ret = ret == -1 ? -errno : 0;
1670
1671		bo_gem->mapped_cpu_write = false;
1672	}
1673
1674	/* We need to unmap after every innovation as we cannot track
1675	 * an open vma for every bo as that will exhaust the system
1676	 * limits and cause later failures.
1677	 */
1678	if (--bo_gem->map_count == 0) {
1679		drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1680		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1681		bo->virtual = NULL;
1682	}
1683	pthread_mutex_unlock(&bufmgr_gem->lock);
1684
1685	return ret;
1686}
1687
1688drm_public int
1689drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
1690{
1691	return drm_intel_gem_bo_unmap(bo);
1692}
1693
1694static bool is_cache_coherent(drm_intel_bo *bo)
1695{
1696	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1697	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1698	struct drm_i915_gem_caching arg = {};
1699
1700	arg.handle = bo_gem->gem_handle;
1701	if (drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_CACHING, &arg))
1702		assert(false);
1703	return arg.caching != I915_CACHING_NONE;
1704}
1705
1706static void set_domain(drm_intel_bo *bo, uint32_t read, uint32_t write)
1707{
1708	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1709	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1710	struct drm_i915_gem_set_domain arg = {};
1711
1712	arg.handle = bo_gem->gem_handle;
1713	arg.read_domains = read;
1714	arg.write_domain = write;
1715	if (drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg))
1716		assert(false);
1717}
1718
1719static int mmap_write(drm_intel_bo *bo, unsigned long offset,
1720		      unsigned long length, const void *buf)
1721{
1722	void *map = NULL;
1723
1724	if (!length)
1725		return 0;
1726
1727	if (is_cache_coherent(bo)) {
1728		map = drm_intel_gem_bo_map__cpu(bo);
1729		if (map)
1730			set_domain(bo, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
1731	}
1732	if (!map) {
1733		map = drm_intel_gem_bo_map__wc(bo);
1734		if (map)
1735			set_domain(bo, I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
1736	}
1737
1738	assert(map);
1739	memcpy((char *)map + offset, buf, length);
1740	drm_intel_gem_bo_unmap(bo);
1741	return 0;
1742}
1743
1744static int mmap_read(drm_intel_bo *bo, unsigned long offset,
1745		      unsigned long length, void *buf)
1746{
1747	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1748	void *map = NULL;
1749
1750	if (!length)
1751		return 0;
1752
1753	if (bufmgr_gem->has_llc || is_cache_coherent(bo)) {
1754		map = drm_intel_gem_bo_map__cpu(bo);
1755		if (map)
1756			set_domain(bo, I915_GEM_DOMAIN_CPU, 0);
1757	}
1758	if (!map) {
1759		map = drm_intel_gem_bo_map__wc(bo);
1760		if (map)
1761			set_domain(bo, I915_GEM_DOMAIN_WC, 0);
1762	}
1763
1764	assert(map);
1765	memcpy(buf, (char *)map + offset, length);
1766	drm_intel_gem_bo_unmap(bo);
1767	return 0;
1768}
1769
1770static int
1771drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
1772			 unsigned long size, const void *data)
1773{
1774	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1775	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1776	struct drm_i915_gem_pwrite pwrite;
1777	int ret;
1778
1779	if (bo_gem->is_userptr)
1780		return -EINVAL;
1781
1782	memclear(pwrite);
1783	pwrite.handle = bo_gem->gem_handle;
1784	pwrite.offset = offset;
1785	pwrite.size = size;
1786	pwrite.data_ptr = (uint64_t) (uintptr_t) data;
1787	ret = drmIoctl(bufmgr_gem->fd,
1788		       DRM_IOCTL_I915_GEM_PWRITE,
1789		       &pwrite);
1790	if (ret)
1791		ret = -errno;
1792
1793	if (ret != 0 && ret != -EOPNOTSUPP) {
1794		DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
1795		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1796		    (int)size, strerror(errno));
1797		return ret;
1798	}
1799
1800	if (ret == -EOPNOTSUPP)
1801		mmap_write(bo, offset, size, data);
1802
1803	return 0;
1804}
1805
1806static int
1807drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id)
1808{
1809	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1810	struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id;
1811	int ret;
1812
1813	memclear(get_pipe_from_crtc_id);
1814	get_pipe_from_crtc_id.crtc_id = crtc_id;
1815	ret = drmIoctl(bufmgr_gem->fd,
1816		       DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
1817		       &get_pipe_from_crtc_id);
1818	if (ret != 0) {
1819		/* We return -1 here to signal that we don't
1820		 * know which pipe is associated with this crtc.
1821		 * This lets the caller know that this information
1822		 * isn't available; using the wrong pipe for
1823		 * vblank waiting can cause the chipset to lock up
1824		 */
1825		return -1;
1826	}
1827
1828	return get_pipe_from_crtc_id.pipe;
1829}
1830
1831static int
1832drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
1833			     unsigned long size, void *data)
1834{
1835	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1836	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1837	struct drm_i915_gem_pread pread;
1838	int ret;
1839
1840	if (bo_gem->is_userptr)
1841		return -EINVAL;
1842
1843	memclear(pread);
1844	pread.handle = bo_gem->gem_handle;
1845	pread.offset = offset;
1846	pread.size = size;
1847	pread.data_ptr = (uint64_t) (uintptr_t) data;
1848	ret = drmIoctl(bufmgr_gem->fd,
1849		       DRM_IOCTL_I915_GEM_PREAD,
1850		       &pread);
1851	if (ret)
1852		ret = -errno;
1853
1854	if (ret != 0 && ret != -EOPNOTSUPP) {
1855		DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
1856		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1857		    (int)size, strerror(errno));
1858		return ret;
1859	}
1860
1861	if (ret == -EOPNOTSUPP)
1862		mmap_read(bo, offset, size, data);
1863
1864	return 0;
1865}
1866
1867/** Waits for all GPU rendering with the object to have completed. */
1868static void
1869drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
1870{
1871	drm_intel_gem_bo_start_gtt_access(bo, 1);
1872}
1873
1874/**
1875 * Waits on a BO for the given amount of time.
1876 *
1877 * @bo: buffer object to wait for
1878 * @timeout_ns: amount of time to wait in nanoseconds.
1879 *   If value is less than 0, an infinite wait will occur.
1880 *
1881 * Returns 0 if the wait was successful ie. the last batch referencing the
1882 * object has completed within the allotted time. Otherwise some negative return
1883 * value describes the error. Of particular interest is -ETIME when the wait has
1884 * failed to yield the desired result.
1885 *
1886 * Similar to drm_intel_gem_bo_wait_rendering except a timeout parameter allows
1887 * the operation to give up after a certain amount of time. Another subtle
1888 * difference is the internal locking semantics are different (this variant does
1889 * not hold the lock for the duration of the wait). This makes the wait subject
1890 * to a larger userspace race window.
1891 *
1892 * The implementation shall wait until the object is no longer actively
1893 * referenced within a batch buffer at the time of the call. The wait will
1894 * not guarantee that the buffer is re-issued via another thread, or an flinked
1895 * handle. Userspace must make sure this race does not occur if such precision
1896 * is important.
1897 *
1898 * Note that some kernels have broken the inifite wait for negative values
1899 * promise, upgrade to latest stable kernels if this is the case.
1900 */
1901drm_public int
1902drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns)
1903{
1904	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1905	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1906	struct drm_i915_gem_wait wait;
1907	int ret;
1908
1909	if (!bufmgr_gem->has_wait_timeout) {
1910		DBG("%s:%d: Timed wait is not supported. Falling back to "
1911		    "infinite wait\n", __FILE__, __LINE__);
1912		if (timeout_ns) {
1913			drm_intel_gem_bo_wait_rendering(bo);
1914			return 0;
1915		} else {
1916			return drm_intel_gem_bo_busy(bo) ? -ETIME : 0;
1917		}
1918	}
1919
1920	memclear(wait);
1921	wait.bo_handle = bo_gem->gem_handle;
1922	wait.timeout_ns = timeout_ns;
1923	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
1924	if (ret == -1)
1925		return -errno;
1926
1927	return ret;
1928}
1929
1930/**
1931 * Sets the object to the GTT read and possibly write domain, used by the X
1932 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
1933 *
1934 * In combination with drm_intel_gem_bo_pin() and manual fence management, we
1935 * can do tiled pixmaps this way.
1936 */
1937drm_public void
1938drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
1939{
1940	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1941	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1942	struct drm_i915_gem_set_domain set_domain;
1943	int ret;
1944
1945	memclear(set_domain);
1946	set_domain.handle = bo_gem->gem_handle;
1947	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1948	set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
1949	ret = drmIoctl(bufmgr_gem->fd,
1950		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1951		       &set_domain);
1952	if (ret != 0) {
1953		DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
1954		    __FILE__, __LINE__, bo_gem->gem_handle,
1955		    set_domain.read_domains, set_domain.write_domain,
1956		    strerror(errno));
1957	}
1958}
1959
1960static void
1961drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
1962{
1963	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1964	int i, ret;
1965
1966	free(bufmgr_gem->exec2_objects);
1967	free(bufmgr_gem->exec_bos);
1968
1969	pthread_mutex_destroy(&bufmgr_gem->lock);
1970
1971	/* Free any cached buffer objects we were going to reuse */
1972	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
1973		struct drm_intel_gem_bo_bucket *bucket =
1974		    &bufmgr_gem->cache_bucket[i];
1975		drm_intel_bo_gem *bo_gem;
1976
1977		while (!DRMLISTEMPTY(&bucket->head)) {
1978			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1979					      bucket->head.next, head);
1980			DRMLISTDEL(&bo_gem->head);
1981
1982			drm_intel_gem_bo_free(&bo_gem->bo);
1983		}
1984	}
1985
1986	/* Release userptr bo kept hanging around for optimisation. */
1987	if (bufmgr_gem->userptr_active.ptr) {
1988		ret = drmCloseBufferHandle(bufmgr_gem->fd,
1989					   bufmgr_gem->userptr_active.handle);
1990		free(bufmgr_gem->userptr_active.ptr);
1991		if (ret)
1992			fprintf(stderr,
1993				"Failed to release test userptr object! (%d) "
1994				"i915 kernel driver may not be sane!\n", errno);
1995	}
1996
1997	free(bufmgr);
1998}
1999
2000/**
2001 * Adds the target buffer to the validation list and adds the relocation
2002 * to the reloc_buffer's relocation list.
2003 *
2004 * The relocation entry at the given offset must already contain the
2005 * precomputed relocation value, because the kernel will optimize out
2006 * the relocation entry write when the buffer hasn't moved from the
2007 * last known offset in target_bo.
2008 */
2009static int
2010do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
2011		 drm_intel_bo *target_bo, uint32_t target_offset,
2012		 uint32_t read_domains, uint32_t write_domain,
2013		 bool need_fence)
2014{
2015	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2016	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2017	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
2018	bool fenced_command;
2019
2020	if (bo_gem->has_error)
2021		return -ENOMEM;
2022
2023	if (target_bo_gem->has_error) {
2024		bo_gem->has_error = true;
2025		return -ENOMEM;
2026	}
2027
2028	/* We never use HW fences for rendering on 965+ */
2029	if (bufmgr_gem->gen >= 4)
2030		need_fence = false;
2031
2032	fenced_command = need_fence;
2033	if (target_bo_gem->tiling_mode == I915_TILING_NONE)
2034		need_fence = false;
2035
2036	/* Create a new relocation list if needed */
2037	if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo))
2038		return -ENOMEM;
2039
2040	/* Check overflow */
2041	assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);
2042
2043	/* Check args */
2044	assert(offset <= bo->size - 4);
2045	assert((write_domain & (write_domain - 1)) == 0);
2046
2047	/* An object needing a fence is a tiled buffer, so it won't have
2048	 * relocs to other buffers.
2049	 */
2050	if (need_fence) {
2051		assert(target_bo_gem->reloc_count == 0);
2052		target_bo_gem->reloc_tree_fences = 1;
2053	}
2054
2055	/* Make sure that we're not adding a reloc to something whose size has
2056	 * already been accounted for.
2057	 */
2058	assert(!bo_gem->used_as_reloc_target);
2059	if (target_bo_gem != bo_gem) {
2060		target_bo_gem->used_as_reloc_target = true;
2061		bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
2062		bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
2063	}
2064
2065	bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
2066	if (target_bo != bo)
2067		drm_intel_gem_bo_reference(target_bo);
2068	if (fenced_command)
2069		bo_gem->reloc_target_info[bo_gem->reloc_count].flags =
2070			DRM_INTEL_RELOC_FENCE;
2071	else
2072		bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
2073
2074	bo_gem->relocs[bo_gem->reloc_count].offset = offset;
2075	bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
2076	bo_gem->relocs[bo_gem->reloc_count].target_handle =
2077	    target_bo_gem->gem_handle;
2078	bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
2079	bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
2080	bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64;
2081	bo_gem->reloc_count++;
2082
2083	return 0;
2084}
2085
2086static void
2087drm_intel_gem_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable)
2088{
2089	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2090
2091	if (enable)
2092		bo_gem->kflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
2093	else
2094		bo_gem->kflags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
2095}
2096
2097static int
2098drm_intel_gem_bo_add_softpin_target(drm_intel_bo *bo, drm_intel_bo *target_bo)
2099{
2100	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2101	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2102	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
2103	if (bo_gem->has_error)
2104		return -ENOMEM;
2105
2106	if (target_bo_gem->has_error) {
2107		bo_gem->has_error = true;
2108		return -ENOMEM;
2109	}
2110
2111	if (!(target_bo_gem->kflags & EXEC_OBJECT_PINNED))
2112		return -EINVAL;
2113	if (target_bo_gem == bo_gem)
2114		return -EINVAL;
2115
2116	if (bo_gem->softpin_target_count == bo_gem->softpin_target_size) {
2117		int new_size = bo_gem->softpin_target_size * 2;
2118		if (new_size == 0)
2119			new_size = bufmgr_gem->max_relocs;
2120
2121		bo_gem->softpin_target = realloc(bo_gem->softpin_target, new_size *
2122				sizeof(drm_intel_bo *));
2123		if (!bo_gem->softpin_target)
2124			return -ENOMEM;
2125
2126		bo_gem->softpin_target_size = new_size;
2127	}
2128	bo_gem->softpin_target[bo_gem->softpin_target_count] = target_bo;
2129	drm_intel_gem_bo_reference(target_bo);
2130	bo_gem->softpin_target_count++;
2131
2132	return 0;
2133}
2134
2135static int
2136drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
2137			    drm_intel_bo *target_bo, uint32_t target_offset,
2138			    uint32_t read_domains, uint32_t write_domain)
2139{
2140	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
2141	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *)target_bo;
2142
2143	if (target_bo_gem->kflags & EXEC_OBJECT_PINNED)
2144		return drm_intel_gem_bo_add_softpin_target(bo, target_bo);
2145	else
2146		return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
2147					read_domains, write_domain,
2148					!bufmgr_gem->fenced_relocs);
2149}
2150
2151static int
2152drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
2153				  drm_intel_bo *target_bo,
2154				  uint32_t target_offset,
2155				  uint32_t read_domains, uint32_t write_domain)
2156{
2157	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
2158				read_domains, write_domain, true);
2159}
2160
2161drm_public int
2162drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo)
2163{
2164	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2165
2166	return bo_gem->reloc_count;
2167}
2168
2169/**
2170 * Removes existing relocation entries in the BO after "start".
2171 *
2172 * This allows a user to avoid a two-step process for state setup with
2173 * counting up all the buffer objects and doing a
2174 * drm_intel_bufmgr_check_aperture_space() before emitting any of the
2175 * relocations for the state setup.  Instead, save the state of the
2176 * batchbuffer including drm_intel_gem_get_reloc_count(), emit all the
2177 * state, and then check if it still fits in the aperture.
2178 *
2179 * Any further drm_intel_bufmgr_check_aperture_space() queries
2180 * involving this buffer in the tree are undefined after this call.
2181 *
2182 * This also removes all softpinned targets being referenced by the BO.
2183 */
2184drm_public void
2185drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start)
2186{
2187	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2188	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2189	int i;
2190	struct timespec time;
2191
2192	clock_gettime(CLOCK_MONOTONIC, &time);
2193
2194	assert(bo_gem->reloc_count >= start);
2195
2196	/* Unreference the cleared target buffers */
2197	pthread_mutex_lock(&bufmgr_gem->lock);
2198
2199	for (i = start; i < bo_gem->reloc_count; i++) {
2200		drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo;
2201		if (&target_bo_gem->bo != bo) {
2202			bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences;
2203			drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo,
2204								  time.tv_sec);
2205		}
2206	}
2207	bo_gem->reloc_count = start;
2208
2209	for (i = 0; i < bo_gem->softpin_target_count; i++) {
2210		drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->softpin_target[i];
2211		drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, time.tv_sec);
2212	}
2213	bo_gem->softpin_target_count = 0;
2214
2215	pthread_mutex_unlock(&bufmgr_gem->lock);
2216
2217}
2218
2219/**
2220 * Walk the tree of relocations rooted at BO and accumulate the list of
2221 * validations to be performed and update the relocation buffers with
2222 * index values into the validation list.
2223 */
2224static void
2225drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
2226{
2227	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
2228	int i;
2229
2230	if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL)
2231		return;
2232
2233	for (i = 0; i < bo_gem->reloc_count; i++) {
2234		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
2235		int need_fence;
2236
2237		if (target_bo == bo)
2238			continue;
2239
2240		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
2241
2242		/* Continue walking the tree depth-first. */
2243		drm_intel_gem_bo_process_reloc2(target_bo);
2244
2245		need_fence = (bo_gem->reloc_target_info[i].flags &
2246			      DRM_INTEL_RELOC_FENCE);
2247
2248		/* Add the target to the validate list */
2249		drm_intel_add_validate_buffer2(target_bo, need_fence);
2250	}
2251
2252	for (i = 0; i < bo_gem->softpin_target_count; i++) {
2253		drm_intel_bo *target_bo = bo_gem->softpin_target[i];
2254
2255		if (target_bo == bo)
2256			continue;
2257
2258		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
2259		drm_intel_gem_bo_process_reloc2(target_bo);
2260		drm_intel_add_validate_buffer2(target_bo, false);
2261	}
2262}
2263
2264static void
2265drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
2266{
2267	int i;
2268
2269	for (i = 0; i < bufmgr_gem->exec_count; i++) {
2270		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
2271		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
2272
2273		/* Update the buffer offset */
2274		if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) {
2275			/* If we're seeing softpinned object here it means that the kernel
2276			 * has relocated our object... Indicating a programming error
2277			 */
2278			assert(!(bo_gem->kflags & EXEC_OBJECT_PINNED));
2279			DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
2280			    bo_gem->gem_handle, bo_gem->name,
2281			    upper_32_bits(bo->offset64),
2282			    lower_32_bits(bo->offset64),
2283			    upper_32_bits(bufmgr_gem->exec2_objects[i].offset),
2284			    lower_32_bits(bufmgr_gem->exec2_objects[i].offset));
2285			bo->offset64 = bufmgr_gem->exec2_objects[i].offset;
2286			bo->offset = bufmgr_gem->exec2_objects[i].offset;
2287		}
2288	}
2289}
2290
2291drm_public void
2292drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo,
2293			      int x1, int y1, int width, int height,
2294			      enum aub_dump_bmp_format format,
2295			      int pitch, int offset)
2296{
2297}
2298
2299static int
2300do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx,
2301	 drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
2302	 int in_fence, int *out_fence,
2303	 unsigned int flags)
2304{
2305	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
2306	struct drm_i915_gem_execbuffer2 execbuf;
2307	int ret = 0;
2308	int i;
2309
2310	if (to_bo_gem(bo)->has_error)
2311		return -ENOMEM;
2312
2313	switch (flags & 0x7) {
2314	default:
2315		return -EINVAL;
2316	case I915_EXEC_BLT:
2317		if (!bufmgr_gem->has_blt)
2318			return -EINVAL;
2319		break;
2320	case I915_EXEC_BSD:
2321		if (!bufmgr_gem->has_bsd)
2322			return -EINVAL;
2323		break;
2324	case I915_EXEC_VEBOX:
2325		if (!bufmgr_gem->has_vebox)
2326			return -EINVAL;
2327		break;
2328	case I915_EXEC_RENDER:
2329	case I915_EXEC_DEFAULT:
2330		break;
2331	}
2332
2333	pthread_mutex_lock(&bufmgr_gem->lock);
2334	/* Update indices and set up the validate list. */
2335	drm_intel_gem_bo_process_reloc2(bo);
2336
2337	/* Add the batch buffer to the validation list.  There are no relocations
2338	 * pointing to it.
2339	 */
2340	drm_intel_add_validate_buffer2(bo, 0);
2341
2342	memclear(execbuf);
2343	execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
2344	execbuf.buffer_count = bufmgr_gem->exec_count;
2345	execbuf.batch_start_offset = 0;
2346	execbuf.batch_len = used;
2347	execbuf.cliprects_ptr = (uintptr_t)cliprects;
2348	execbuf.num_cliprects = num_cliprects;
2349	execbuf.DR1 = 0;
2350	execbuf.DR4 = DR4;
2351	execbuf.flags = flags;
2352	if (ctx == NULL)
2353		i915_execbuffer2_set_context_id(execbuf, 0);
2354	else
2355		i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id);
2356	execbuf.rsvd2 = 0;
2357	if (in_fence != -1) {
2358		execbuf.rsvd2 = in_fence;
2359		execbuf.flags |= I915_EXEC_FENCE_IN;
2360	}
2361	if (out_fence != NULL) {
2362		*out_fence = -1;
2363		execbuf.flags |= I915_EXEC_FENCE_OUT;
2364	}
2365
2366	if (bufmgr_gem->no_exec)
2367		goto skip_execution;
2368
2369	ret = drmIoctl(bufmgr_gem->fd,
2370		       DRM_IOCTL_I915_GEM_EXECBUFFER2_WR,
2371		       &execbuf);
2372	if (ret != 0) {
2373		ret = -errno;
2374		if (ret == -ENOSPC) {
2375			DBG("Execbuffer fails to pin. "
2376			    "Estimate: %u. Actual: %u. Available: %u\n",
2377			    drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
2378							       bufmgr_gem->exec_count),
2379			    drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
2380							      bufmgr_gem->exec_count),
2381			    (unsigned int) bufmgr_gem->gtt_size);
2382		}
2383	}
2384	drm_intel_update_buffer_offsets2(bufmgr_gem);
2385
2386	if (ret == 0 && out_fence != NULL)
2387		*out_fence = execbuf.rsvd2 >> 32;
2388
2389skip_execution:
2390	if (bufmgr_gem->bufmgr.debug)
2391		drm_intel_gem_dump_validation_list(bufmgr_gem);
2392
2393	for (i = 0; i < bufmgr_gem->exec_count; i++) {
2394		drm_intel_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]);
2395
2396		bo_gem->idle = false;
2397
2398		/* Disconnect the buffer from the validate list */
2399		bo_gem->validate_index = -1;
2400		bufmgr_gem->exec_bos[i] = NULL;
2401	}
2402	bufmgr_gem->exec_count = 0;
2403	pthread_mutex_unlock(&bufmgr_gem->lock);
2404
2405	return ret;
2406}
2407
2408static int
2409drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used,
2410		       drm_clip_rect_t *cliprects, int num_cliprects,
2411		       int DR4)
2412{
2413	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
2414			-1, NULL, I915_EXEC_RENDER);
2415}
2416
2417static int
2418drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
2419			drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
2420			unsigned int flags)
2421{
2422	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
2423			-1, NULL, flags);
2424}
2425
2426drm_public int
2427drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx,
2428			      int used, unsigned int flags)
2429{
2430	return do_exec2(bo, used, ctx, NULL, 0, 0, -1, NULL, flags);
2431}
2432
2433drm_public int
2434drm_intel_gem_bo_fence_exec(drm_intel_bo *bo,
2435			    drm_intel_context *ctx,
2436			    int used,
2437			    int in_fence,
2438			    int *out_fence,
2439			    unsigned int flags)
2440{
2441	return do_exec2(bo, used, ctx, NULL, 0, 0, in_fence, out_fence, flags);
2442}
2443
2444static int
2445drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
2446{
2447	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2448	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2449	struct drm_i915_gem_pin pin;
2450	int ret;
2451
2452	memclear(pin);
2453	pin.handle = bo_gem->gem_handle;
2454	pin.alignment = alignment;
2455
2456	ret = drmIoctl(bufmgr_gem->fd,
2457		       DRM_IOCTL_I915_GEM_PIN,
2458		       &pin);
2459	if (ret != 0)
2460		return -errno;
2461
2462	bo->offset64 = pin.offset;
2463	bo->offset = pin.offset;
2464	return 0;
2465}
2466
2467static int
2468drm_intel_gem_bo_unpin(drm_intel_bo *bo)
2469{
2470	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2471	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2472	struct drm_i915_gem_unpin unpin;
2473	int ret;
2474
2475	memclear(unpin);
2476	unpin.handle = bo_gem->gem_handle;
2477
2478	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
2479	if (ret != 0)
2480		return -errno;
2481
2482	return 0;
2483}
2484
2485static int
2486drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
2487				     uint32_t tiling_mode,
2488				     uint32_t stride)
2489{
2490	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2491	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2492	struct drm_i915_gem_set_tiling set_tiling;
2493	int ret;
2494
2495	if (bo_gem->global_name == 0 &&
2496	    tiling_mode == bo_gem->tiling_mode &&
2497	    stride == bo_gem->stride)
2498		return 0;
2499
2500	memset(&set_tiling, 0, sizeof(set_tiling));
2501	do {
2502		/* set_tiling is slightly broken and overwrites the
2503		 * input on the error path, so we have to open code
2504		 * rmIoctl.
2505		 */
2506		set_tiling.handle = bo_gem->gem_handle;
2507		set_tiling.tiling_mode = tiling_mode;
2508		set_tiling.stride = stride;
2509
2510		ret = ioctl(bufmgr_gem->fd,
2511			    DRM_IOCTL_I915_GEM_SET_TILING,
2512			    &set_tiling);
2513	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
2514	if (ret == -1)
2515		return -errno;
2516
2517	bo_gem->tiling_mode = set_tiling.tiling_mode;
2518	bo_gem->swizzle_mode = set_tiling.swizzle_mode;
2519	bo_gem->stride = set_tiling.stride;
2520	return 0;
2521}
2522
2523static int
2524drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
2525			    uint32_t stride)
2526{
2527	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2528	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2529	int ret;
2530
2531	/* Tiling with userptr surfaces is not supported
2532	 * on all hardware so refuse it for time being.
2533	 */
2534	if (bo_gem->is_userptr)
2535		return -EINVAL;
2536
2537	/* Linear buffers have no stride. By ensuring that we only ever use
2538	 * stride 0 with linear buffers, we simplify our code.
2539	 */
2540	if (*tiling_mode == I915_TILING_NONE)
2541		stride = 0;
2542
2543	ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride);
2544	if (ret == 0)
2545		drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
2546
2547	*tiling_mode = bo_gem->tiling_mode;
2548	return ret;
2549}
2550
2551static int
2552drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
2553			    uint32_t * swizzle_mode)
2554{
2555	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2556
2557	*tiling_mode = bo_gem->tiling_mode;
2558	*swizzle_mode = bo_gem->swizzle_mode;
2559	return 0;
2560}
2561
2562static int
2563drm_intel_gem_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset)
2564{
2565	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2566
2567	bo->offset64 = offset;
2568	bo->offset = offset;
2569	bo_gem->kflags |= EXEC_OBJECT_PINNED;
2570
2571	return 0;
2572}
2573
2574drm_public drm_intel_bo *
2575drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size)
2576{
2577	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
2578	int ret;
2579	uint32_t handle;
2580	drm_intel_bo_gem *bo_gem;
2581
2582	pthread_mutex_lock(&bufmgr_gem->lock);
2583	ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle);
2584	if (ret) {
2585		DBG("create_from_prime: failed to obtain handle from fd: %s\n", strerror(errno));
2586		pthread_mutex_unlock(&bufmgr_gem->lock);
2587		return NULL;
2588	}
2589
2590	/*
2591	 * See if the kernel has already returned this buffer to us. Just as
2592	 * for named buffers, we must not create two bo's pointing at the same
2593	 * kernel object
2594	 */
2595	HASH_FIND(handle_hh, bufmgr_gem->handle_table,
2596		  &handle, sizeof(handle), bo_gem);
2597	if (bo_gem) {
2598		drm_intel_gem_bo_reference(&bo_gem->bo);
2599		goto out;
2600	}
2601
2602	bo_gem = calloc(1, sizeof(*bo_gem));
2603	if (!bo_gem)
2604		goto out;
2605
2606	atomic_set(&bo_gem->refcount, 1);
2607	DRMINITLISTHEAD(&bo_gem->vma_list);
2608
2609	/* Determine size of bo.  The fd-to-handle ioctl really should
2610	 * return the size, but it doesn't.  If we have kernel 3.12 or
2611	 * later, we can lseek on the prime fd to get the size.  Older
2612	 * kernels will just fail, in which case we fall back to the
2613	 * provided (estimated or guess size). */
2614	ret = lseek(prime_fd, 0, SEEK_END);
2615	if (ret != -1)
2616		bo_gem->bo.size = ret;
2617	else
2618		bo_gem->bo.size = size;
2619
2620	bo_gem->bo.handle = handle;
2621	bo_gem->bo.bufmgr = bufmgr;
2622
2623	bo_gem->gem_handle = handle;
2624	HASH_ADD(handle_hh, bufmgr_gem->handle_table,
2625		 gem_handle, sizeof(bo_gem->gem_handle), bo_gem);
2626
2627	bo_gem->name = "prime";
2628	bo_gem->validate_index = -1;
2629	bo_gem->reloc_tree_fences = 0;
2630	bo_gem->used_as_reloc_target = false;
2631	bo_gem->has_error = false;
2632	bo_gem->reusable = false;
2633
2634	ret = get_tiling_mode(bufmgr_gem, handle,
2635			      &bo_gem->tiling_mode, &bo_gem->swizzle_mode);
2636	if (ret)
2637		goto err;
2638
2639	/* XXX stride is unknown */
2640	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
2641
2642out:
2643	pthread_mutex_unlock(&bufmgr_gem->lock);
2644	return &bo_gem->bo;
2645
2646err:
2647	drm_intel_gem_bo_free(&bo_gem->bo);
2648	pthread_mutex_unlock(&bufmgr_gem->lock);
2649	return NULL;
2650}
2651
2652drm_public int
2653drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd)
2654{
2655	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2656	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2657
2658	if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle,
2659			       DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0)
2660		return -errno;
2661
2662	bo_gem->reusable = false;
2663
2664	return 0;
2665}
2666
2667static int
2668drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name)
2669{
2670	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2671	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2672
2673	if (!bo_gem->global_name) {
2674		struct drm_gem_flink flink;
2675
2676		memclear(flink);
2677		flink.handle = bo_gem->gem_handle;
2678		if (drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink))
2679			return -errno;
2680
2681		pthread_mutex_lock(&bufmgr_gem->lock);
2682		if (!bo_gem->global_name) {
2683			bo_gem->global_name = flink.name;
2684			bo_gem->reusable = false;
2685
2686			HASH_ADD(name_hh, bufmgr_gem->name_table,
2687				 global_name, sizeof(bo_gem->global_name),
2688				 bo_gem);
2689		}
2690		pthread_mutex_unlock(&bufmgr_gem->lock);
2691	}
2692
2693	*name = bo_gem->global_name;
2694	return 0;
2695}
2696
2697/**
2698 * Enables unlimited caching of buffer objects for reuse.
2699 *
2700 * This is potentially very memory expensive, as the cache at each bucket
2701 * size is only bounded by how many buffers of that size we've managed to have
2702 * in flight at once.
2703 */
2704drm_public void
2705drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr)
2706{
2707	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
2708
2709	bufmgr_gem->bo_reuse = true;
2710}
2711
2712/**
2713 * Disables implicit synchronisation before executing the bo
2714 *
2715 * This will cause rendering corruption unless you correctly manage explicit
2716 * fences for all rendering involving this buffer - including use by others.
2717 * Disabling the implicit serialisation is only required if that serialisation
2718 * is too coarse (for example, you have split the buffer into many
2719 * non-overlapping regions and are sharing the whole buffer between concurrent
2720 * independent command streams).
2721 *
2722 * Note the kernel must advertise support via I915_PARAM_HAS_EXEC_ASYNC,
2723 * which can be checked using drm_intel_bufmgr_can_disable_implicit_sync,
2724 * or subsequent execbufs involving the bo will generate EINVAL.
2725 */
2726drm_public void
2727drm_intel_gem_bo_disable_implicit_sync(drm_intel_bo *bo)
2728{
2729	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2730
2731	bo_gem->kflags |= EXEC_OBJECT_ASYNC;
2732}
2733
2734/**
2735 * Enables implicit synchronisation before executing the bo
2736 *
2737 * This is the default behaviour of the kernel, to wait upon prior writes
2738 * completing on the object before rendering with it, or to wait for prior
2739 * reads to complete before writing into the object.
2740 * drm_intel_gem_bo_disable_implicit_sync() can stop this behaviour, telling
2741 * the kernel never to insert a stall before using the object. Then this
2742 * function can be used to restore the implicit sync before subsequent
2743 * rendering.
2744 */
2745drm_public void
2746drm_intel_gem_bo_enable_implicit_sync(drm_intel_bo *bo)
2747{
2748	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2749
2750	bo_gem->kflags &= ~EXEC_OBJECT_ASYNC;
2751}
2752
2753/**
2754 * Query whether the kernel supports disabling of its implicit synchronisation
2755 * before execbuf. See drm_intel_gem_bo_disable_implicit_sync()
2756 */
2757drm_public int
2758drm_intel_bufmgr_gem_can_disable_implicit_sync(drm_intel_bufmgr *bufmgr)
2759{
2760	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
2761
2762	return bufmgr_gem->has_exec_async;
2763}
2764
2765/**
2766 * Enable use of fenced reloc type.
2767 *
2768 * New code should enable this to avoid unnecessary fence register
2769 * allocation.  If this option is not enabled, all relocs will have fence
2770 * register allocated.
2771 */
2772drm_public void
2773drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr)
2774{
2775	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
2776	bufmgr_gem->fenced_relocs = true;
2777}
2778
2779/**
2780 * Return the additional aperture space required by the tree of buffer objects
2781 * rooted at bo.
2782 */
2783static int
2784drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
2785{
2786	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2787	int i;
2788	int total = 0;
2789
2790	if (bo == NULL || bo_gem->included_in_check_aperture)
2791		return 0;
2792
2793	total += bo->size;
2794	bo_gem->included_in_check_aperture = true;
2795
2796	for (i = 0; i < bo_gem->reloc_count; i++)
2797		total +=
2798		    drm_intel_gem_bo_get_aperture_space(bo_gem->
2799							reloc_target_info[i].bo);
2800
2801	return total;
2802}
2803
2804/**
2805 * Count the number of buffers in this list that need a fence reg
2806 *
2807 * If the count is greater than the number of available regs, we'll have
2808 * to ask the caller to resubmit a batch with fewer tiled buffers.
2809 *
2810 * This function over-counts if the same buffer is used multiple times.
2811 */
2812static unsigned int
2813drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count)
2814{
2815	int i;
2816	unsigned int total = 0;
2817
2818	for (i = 0; i < count; i++) {
2819		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
2820
2821		if (bo_gem == NULL)
2822			continue;
2823
2824		total += bo_gem->reloc_tree_fences;
2825	}
2826	return total;
2827}
2828
2829/**
2830 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready
2831 * for the next drm_intel_bufmgr_check_aperture_space() call.
2832 */
2833static void
2834drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
2835{
2836	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2837	int i;
2838
2839	if (bo == NULL || !bo_gem->included_in_check_aperture)
2840		return;
2841
2842	bo_gem->included_in_check_aperture = false;
2843
2844	for (i = 0; i < bo_gem->reloc_count; i++)
2845		drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->
2846							   reloc_target_info[i].bo);
2847}
2848
2849/**
2850 * Return a conservative estimate for the amount of aperture required
2851 * for a collection of buffers. This may double-count some buffers.
2852 */
2853static unsigned int
2854drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count)
2855{
2856	int i;
2857	unsigned int total = 0;
2858
2859	for (i = 0; i < count; i++) {
2860		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
2861		if (bo_gem != NULL)
2862			total += bo_gem->reloc_tree_size;
2863	}
2864	return total;
2865}
2866
2867/**
2868 * Return the amount of aperture needed for a collection of buffers.
2869 * This avoids double counting any buffers, at the cost of looking
2870 * at every buffer in the set.
2871 */
2872static unsigned int
2873drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count)
2874{
2875	int i;
2876	unsigned int total = 0;
2877
2878	for (i = 0; i < count; i++) {
2879		total += drm_intel_gem_bo_get_aperture_space(bo_array[i]);
2880		/* For the first buffer object in the array, we get an
2881		 * accurate count back for its reloc_tree size (since nothing
2882		 * had been flagged as being counted yet).  We can save that
2883		 * value out as a more conservative reloc_tree_size that
2884		 * avoids double-counting target buffers.  Since the first
2885		 * buffer happens to usually be the batch buffer in our
2886		 * callers, this can pull us back from doing the tree
2887		 * walk on every new batch emit.
2888		 */
2889		if (i == 0) {
2890			drm_intel_bo_gem *bo_gem =
2891			    (drm_intel_bo_gem *) bo_array[i];
2892			bo_gem->reloc_tree_size = total;
2893		}
2894	}
2895
2896	for (i = 0; i < count; i++)
2897		drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]);
2898	return total;
2899}
2900
2901/**
2902 * Return -1 if the batchbuffer should be flushed before attempting to
2903 * emit rendering referencing the buffers pointed to by bo_array.
2904 *
2905 * This is required because if we try to emit a batchbuffer with relocations
2906 * to a tree of buffers that won't simultaneously fit in the aperture,
2907 * the rendering will return an error at a point where the software is not
2908 * prepared to recover from it.
2909 *
2910 * However, we also want to emit the batchbuffer significantly before we reach
2911 * the limit, as a series of batchbuffers each of which references buffers
2912 * covering almost all of the aperture means that at each emit we end up
2913 * waiting to evict a buffer from the last rendering, and we get synchronous
2914 * performance.  By emitting smaller batchbuffers, we eat some CPU overhead to
2915 * get better parallelism.
2916 */
2917static int
2918drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
2919{
2920	drm_intel_bufmgr_gem *bufmgr_gem =
2921	    (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr;
2922	unsigned int total = 0;
2923	unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
2924	int total_fences;
2925
2926	/* Check for fence reg constraints if necessary */
2927	if (bufmgr_gem->available_fences) {
2928		total_fences = drm_intel_gem_total_fences(bo_array, count);
2929		if (total_fences > bufmgr_gem->available_fences)
2930			return -ENOSPC;
2931	}
2932
2933	total = drm_intel_gem_estimate_batch_space(bo_array, count);
2934
2935	if (total > threshold)
2936		total = drm_intel_gem_compute_batch_space(bo_array, count);
2937
2938	if (total > threshold) {
2939		DBG("check_space: overflowed available aperture, "
2940		    "%dkb vs %dkb\n",
2941		    total / 1024, (int)bufmgr_gem->gtt_size / 1024);
2942		return -ENOSPC;
2943	} else {
2944		DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024,
2945		    (int)bufmgr_gem->gtt_size / 1024);
2946		return 0;
2947	}
2948}
2949
2950/*
2951 * Disable buffer reuse for objects which are shared with the kernel
2952 * as scanout buffers
2953 */
2954static int
2955drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo)
2956{
2957	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2958
2959	bo_gem->reusable = false;
2960	return 0;
2961}
2962
2963static int
2964drm_intel_gem_bo_is_reusable(drm_intel_bo *bo)
2965{
2966	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2967
2968	return bo_gem->reusable;
2969}
2970
2971static int
2972_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
2973{
2974	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2975	int i;
2976
2977	for (i = 0; i < bo_gem->reloc_count; i++) {
2978		if (bo_gem->reloc_target_info[i].bo == target_bo)
2979			return 1;
2980		if (bo == bo_gem->reloc_target_info[i].bo)
2981			continue;
2982		if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo,
2983						target_bo))
2984			return 1;
2985	}
2986
2987	for (i = 0; i< bo_gem->softpin_target_count; i++) {
2988		if (bo_gem->softpin_target[i] == target_bo)
2989			return 1;
2990		if (_drm_intel_gem_bo_references(bo_gem->softpin_target[i], target_bo))
2991			return 1;
2992	}
2993
2994	return 0;
2995}
2996
2997/** Return true if target_bo is referenced by bo's relocation tree. */
2998static int
2999drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
3000{
3001	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
3002
3003	if (bo == NULL || target_bo == NULL)
3004		return 0;
3005	if (target_bo_gem->used_as_reloc_target)
3006		return _drm_intel_gem_bo_references(bo, target_bo);
3007	return 0;
3008}
3009
3010static void
3011add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
3012{
3013	unsigned int i = bufmgr_gem->num_buckets;
3014
3015	assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));
3016
3017	DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
3018	bufmgr_gem->cache_bucket[i].size = size;
3019	bufmgr_gem->num_buckets++;
3020}
3021
3022static void
3023init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
3024{
3025	unsigned long size, cache_max_size = 64 * 1024 * 1024;
3026
3027	/* OK, so power of two buckets was too wasteful of memory.
3028	 * Give 3 other sizes between each power of two, to hopefully
3029	 * cover things accurately enough.  (The alternative is
3030	 * probably to just go for exact matching of sizes, and assume
3031	 * that for things like composited window resize the tiled
3032	 * width/height alignment and rounding of sizes to pages will
3033	 * get us useful cache hit rates anyway)
3034	 */
3035	add_bucket(bufmgr_gem, 4096);
3036	add_bucket(bufmgr_gem, 4096 * 2);
3037	add_bucket(bufmgr_gem, 4096 * 3);
3038
3039	/* Initialize the linked lists for BO reuse cache. */
3040	for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
3041		add_bucket(bufmgr_gem, size);
3042
3043		add_bucket(bufmgr_gem, size + size * 1 / 4);
3044		add_bucket(bufmgr_gem, size + size * 2 / 4);
3045		add_bucket(bufmgr_gem, size + size * 3 / 4);
3046	}
3047}
3048
3049drm_public void
3050drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit)
3051{
3052	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3053
3054	bufmgr_gem->vma_max = limit;
3055
3056	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
3057}
3058
3059static int
3060parse_devid_override(const char *devid_override)
3061{
3062	static const struct {
3063		const char *name;
3064		int pci_id;
3065	} name_map[] = {
3066		{ "brw", PCI_CHIP_I965_GM },
3067		{ "g4x", PCI_CHIP_GM45_GM },
3068		{ "ilk", PCI_CHIP_ILD_G },
3069		{ "snb", PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS },
3070		{ "ivb", PCI_CHIP_IVYBRIDGE_S_GT2 },
3071		{ "hsw", PCI_CHIP_HASWELL_CRW_E_GT3 },
3072		{ "byt", PCI_CHIP_VALLEYVIEW_3 },
3073		{ "bdw", 0x1620 | BDW_ULX },
3074		{ "skl", PCI_CHIP_SKYLAKE_DT_GT2 },
3075		{ "kbl", PCI_CHIP_KABYLAKE_DT_GT2 },
3076	};
3077	unsigned int i;
3078
3079	for (i = 0; i < ARRAY_SIZE(name_map); i++) {
3080		if (!strcmp(name_map[i].name, devid_override))
3081			return name_map[i].pci_id;
3082	}
3083
3084	return strtod(devid_override, NULL);
3085}
3086
3087/**
3088 * Get the PCI ID for the device.  This can be overridden by setting the
3089 * INTEL_DEVID_OVERRIDE environment variable to the desired ID.
3090 */
3091static int
3092get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem)
3093{
3094	char *devid_override;
3095	int devid = 0;
3096	int ret;
3097	drm_i915_getparam_t gp;
3098
3099	if (geteuid() == getuid()) {
3100		devid_override = getenv("INTEL_DEVID_OVERRIDE");
3101		if (devid_override) {
3102			bufmgr_gem->no_exec = true;
3103			return parse_devid_override(devid_override);
3104		}
3105	}
3106
3107	memclear(gp);
3108	gp.param = I915_PARAM_CHIPSET_ID;
3109	gp.value = &devid;
3110	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3111	if (ret) {
3112		fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
3113		fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
3114	}
3115	return devid;
3116}
3117
3118drm_public int
3119drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr)
3120{
3121	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3122
3123	return bufmgr_gem->pci_device;
3124}
3125
3126/**
3127 * Sets the AUB filename.
3128 *
3129 * This function has to be called before drm_intel_bufmgr_gem_set_aub_dump()
3130 * for it to have any effect.
3131 */
3132drm_public void
3133drm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr,
3134				      const char *filename)
3135{
3136}
3137
3138/**
3139 * Sets up AUB dumping.
3140 *
3141 * This is a trace file format that can be used with the simulator.
3142 * Packets are emitted in a format somewhat like GPU command packets.
3143 * You can set up a GTT and upload your objects into the referenced
3144 * space, then send off batchbuffers and get BMPs out the other end.
3145 */
3146drm_public void
3147drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable)
3148{
3149	fprintf(stderr, "libdrm aub dumping is deprecated.\n\n"
3150		"Use intel_aubdump from intel-gpu-tools instead.  Install intel-gpu-tools,\n"
3151		"then run (for example)\n\n"
3152		"\t$ intel_aubdump --output=trace.aub glxgears -geometry 500x500\n\n"
3153		"See the intel_aubdump man page for more details.\n");
3154}
3155
3156drm_public drm_intel_context *
3157drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr)
3158{
3159	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3160	struct drm_i915_gem_context_create create;
3161	drm_intel_context *context = NULL;
3162	int ret;
3163
3164	context = calloc(1, sizeof(*context));
3165	if (!context)
3166		return NULL;
3167
3168	memclear(create);
3169	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
3170	if (ret != 0) {
3171		DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n",
3172		    strerror(errno));
3173		free(context);
3174		return NULL;
3175	}
3176
3177	context->ctx_id = create.ctx_id;
3178	context->bufmgr = bufmgr;
3179
3180	return context;
3181}
3182
3183drm_public int
3184drm_intel_gem_context_get_id(drm_intel_context *ctx, uint32_t *ctx_id)
3185{
3186	if (ctx == NULL)
3187		return -EINVAL;
3188
3189	*ctx_id = ctx->ctx_id;
3190
3191	return 0;
3192}
3193
3194drm_public void
3195drm_intel_gem_context_destroy(drm_intel_context *ctx)
3196{
3197	drm_intel_bufmgr_gem *bufmgr_gem;
3198	struct drm_i915_gem_context_destroy destroy;
3199	int ret;
3200
3201	if (ctx == NULL)
3202		return;
3203
3204	memclear(destroy);
3205
3206	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
3207	destroy.ctx_id = ctx->ctx_id;
3208	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY,
3209		       &destroy);
3210	if (ret != 0)
3211		fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
3212			strerror(errno));
3213
3214	free(ctx);
3215}
3216
3217drm_public int
3218drm_intel_get_reset_stats(drm_intel_context *ctx,
3219			  uint32_t *reset_count,
3220			  uint32_t *active,
3221			  uint32_t *pending)
3222{
3223	drm_intel_bufmgr_gem *bufmgr_gem;
3224	struct drm_i915_reset_stats stats;
3225	int ret;
3226
3227	if (ctx == NULL)
3228		return -EINVAL;
3229
3230	memclear(stats);
3231
3232	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
3233	stats.ctx_id = ctx->ctx_id;
3234	ret = drmIoctl(bufmgr_gem->fd,
3235		       DRM_IOCTL_I915_GET_RESET_STATS,
3236		       &stats);
3237	if (ret == 0) {
3238		if (reset_count != NULL)
3239			*reset_count = stats.reset_count;
3240
3241		if (active != NULL)
3242			*active = stats.batch_active;
3243
3244		if (pending != NULL)
3245			*pending = stats.batch_pending;
3246	}
3247
3248	return ret;
3249}
3250
3251drm_public int
3252drm_intel_reg_read(drm_intel_bufmgr *bufmgr,
3253		   uint32_t offset,
3254		   uint64_t *result)
3255{
3256	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3257	struct drm_i915_reg_read reg_read;
3258	int ret;
3259
3260	memclear(reg_read);
3261	reg_read.offset = offset;
3262
3263	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
3264
3265	*result = reg_read.val;
3266	return ret;
3267}
3268
3269drm_public int
3270drm_intel_get_subslice_total(int fd, unsigned int *subslice_total)
3271{
3272	drm_i915_getparam_t gp;
3273	int ret;
3274
3275	memclear(gp);
3276	gp.value = (int*)subslice_total;
3277	gp.param = I915_PARAM_SUBSLICE_TOTAL;
3278	ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
3279	if (ret)
3280		return -errno;
3281
3282	return 0;
3283}
3284
3285drm_public int
3286drm_intel_get_eu_total(int fd, unsigned int *eu_total)
3287{
3288	drm_i915_getparam_t gp;
3289	int ret;
3290
3291	memclear(gp);
3292	gp.value = (int*)eu_total;
3293	gp.param = I915_PARAM_EU_TOTAL;
3294	ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
3295	if (ret)
3296		return -errno;
3297
3298	return 0;
3299}
3300
3301drm_public int
3302drm_intel_get_pooled_eu(int fd)
3303{
3304	drm_i915_getparam_t gp;
3305	int ret = -1;
3306
3307	memclear(gp);
3308	gp.param = I915_PARAM_HAS_POOLED_EU;
3309	gp.value = &ret;
3310	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
3311		return -errno;
3312
3313	return ret;
3314}
3315
3316drm_public int
3317drm_intel_get_min_eu_in_pool(int fd)
3318{
3319	drm_i915_getparam_t gp;
3320	int ret = -1;
3321
3322	memclear(gp);
3323	gp.param = I915_PARAM_MIN_EU_IN_POOL;
3324	gp.value = &ret;
3325	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
3326		return -errno;
3327
3328	return ret;
3329}
3330
3331/**
3332 * Annotate the given bo for use in aub dumping.
3333 *
3334 * \param annotations is an array of drm_intel_aub_annotation objects
3335 * describing the type of data in various sections of the bo.  Each
3336 * element of the array specifies the type and subtype of a section of
3337 * the bo, and the past-the-end offset of that section.  The elements
3338 * of \c annotations must be sorted so that ending_offset is
3339 * increasing.
3340 *
3341 * \param count is the number of elements in the \c annotations array.
3342 * If \c count is zero, then \c annotations will not be dereferenced.
3343 *
3344 * Annotations are copied into a private data structure, so caller may
3345 * re-use the memory pointed to by \c annotations after the call
3346 * returns.
3347 *
3348 * Annotations are stored for the lifetime of the bo; to reset to the
3349 * default state (no annotations), call this function with a \c count
3350 * of zero.
3351 */
3352drm_public void drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo,
3353					 drm_intel_aub_annotation *annotations,
3354					 unsigned count)
3355{
3356}
3357
3358static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER;
3359static drmMMListHead bufmgr_list = { &bufmgr_list, &bufmgr_list };
3360
3361static drm_intel_bufmgr_gem *
3362drm_intel_bufmgr_gem_find(int fd)
3363{
3364	drm_intel_bufmgr_gem *bufmgr_gem;
3365
3366	DRMLISTFOREACHENTRY(bufmgr_gem, &bufmgr_list, managers) {
3367		if (bufmgr_gem->fd == fd) {
3368			atomic_inc(&bufmgr_gem->refcount);
3369			return bufmgr_gem;
3370		}
3371	}
3372
3373	return NULL;
3374}
3375
3376static void
3377drm_intel_bufmgr_gem_unref(drm_intel_bufmgr *bufmgr)
3378{
3379	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3380
3381	if (atomic_add_unless(&bufmgr_gem->refcount, -1, 1))
3382		return;
3383
3384	pthread_mutex_lock(&bufmgr_list_mutex);
3385
3386	if (atomic_dec_and_test(&bufmgr_gem->refcount)) {
3387		DRMLISTDEL(&bufmgr_gem->managers);
3388		drm_intel_bufmgr_gem_destroy(bufmgr);
3389	}
3390
3391	pthread_mutex_unlock(&bufmgr_list_mutex);
3392}
3393
3394drm_public void *drm_intel_gem_bo_map__gtt(drm_intel_bo *bo)
3395{
3396	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
3397	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3398
3399	if (bo_gem->gtt_virtual)
3400		return bo_gem->gtt_virtual;
3401
3402	if (bo_gem->is_userptr)
3403		return NULL;
3404
3405	pthread_mutex_lock(&bufmgr_gem->lock);
3406	if (bo_gem->gtt_virtual == NULL) {
3407		struct drm_i915_gem_mmap_gtt mmap_arg;
3408		void *ptr;
3409
3410		DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
3411		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
3412
3413		if (bo_gem->map_count++ == 0)
3414			drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
3415
3416		memclear(mmap_arg);
3417		mmap_arg.handle = bo_gem->gem_handle;
3418
3419		/* Get the fake offset back... */
3420		ptr = MAP_FAILED;
3421		if (drmIoctl(bufmgr_gem->fd,
3422			     DRM_IOCTL_I915_GEM_MMAP_GTT,
3423			     &mmap_arg) == 0) {
3424			/* and mmap it */
3425			ptr = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
3426				       MAP_SHARED, bufmgr_gem->fd,
3427				       mmap_arg.offset);
3428		}
3429		if (ptr == MAP_FAILED) {
3430			if (--bo_gem->map_count == 0)
3431				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
3432			ptr = NULL;
3433		}
3434
3435		bo_gem->gtt_virtual = ptr;
3436	}
3437	pthread_mutex_unlock(&bufmgr_gem->lock);
3438
3439	return bo_gem->gtt_virtual;
3440}
3441
3442drm_public void *drm_intel_gem_bo_map__cpu(drm_intel_bo *bo)
3443{
3444	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
3445	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3446
3447	if (bo_gem->mem_virtual)
3448		return bo_gem->mem_virtual;
3449
3450	if (bo_gem->is_userptr) {
3451		/* Return the same user ptr */
3452		return bo_gem->user_virtual;
3453	}
3454
3455	pthread_mutex_lock(&bufmgr_gem->lock);
3456	if (!bo_gem->mem_virtual) {
3457		struct drm_i915_gem_mmap mmap_arg;
3458
3459		if (bo_gem->map_count++ == 0)
3460			drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
3461
3462		DBG("bo_map: %d (%s), map_count=%d\n",
3463		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
3464
3465		memclear(mmap_arg);
3466		mmap_arg.handle = bo_gem->gem_handle;
3467		mmap_arg.size = bo->size;
3468		if (drmIoctl(bufmgr_gem->fd,
3469			     DRM_IOCTL_I915_GEM_MMAP,
3470			     &mmap_arg)) {
3471			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
3472			    __FILE__, __LINE__, bo_gem->gem_handle,
3473			    bo_gem->name, strerror(errno));
3474			if (--bo_gem->map_count == 0)
3475				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
3476		} else {
3477			VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
3478			bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
3479		}
3480	}
3481	pthread_mutex_unlock(&bufmgr_gem->lock);
3482
3483	return bo_gem->mem_virtual;
3484}
3485
3486drm_public void *drm_intel_gem_bo_map__wc(drm_intel_bo *bo)
3487{
3488	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
3489	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3490
3491	if (bo_gem->wc_virtual)
3492		return bo_gem->wc_virtual;
3493
3494	if (bo_gem->is_userptr)
3495		return NULL;
3496
3497	pthread_mutex_lock(&bufmgr_gem->lock);
3498	if (!bo_gem->wc_virtual) {
3499		struct drm_i915_gem_mmap mmap_arg;
3500
3501		if (bo_gem->map_count++ == 0)
3502			drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
3503
3504		DBG("bo_map: %d (%s), map_count=%d\n",
3505		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
3506
3507		memclear(mmap_arg);
3508		mmap_arg.handle = bo_gem->gem_handle;
3509		mmap_arg.size = bo->size;
3510		mmap_arg.flags = I915_MMAP_WC;
3511		if (drmIoctl(bufmgr_gem->fd,
3512			     DRM_IOCTL_I915_GEM_MMAP,
3513			     &mmap_arg)) {
3514			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
3515			    __FILE__, __LINE__, bo_gem->gem_handle,
3516			    bo_gem->name, strerror(errno));
3517			if (--bo_gem->map_count == 0)
3518				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
3519		} else {
3520			VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
3521			bo_gem->wc_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
3522		}
3523	}
3524	pthread_mutex_unlock(&bufmgr_gem->lock);
3525
3526	return bo_gem->wc_virtual;
3527}
3528
3529/**
3530 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
3531 * and manage map buffer objections.
3532 *
3533 * \param fd File descriptor of the opened DRM device.
3534 */
3535drm_public drm_intel_bufmgr *
3536drm_intel_bufmgr_gem_init(int fd, int batch_size)
3537{
3538	drm_intel_bufmgr_gem *bufmgr_gem;
3539	struct drm_i915_gem_get_aperture aperture;
3540	drm_i915_getparam_t gp;
3541	int ret, tmp;
3542
3543	pthread_mutex_lock(&bufmgr_list_mutex);
3544
3545	bufmgr_gem = drm_intel_bufmgr_gem_find(fd);
3546	if (bufmgr_gem)
3547		goto exit;
3548
3549	bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
3550	if (bufmgr_gem == NULL)
3551		goto exit;
3552
3553	bufmgr_gem->fd = fd;
3554	atomic_set(&bufmgr_gem->refcount, 1);
3555
3556	if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
3557		free(bufmgr_gem);
3558		bufmgr_gem = NULL;
3559		goto exit;
3560	}
3561
3562	memclear(aperture);
3563	ret = drmIoctl(bufmgr_gem->fd,
3564		       DRM_IOCTL_I915_GEM_GET_APERTURE,
3565		       &aperture);
3566
3567	if (ret == 0)
3568		bufmgr_gem->gtt_size = aperture.aper_available_size;
3569	else {
3570		fprintf(stderr, "DRM_IOCTL_I915_GEM_APERTURE failed: %s\n",
3571			strerror(errno));
3572		bufmgr_gem->gtt_size = 128 * 1024 * 1024;
3573		fprintf(stderr, "Assuming %dkB available aperture size.\n"
3574			"May lead to reduced performance or incorrect "
3575			"rendering.\n",
3576			(int)bufmgr_gem->gtt_size / 1024);
3577	}
3578
3579	bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem);
3580
3581	if (IS_GEN2(bufmgr_gem->pci_device))
3582		bufmgr_gem->gen = 2;
3583	else if (IS_GEN3(bufmgr_gem->pci_device))
3584		bufmgr_gem->gen = 3;
3585	else if (IS_GEN4(bufmgr_gem->pci_device))
3586		bufmgr_gem->gen = 4;
3587	else if (IS_GEN5(bufmgr_gem->pci_device))
3588		bufmgr_gem->gen = 5;
3589	else if (IS_GEN6(bufmgr_gem->pci_device))
3590		bufmgr_gem->gen = 6;
3591	else if (IS_GEN7(bufmgr_gem->pci_device))
3592		bufmgr_gem->gen = 7;
3593	else if (IS_GEN8(bufmgr_gem->pci_device))
3594		bufmgr_gem->gen = 8;
3595	else if (!intel_get_genx(bufmgr_gem->pci_device, &bufmgr_gem->gen)) {
3596		free(bufmgr_gem);
3597		bufmgr_gem = NULL;
3598		goto exit;
3599	}
3600
3601	if (IS_GEN3(bufmgr_gem->pci_device) &&
3602	    bufmgr_gem->gtt_size > 256*1024*1024) {
3603		/* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't
3604		 * be used for tiled blits. To simplify the accounting, just
3605		 * subtract the unmappable part (fixed to 256MB on all known
3606		 * gen3 devices) if the kernel advertises it. */
3607		bufmgr_gem->gtt_size -= 256*1024*1024;
3608	}
3609
3610	memclear(gp);
3611	gp.value = &tmp;
3612
3613	gp.param = I915_PARAM_HAS_EXECBUF2;
3614	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3615	if (ret) {
3616		fprintf(stderr, "i915 does not support EXECBUFER2\n");
3617		free(bufmgr_gem);
3618		bufmgr_gem = NULL;
3619        goto exit;
3620    }
3621
3622	gp.param = I915_PARAM_HAS_BSD;
3623	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3624	bufmgr_gem->has_bsd = ret == 0;
3625
3626	gp.param = I915_PARAM_HAS_BLT;
3627	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3628	bufmgr_gem->has_blt = ret == 0;
3629
3630	gp.param = I915_PARAM_HAS_RELAXED_FENCING;
3631	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3632	bufmgr_gem->has_relaxed_fencing = ret == 0;
3633
3634	gp.param = I915_PARAM_HAS_EXEC_ASYNC;
3635	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3636	bufmgr_gem->has_exec_async = ret == 0;
3637
3638	bufmgr_gem->bufmgr.bo_alloc_userptr = check_bo_alloc_userptr;
3639
3640	gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
3641	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3642	bufmgr_gem->has_wait_timeout = ret == 0;
3643
3644	gp.param = I915_PARAM_HAS_LLC;
3645	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3646	if (ret != 0) {
3647		/* Kernel does not supports HAS_LLC query, fallback to GPU
3648		 * generation detection and assume that we have LLC on GEN6/7
3649		 */
3650		bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) |
3651				IS_GEN7(bufmgr_gem->pci_device));
3652	} else
3653		bufmgr_gem->has_llc = *gp.value;
3654
3655	gp.param = I915_PARAM_HAS_VEBOX;
3656	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3657	bufmgr_gem->has_vebox = (ret == 0) & (*gp.value > 0);
3658
3659	gp.param = I915_PARAM_HAS_EXEC_SOFTPIN;
3660	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3661	if (ret == 0 && *gp.value > 0)
3662		bufmgr_gem->bufmgr.bo_set_softpin_offset = drm_intel_gem_bo_set_softpin_offset;
3663
3664	if (bufmgr_gem->gen < 4) {
3665		gp.param = I915_PARAM_NUM_FENCES_AVAIL;
3666		gp.value = &bufmgr_gem->available_fences;
3667		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3668		if (ret) {
3669			fprintf(stderr, "get fences failed: %d [%d]\n", ret,
3670				errno);
3671			fprintf(stderr, "param: %d, val: %d\n", gp.param,
3672				*gp.value);
3673			bufmgr_gem->available_fences = 0;
3674		} else {
3675			/* XXX The kernel reports the total number of fences,
3676			 * including any that may be pinned.
3677			 *
3678			 * We presume that there will be at least one pinned
3679			 * fence for the scanout buffer, but there may be more
3680			 * than one scanout and the user may be manually
3681			 * pinning buffers. Let's move to execbuffer2 and
3682			 * thereby forget the insanity of using fences...
3683			 */
3684			bufmgr_gem->available_fences -= 2;
3685			if (bufmgr_gem->available_fences < 0)
3686				bufmgr_gem->available_fences = 0;
3687		}
3688	}
3689
3690	if (bufmgr_gem->gen >= 8) {
3691		gp.param = I915_PARAM_HAS_ALIASING_PPGTT;
3692		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3693		if (ret == 0 && *gp.value == 3)
3694			bufmgr_gem->bufmgr.bo_use_48b_address_range = drm_intel_gem_bo_use_48b_address_range;
3695	}
3696
3697	/* Let's go with one relocation per every 2 dwords (but round down a bit
3698	 * since a power of two will mean an extra page allocation for the reloc
3699	 * buffer).
3700	 *
3701	 * Every 4 was too few for the blender benchmark.
3702	 */
3703	bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
3704
3705	bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
3706	bufmgr_gem->bufmgr.bo_alloc_for_render =
3707	    drm_intel_gem_bo_alloc_for_render;
3708	bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
3709	bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
3710	bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
3711	bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
3712	bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
3713	bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
3714	bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
3715	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
3716	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
3717	bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
3718	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
3719	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
3720	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
3721	bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
3722	bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
3723	bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2;
3724	bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2;
3725	bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy;
3726	bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise;
3727	bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_unref;
3728	bufmgr_gem->bufmgr.debug = 0;
3729	bufmgr_gem->bufmgr.check_aperture_space =
3730	    drm_intel_gem_check_aperture_space;
3731	bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
3732	bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable;
3733	bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
3734	    drm_intel_gem_get_pipe_from_crtc_id;
3735	bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;
3736
3737	init_cache_buckets(bufmgr_gem);
3738
3739	DRMINITLISTHEAD(&bufmgr_gem->vma_cache);
3740	bufmgr_gem->vma_max = -1; /* unlimited by default */
3741
3742	DRMLISTADD(&bufmgr_gem->managers, &bufmgr_list);
3743
3744exit:
3745	pthread_mutex_unlock(&bufmgr_list_mutex);
3746
3747	return bufmgr_gem != NULL ? &bufmgr_gem->bufmgr : NULL;
3748}
3749