intel_bufmgr_gem.c revision d6e8b34d
1/**************************************************************************
2 *
3 * Copyright © 2007 Red Hat Inc.
4 * Copyright © 2007-2012 Intel Corporation
5 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
6 * All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sub license, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * The above copyright notice and this permission notice (including the
25 * next paragraph) shall be included in all copies or substantial portions
26 * of the Software.
27 *
28 *
29 **************************************************************************/
30/*
31 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
32 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
33 *	    Eric Anholt <eric@anholt.net>
34 *	    Dave Airlie <airlied@linux.ie>
35 */
36
37#include <xf86drm.h>
38#include <xf86atomic.h>
39#include <fcntl.h>
40#include <stdio.h>
41#include <stdlib.h>
42#include <string.h>
43#include <unistd.h>
44#include <assert.h>
45#include <pthread.h>
46#include <stddef.h>
47#include <sys/ioctl.h>
48#include <sys/stat.h>
49#include <sys/types.h>
50#include <stdbool.h>
51
52#include <errno.h>
53#ifndef ETIME
54#define ETIME ETIMEDOUT
55#endif
56#include "libdrm_macros.h"
57#include "libdrm_lists.h"
58#include "intel_bufmgr.h"
59#include "intel_bufmgr_priv.h"
60#include "intel_chipset.h"
62
63#include "i915_drm.h"
64#include "uthash.h"
65
66#if HAVE_VALGRIND
67#include <valgrind.h>
68#include <memcheck.h>
69#define VG(x) x
70#else
71#define VG(x)
72#endif
73
74#define memclear(s) memset(&s, 0, sizeof(s))
75
76#define DBG(...) do {					\
77	if (bufmgr_gem->bufmgr.debug)			\
78		fprintf(stderr, __VA_ARGS__);		\
79} while (0)
80
81#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
82#define MAX2(A, B) ((A) > (B) ? (A) : (B))
83
84/**
85 * upper_32_bits - return bits 32-63 of a number
86 * @n: the number we're accessing
87 *
88 * A basic shift-right of a 64- or 32-bit quantity.  Use this to suppress
89 * the "right shift count >= width of type" warning when that quantity is
90 * 32-bits.
91 */
92#define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16))
93
94/**
95 * lower_32_bits - return bits 0-31 of a number
96 * @n: the number we're accessing
97 */
98#define lower_32_bits(n) ((__u32)(n))
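
/*
 * Example (illustrative only): the validation-list debug output below
 * splits 64-bit GTT offsets into two 32-bit halves with these helpers:
 *
 *	uint64_t offset = bo->offset64;
 *	fprintf(stderr, "0x%08x %08x\n",
 *		upper_32_bits(offset), lower_32_bits(offset));
 */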
99
100typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
101
102struct drm_intel_gem_bo_bucket {
103	drmMMListHead head;
104	unsigned long size;
105};
106
107typedef struct _drm_intel_bufmgr_gem {
108	drm_intel_bufmgr bufmgr;
109
110	atomic_t refcount;
111
112	int fd;
113
114	int max_relocs;
115
116	pthread_mutex_t lock;
117
118	struct drm_i915_gem_exec_object *exec_objects;
119	struct drm_i915_gem_exec_object2 *exec2_objects;
120	drm_intel_bo **exec_bos;
121	int exec_size;
122	int exec_count;
123
124	/** Array of lists of cached gem objects, bucketed by size */
125	struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
126	int num_buckets;
127	time_t time;
128
129	drmMMListHead managers;
130
131	drm_intel_bo_gem *name_table;
132	drm_intel_bo_gem *handle_table;
133
134	drmMMListHead vma_cache;
135	int vma_count, vma_open, vma_max;
136
137	uint64_t gtt_size;
138	int available_fences;
139	int pci_device;
140	int gen;
141	unsigned int has_bsd : 1;
142	unsigned int has_blt : 1;
143	unsigned int has_relaxed_fencing : 1;
144	unsigned int has_llc : 1;
145	unsigned int has_wait_timeout : 1;
146	unsigned int bo_reuse : 1;
147	unsigned int no_exec : 1;
148	unsigned int has_vebox : 1;
149	unsigned int has_exec_async : 1;
150	bool fenced_relocs;
151
152	struct {
153		void *ptr;
154		uint32_t handle;
155	} userptr_active;
156
157} drm_intel_bufmgr_gem;
158
159#define DRM_INTEL_RELOC_FENCE (1<<0)
160
161typedef struct _drm_intel_reloc_target_info {
162	drm_intel_bo *bo;
163	int flags;
164} drm_intel_reloc_target;
165
166struct _drm_intel_bo_gem {
167	drm_intel_bo bo;
168
169	atomic_t refcount;
170	uint32_t gem_handle;
171	const char *name;
172
173	/**
174	 * Kernel-assigned global name for this object.
175	 *
176	 * The list contains both flink-named and prime-fd'd objects.
177	 */
178	unsigned int global_name;
179
180	UT_hash_handle handle_hh;
181	UT_hash_handle name_hh;
182
183	/**
184	 * Index of the buffer within the validation list while preparing a
185	 * batchbuffer execution.
186	 */
187	int validate_index;
188
189	/**
190	 * Current tiling mode
191	 */
192	uint32_t tiling_mode;
193	uint32_t swizzle_mode;
194	unsigned long stride;
195
196	unsigned long kflags;
197
198	time_t free_time;
199
200	/** Array passed to the DRM containing relocation information. */
201	struct drm_i915_gem_relocation_entry *relocs;
202	/**
203	 * Array of info structs corresponding to relocs[i].target_handle etc
204	 */
205	drm_intel_reloc_target *reloc_target_info;
206	/** Number of entries in relocs */
207	int reloc_count;
208	/** Array of BOs that are referenced by this buffer and will be softpinned */
209	drm_intel_bo **softpin_target;
210	/** Number of softpinned BOs referenced by this buffer */
211	int softpin_target_count;
212	/** Maximum number of softpinned BOs that can be referenced by this buffer */
213	int softpin_target_size;
214
215	/** Mapped address for the buffer, saved across map/unmap cycles */
216	void *mem_virtual;
217	/** GTT virtual address for the buffer, saved across map/unmap cycles */
218	void *gtt_virtual;
219	/** WC CPU address for the buffer, saved across map/unmap cycles */
220	void *wc_virtual;
221	/**
222	 * Virtual address of the buffer allocated by user, used for userptr
223	 * objects only.
224	 */
225	void *user_virtual;
226	int map_count;
227	drmMMListHead vma_list;
228
229	/** BO cache list */
230	drmMMListHead head;
231
232	/**
233	 * Boolean of whether this BO and its children have been included in
234	 * the current drm_intel_bufmgr_check_aperture_space() total.
235	 */
236	bool included_in_check_aperture;
237
238	/**
239	 * Boolean of whether this buffer has been used as a relocation
240	 * target and had its size accounted for, and thus can't have any
241	 * further relocations added to it.
242	 */
243	bool used_as_reloc_target;
244
245	/**
246	 * Boolean of whether we have encountered an error whilst building the relocation tree.
247	 */
248	bool has_error;
249
250	/**
251	 * Boolean of whether this buffer can be re-used
252	 */
253	bool reusable;
254
255	/**
256	 * Boolean of whether the GPU is definitely not accessing the buffer.
257	 *
258	 * This is only valid when reusable, since non-reusable
259	 * buffers are those that have been shared with other
260	 * processes, so we don't know their state.
261	 */
262	bool idle;
263
264	/**
265	 * Boolean of whether this buffer was allocated with userptr
266	 */
267	bool is_userptr;
268
269	/**
270	 * Size in bytes of this buffer and its relocation descendents.
271	 *
272	 * Used to avoid costly tree walking in
273	 * drm_intel_bufmgr_check_aperture in the common case.
274	 */
275	int reloc_tree_size;
276
277	/**
278	 * Number of potential fence registers required by this buffer and its
279	 * relocations.
280	 */
281	int reloc_tree_fences;
282
283	/** Whether we may need to do the SW_FINISH ioctl on unmap. */
284	bool mapped_cpu_write;
285};
286
287static unsigned int
288drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);
289
290static unsigned int
291drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count);
292
293static int
294drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
295			    uint32_t * swizzle_mode);
296
297static int
298drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
299				     uint32_t tiling_mode,
300				     uint32_t stride);
301
302static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
303						      time_t time);
304
305static void drm_intel_gem_bo_unreference(drm_intel_bo *bo);
306
307static void drm_intel_gem_bo_free(drm_intel_bo *bo);
308
309static inline drm_intel_bo_gem *to_bo_gem(drm_intel_bo *bo)
310{
311        return (drm_intel_bo_gem *)bo;
312}
313
314static unsigned long
315drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
316			   uint32_t *tiling_mode)
317{
318	unsigned long min_size, max_size;
319	unsigned long i;
320
321	if (*tiling_mode == I915_TILING_NONE)
322		return size;
323
324	/* 965+ just need multiples of page size for tiling */
325	if (bufmgr_gem->gen >= 4)
326		return ROUND_UP_TO(size, 4096);
327
328	/* Older chips need powers of two, of at least 512k or 1M */
329	if (bufmgr_gem->gen == 3) {
330		min_size = 1024*1024;
331		max_size = 128*1024*1024;
332	} else {
333		min_size = 512*1024;
334		max_size = 64*1024*1024;
335	}
336
337	if (size > max_size) {
338		*tiling_mode = I915_TILING_NONE;
339		return size;
340	}
341
342	/* Do we need to allocate every page for the fence? */
343	if (bufmgr_gem->has_relaxed_fencing)
344		return ROUND_UP_TO(size, 4096);
345
346	for (i = min_size; i < size; i <<= 1)
347		;
348
349	return i;
350}
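
/*
 * Worked example (illustrative): on gen3 a 700 KiB X-tiled request is
 * raised to the 1 MiB minimum fence size (already a power of two), so
 * 1 MiB is returned; on gen4+, or with relaxed fencing, the same request
 * would simply be rounded up to the next 4096-byte page boundary.
 */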
351
352/*
353 * Round a given pitch up to the minimum required for X tiling on a
354 * given chip.  We use 512 as the minimum to allow for a later tiling
355 * change.
356 */
357static unsigned long
358drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
359			    unsigned long pitch, uint32_t *tiling_mode)
360{
361	unsigned long tile_width;
362	unsigned long i;
363
364	/* If untiled, then just align it so that we can do rendering
365	 * to it with the 3D engine.
366	 */
367	if (*tiling_mode == I915_TILING_NONE)
368		return ALIGN(pitch, 64);
369
370	if (*tiling_mode == I915_TILING_X
371			|| (IS_915(bufmgr_gem->pci_device)
372			    && *tiling_mode == I915_TILING_Y))
373		tile_width = 512;
374	else
375		tile_width = 128;
376
377	/* 965 is flexible */
378	if (bufmgr_gem->gen >= 4)
379		return ROUND_UP_TO(pitch, tile_width);
380
381	/* The older hardware has a maximum pitch of 8192 with tiled
382	 * surfaces, so fallback to untiled if it's too large.
383	 */
384	if (pitch > 8192) {
385		*tiling_mode = I915_TILING_NONE;
386		return ALIGN(pitch, 64);
387	}
388
389	/* Pre-965 needs power of two tile width */
390	for (i = tile_width; i < pitch; i <<= 1)
391		;
392
393	return i;
394}
395
396static struct drm_intel_gem_bo_bucket *
397drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
398				 unsigned long size)
399{
400	int i;
401
402	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
403		struct drm_intel_gem_bo_bucket *bucket =
404		    &bufmgr_gem->cache_bucket[i];
405		if (bucket->size >= size) {
406			return bucket;
407		}
408	}
409
410	return NULL;
411}
412
413static void
414drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
415{
416	int i, j;
417
418	for (i = 0; i < bufmgr_gem->exec_count; i++) {
419		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
420		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
421
422		if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) {
423			DBG("%2d: %d %s(%s)\n", i, bo_gem->gem_handle,
424			    bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
425			    bo_gem->name);
426			continue;
427		}
428
429		for (j = 0; j < bo_gem->reloc_count; j++) {
430			drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo;
431			drm_intel_bo_gem *target_gem =
432			    (drm_intel_bo_gem *) target_bo;
433
434			DBG("%2d: %d %s(%s)@0x%08x %08x -> "
435			    "%d (%s)@0x%08x %08x + 0x%08x\n",
436			    i,
437			    bo_gem->gem_handle,
438			    bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
439			    bo_gem->name,
440			    upper_32_bits(bo_gem->relocs[j].offset),
441			    lower_32_bits(bo_gem->relocs[j].offset),
442			    target_gem->gem_handle,
443			    target_gem->name,
444			    upper_32_bits(target_bo->offset64),
445			    lower_32_bits(target_bo->offset64),
446			    bo_gem->relocs[j].delta);
447		}
448
449		for (j = 0; j < bo_gem->softpin_target_count; j++) {
450			drm_intel_bo *target_bo = bo_gem->softpin_target[j];
451			drm_intel_bo_gem *target_gem =
452			    (drm_intel_bo_gem *) target_bo;
453			DBG("%2d: %d %s(%s) -> "
454			    "%d *(%s)@0x%08x %08x\n",
455			    i,
456			    bo_gem->gem_handle,
457			    bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
458			    bo_gem->name,
459			    target_gem->gem_handle,
460			    target_gem->name,
461			    upper_32_bits(target_bo->offset64),
462			    lower_32_bits(target_bo->offset64));
463		}
464	}
465}
466
467static inline void
468drm_intel_gem_bo_reference(drm_intel_bo *bo)
469{
470	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
471
472	atomic_inc(&bo_gem->refcount);
473}
474
475/**
476 * Adds the given buffer to the list of buffers to be validated (moved into the
477 * appropriate memory type) with the next batch submission.
478 *
479 * If a buffer is validated multiple times in a batch submission, it ends up
480 * with the intersection of the memory type flags and the union of the
481 * access flags.
482 */
483static void
484drm_intel_add_validate_buffer(drm_intel_bo *bo)
485{
486	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
487	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
488	int index;
489
490	if (bo_gem->validate_index != -1)
491		return;
492
493	/* Extend the array of validation entries as necessary. */
494	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
495		int new_size = bufmgr_gem->exec_size * 2;
496
497		if (new_size == 0)
498			new_size = 5;
499
500		bufmgr_gem->exec_objects =
501		    realloc(bufmgr_gem->exec_objects,
502			    sizeof(*bufmgr_gem->exec_objects) * new_size);
503		bufmgr_gem->exec_bos =
504		    realloc(bufmgr_gem->exec_bos,
505			    sizeof(*bufmgr_gem->exec_bos) * new_size);
506		bufmgr_gem->exec_size = new_size;
507	}
508
509	index = bufmgr_gem->exec_count;
510	bo_gem->validate_index = index;
511	/* Fill in array entry */
512	bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
513	bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
514	bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs;
515	bufmgr_gem->exec_objects[index].alignment = bo->align;
516	bufmgr_gem->exec_objects[index].offset = 0;
517	bufmgr_gem->exec_bos[index] = bo;
518	bufmgr_gem->exec_count++;
519}
520
521static void
522drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
523{
524	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
525	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
526	int index;
527	unsigned long flags;
528
529	flags = 0;
530	if (need_fence)
531		flags |= EXEC_OBJECT_NEEDS_FENCE;
532
533	if (bo_gem->validate_index != -1) {
534		bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= flags;
535		return;
536	}
537
538	/* Extend the array of validation entries as necessary. */
539	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
540		int new_size = bufmgr_gem->exec_size * 2;
541
542		if (new_size == 0)
543			new_size = 5;
544
545		bufmgr_gem->exec2_objects =
546			realloc(bufmgr_gem->exec2_objects,
547				sizeof(*bufmgr_gem->exec2_objects) * new_size);
548		bufmgr_gem->exec_bos =
549			realloc(bufmgr_gem->exec_bos,
550				sizeof(*bufmgr_gem->exec_bos) * new_size);
551		bufmgr_gem->exec_size = new_size;
552	}
553
554	index = bufmgr_gem->exec_count;
555	bo_gem->validate_index = index;
556	/* Fill in array entry */
557	bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
558	bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
559	bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
560	bufmgr_gem->exec2_objects[index].alignment = bo->align;
561	bufmgr_gem->exec2_objects[index].offset = bo->offset64;
562	bufmgr_gem->exec2_objects[index].flags = bo_gem->kflags | flags;
563	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
564	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
565	bufmgr_gem->exec_bos[index] = bo;
566	bufmgr_gem->exec_count++;
567}
568
569#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
570	sizeof(uint32_t))
571
572static void
573drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem,
574				      drm_intel_bo_gem *bo_gem,
575				      unsigned int alignment)
576{
577	unsigned int size;
578
579	assert(!bo_gem->used_as_reloc_target);
580
581	/* The older chipsets are far less flexible in terms of tiling,
582	 * and require tiled buffers to be size-aligned in the aperture.
583	 * This means that in the worst possible case we will need a hole
584	 * twice as large as the object in order for it to fit into the
585	 * aperture. Optimal packing is for wimps.
586	 */
587	size = bo_gem->bo.size;
588	if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) {
589		unsigned int min_size;
590
591		if (bufmgr_gem->has_relaxed_fencing) {
592			if (bufmgr_gem->gen == 3)
593				min_size = 1024*1024;
594			else
595				min_size = 512*1024;
596
597			while (min_size < size)
598				min_size *= 2;
599		} else
600			min_size = size;
601
602		/* Account for worst-case alignment. */
603		alignment = MAX2(alignment, min_size);
604	}
605
606	bo_gem->reloc_tree_size = size + alignment;
607}
608
609static int
610drm_intel_setup_reloc_list(drm_intel_bo *bo)
611{
612	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
613	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
614	unsigned int max_relocs = bufmgr_gem->max_relocs;
615
616	if (bo->size / 4 < max_relocs)
617		max_relocs = bo->size / 4;
618
619	bo_gem->relocs = malloc(max_relocs *
620				sizeof(struct drm_i915_gem_relocation_entry));
621	bo_gem->reloc_target_info = malloc(max_relocs *
622					   sizeof(drm_intel_reloc_target));
623	if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) {
624		bo_gem->has_error = true;
625
626		free (bo_gem->relocs);
627		bo_gem->relocs = NULL;
628
629		free (bo_gem->reloc_target_info);
630		bo_gem->reloc_target_info = NULL;
631
632		return 1;
633	}
634
635	return 0;
636}
637
638static int
639drm_intel_gem_bo_busy(drm_intel_bo *bo)
640{
641	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
642	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
643	struct drm_i915_gem_busy busy;
644	int ret;
645
646	if (bo_gem->reusable && bo_gem->idle)
647		return false;
648
649	memclear(busy);
650	busy.handle = bo_gem->gem_handle;
651
652	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
653	if (ret == 0) {
654		bo_gem->idle = !busy.busy;
655		return busy.busy;
656	} else {
657		return false;
658	}
659}
660
661static int
662drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem,
663				  drm_intel_bo_gem *bo_gem, int state)
664{
665	struct drm_i915_gem_madvise madv;
666
667	memclear(madv);
668	madv.handle = bo_gem->gem_handle;
669	madv.madv = state;
670	madv.retained = 1;
671	drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
672
673	return madv.retained;
674}
675
676static int
677drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv)
678{
679	return drm_intel_gem_bo_madvise_internal
680		((drm_intel_bufmgr_gem *) bo->bufmgr,
681		 (drm_intel_bo_gem *) bo,
682		 madv);
683}
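
/*
 * Sketch of how a caller might drive the public wrapper above (assumes
 * an already-allocated bo; illustrative only):
 *
 *	drm_intel_bo_madvise(bo, I915_MADV_DONTNEED);
 *	...
 *	retained = drm_intel_bo_madvise(bo, I915_MADV_WILLNEED);
 *
 * A zero "retained" return means the kernel purged the pages while the
 * buffer was marked DONTNEED, and the caller must re-upload its contents.
 */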
684
685/* drop the oldest entries that have been purged by the kernel */
686static void
687drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
688				    struct drm_intel_gem_bo_bucket *bucket)
689{
690	while (!DRMLISTEMPTY(&bucket->head)) {
691		drm_intel_bo_gem *bo_gem;
692
693		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
694				      bucket->head.next, head);
695		if (drm_intel_gem_bo_madvise_internal
696		    (bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
697			break;
698
699		DRMLISTDEL(&bo_gem->head);
700		drm_intel_gem_bo_free(&bo_gem->bo);
701	}
702}
703
704static drm_intel_bo *
705drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
706				const char *name,
707				unsigned long size,
708				unsigned long flags,
709				uint32_t tiling_mode,
710				unsigned long stride,
711				unsigned int alignment)
712{
713	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
714	drm_intel_bo_gem *bo_gem;
715	unsigned int page_size = getpagesize();
716	int ret;
717	struct drm_intel_gem_bo_bucket *bucket;
718	bool alloc_from_cache;
719	unsigned long bo_size;
720	bool for_render = false;
721
722	if (flags & BO_ALLOC_FOR_RENDER)
723		for_render = true;
724
725	/* Round the allocated size up to the nearest cached bucket size. */
726	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
727
728	/* If we don't have caching at this size, don't actually round the
729	 * allocation up.
730	 */
731	if (bucket == NULL) {
732		bo_size = size;
733		if (bo_size < page_size)
734			bo_size = page_size;
735	} else {
736		bo_size = bucket->size;
737	}
738
739	pthread_mutex_lock(&bufmgr_gem->lock);
740	/* Get a buffer out of the cache if available */
741retry:
742	alloc_from_cache = false;
743	if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
744		if (for_render) {
745			/* Allocate new render-target BOs from the tail (MRU)
746			 * of the list, as it will likely be hot in the GPU
747			 * cache and in the aperture for us.
748			 */
749			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
750					      bucket->head.prev, head);
751			DRMLISTDEL(&bo_gem->head);
752			alloc_from_cache = true;
753			bo_gem->bo.align = alignment;
754		} else {
755			assert(alignment == 0);
756			/* For non-render-target BOs (where we're probably
757			 * going to map it first thing in order to fill it
758			 * with data), check if the last BO in the cache is
759			 * unbusy, and only reuse in that case. Otherwise,
760			 * allocating a new buffer is probably faster than
761			 * waiting for the GPU to finish.
762			 */
763			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
764					      bucket->head.next, head);
765			if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
766				alloc_from_cache = true;
767				DRMLISTDEL(&bo_gem->head);
768			}
769		}
770
771		if (alloc_from_cache) {
772			if (!drm_intel_gem_bo_madvise_internal
773			    (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
774				drm_intel_gem_bo_free(&bo_gem->bo);
775				drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
776								    bucket);
777				goto retry;
778			}
779
780			if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
781								 tiling_mode,
782								 stride)) {
783				drm_intel_gem_bo_free(&bo_gem->bo);
784				goto retry;
785			}
786		}
787	}
788
789	if (!alloc_from_cache) {
790		struct drm_i915_gem_create create;
791
792		bo_gem = calloc(1, sizeof(*bo_gem));
793		if (!bo_gem)
794			goto err;
795
796		/* drm_intel_gem_bo_free() would call DRMLISTDEL() on an
797		   uninitialized list (vma_list), so set the list head here first */
798		DRMINITLISTHEAD(&bo_gem->vma_list);
799
800		bo_gem->bo.size = bo_size;
801
802		memclear(create);
803		create.size = bo_size;
804
805		ret = drmIoctl(bufmgr_gem->fd,
806			       DRM_IOCTL_I915_GEM_CREATE,
807			       &create);
808		if (ret != 0) {
809			free(bo_gem);
810			goto err;
811		}
812
813		bo_gem->gem_handle = create.handle;
814		HASH_ADD(handle_hh, bufmgr_gem->handle_table,
815			 gem_handle, sizeof(bo_gem->gem_handle),
816			 bo_gem);
817
818		bo_gem->bo.handle = bo_gem->gem_handle;
819		bo_gem->bo.bufmgr = bufmgr;
820		bo_gem->bo.align = alignment;
821
822		bo_gem->tiling_mode = I915_TILING_NONE;
823		bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
824		bo_gem->stride = 0;
825
826		if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
827							 tiling_mode,
828							 stride))
829			goto err_free;
830	}
831
832	bo_gem->name = name;
833	atomic_set(&bo_gem->refcount, 1);
834	bo_gem->validate_index = -1;
835	bo_gem->reloc_tree_fences = 0;
836	bo_gem->used_as_reloc_target = false;
837	bo_gem->has_error = false;
838	bo_gem->reusable = true;
839
840	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, alignment);
841	pthread_mutex_unlock(&bufmgr_gem->lock);
842
843	DBG("bo_create: buf %d (%s) %ldb\n",
844	    bo_gem->gem_handle, bo_gem->name, size);
845
846	return &bo_gem->bo;
847
848err_free:
849	drm_intel_gem_bo_free(&bo_gem->bo);
850err:
851	pthread_mutex_unlock(&bufmgr_gem->lock);
852	return NULL;
853}
854
855static drm_intel_bo *
856drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
857				  const char *name,
858				  unsigned long size,
859				  unsigned int alignment)
860{
861	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
862					       BO_ALLOC_FOR_RENDER,
863					       I915_TILING_NONE, 0,
864					       alignment);
865}
866
867static drm_intel_bo *
868drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr,
869		       const char *name,
870		       unsigned long size,
871		       unsigned int alignment)
872{
873	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0,
874					       I915_TILING_NONE, 0, 0);
875}
876
877static drm_intel_bo *
878drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
879			     int x, int y, int cpp, uint32_t *tiling_mode,
880			     unsigned long *pitch, unsigned long flags)
881{
882	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
883	unsigned long size, stride;
884	uint32_t tiling;
885
886	do {
887		unsigned long aligned_y, height_alignment;
888
889		tiling = *tiling_mode;
890
891		/* If we're tiled, our allocations are in 8 or 32-row blocks,
892		 * so failure to align our height means that we won't allocate
893		 * enough pages.
894		 *
895		 * If we're untiled, we still have to align to 2 rows high
896		 * because the data port accesses 2x2 blocks even if the
897		 * bottom row isn't to be rendered, so failure to align means
898		 * we could walk off the end of the GTT and fault.  This is
899		 * documented on 965, and may be the case on older chipsets
900		 * too so we try to be careful.
901		 */
902		aligned_y = y;
903		height_alignment = 2;
904
905		if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE)
906			height_alignment = 16;
907		else if (tiling == I915_TILING_X
908			|| (IS_915(bufmgr_gem->pci_device)
909			    && tiling == I915_TILING_Y))
910			height_alignment = 8;
911		else if (tiling == I915_TILING_Y)
912			height_alignment = 32;
913		aligned_y = ALIGN(y, height_alignment);
914
915		stride = x * cpp;
916		stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode);
917		size = stride * aligned_y;
918		size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
919	} while (*tiling_mode != tiling);
920	*pitch = stride;
921
922	if (tiling == I915_TILING_NONE)
923		stride = 0;
924
925	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags,
926					       tiling, stride, 0);
927}
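
/*
 * Sketch (assumes an initialized bufmgr; values are illustrative):
 * allocating a 1024x768, 32bpp surface and letting the helpers above
 * pick a legal tiling, pitch and size:
 *
 *	uint32_t tiling = I915_TILING_X;
 *	unsigned long pitch;
 *	drm_intel_bo *bo = drm_intel_bo_alloc_tiled(bufmgr, "surface",
 *						    1024, 768, 4,
 *						    &tiling, &pitch, 0);
 *
 * On return, tiling may have been demoted to I915_TILING_NONE if the
 * request could not be satisfied, and pitch holds the stride actually
 * chosen.
 */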
928
929static drm_intel_bo *
930drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
931				const char *name,
932				void *addr,
933				uint32_t tiling_mode,
934				uint32_t stride,
935				unsigned long size,
936				unsigned long flags)
937{
938	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
939	drm_intel_bo_gem *bo_gem;
940	int ret;
941	struct drm_i915_gem_userptr userptr;
942
943	/* Tiling with userptr surfaces is not supported
944	 * on all hardware, so refuse it for the time being.
945	 */
946	if (tiling_mode != I915_TILING_NONE)
947		return NULL;
948
949	bo_gem = calloc(1, sizeof(*bo_gem));
950	if (!bo_gem)
951		return NULL;
952
953	atomic_set(&bo_gem->refcount, 1);
954	DRMINITLISTHEAD(&bo_gem->vma_list);
955
956	bo_gem->bo.size = size;
957
958	memclear(userptr);
959	userptr.user_ptr = (__u64)((unsigned long)addr);
960	userptr.user_size = size;
961	userptr.flags = flags;
962
963	ret = drmIoctl(bufmgr_gem->fd,
964			DRM_IOCTL_I915_GEM_USERPTR,
965			&userptr);
966	if (ret != 0) {
967		DBG("bo_create_userptr: "
968		    "ioctl failed with user ptr %p size 0x%lx, "
969		    "user flags 0x%lx\n", addr, size, flags);
970		free(bo_gem);
971		return NULL;
972	}
973
974	pthread_mutex_lock(&bufmgr_gem->lock);
975
976	bo_gem->gem_handle = userptr.handle;
977	bo_gem->bo.handle = bo_gem->gem_handle;
978	bo_gem->bo.bufmgr    = bufmgr;
979	bo_gem->is_userptr   = true;
980	bo_gem->bo.virtual   = addr;
981	/* Save the address provided by user */
982	bo_gem->user_virtual = addr;
983	bo_gem->tiling_mode  = I915_TILING_NONE;
984	bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
985	bo_gem->stride       = 0;
986
987	HASH_ADD(handle_hh, bufmgr_gem->handle_table,
988		 gem_handle, sizeof(bo_gem->gem_handle),
989		 bo_gem);
990
991	bo_gem->name = name;
992	bo_gem->validate_index = -1;
993	bo_gem->reloc_tree_fences = 0;
994	bo_gem->used_as_reloc_target = false;
995	bo_gem->has_error = false;
996	bo_gem->reusable = false;
997
998	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
999	pthread_mutex_unlock(&bufmgr_gem->lock);
1000
1001	DBG("bo_create_userptr: "
1002	    "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n",
1003		addr, bo_gem->gem_handle, bo_gem->name,
1004		size, stride, tiling_mode);
1005
1006	return &bo_gem->bo;
1007}
1008
1009static bool
1010has_userptr(drm_intel_bufmgr_gem *bufmgr_gem)
1011{
1012	int ret;
1013	void *ptr;
1014	long pgsz;
1015	struct drm_i915_gem_userptr userptr;
1016
1017	pgsz = sysconf(_SC_PAGESIZE);
1018	assert(pgsz > 0);
1019
1020	ret = posix_memalign(&ptr, pgsz, pgsz);
1021	if (ret) {
1022		DBG("Failed to get a page (%ld) for userptr detection!\n",
1023			pgsz);
1024		return false;
1025	}
1026
1027	memclear(userptr);
1028	userptr.user_ptr = (__u64)(unsigned long)ptr;
1029	userptr.user_size = pgsz;
1030
1031retry:
1032	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
1033	if (ret) {
1034		if (errno == ENODEV && userptr.flags == 0) {
1035			userptr.flags = I915_USERPTR_UNSYNCHRONIZED;
1036			goto retry;
1037		}
1038		free(ptr);
1039		return false;
1040	}
1041
1042	/* We don't release the userptr bo here as we want to keep the
1043	 * kernel mm tracking alive for our lifetime. The first time we
1044	 * create a userptr object the kernel has to install an mmu_notifier,
1045	 * which is a heavyweight operation (e.g. it requires taking all
1046	 * mm_locks and stop_machine()).
1047	 */
1048
1049	bufmgr_gem->userptr_active.ptr = ptr;
1050	bufmgr_gem->userptr_active.handle = userptr.handle;
1051
1052	return true;
1053}
1054
1055static drm_intel_bo *
1056check_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
1057		       const char *name,
1058		       void *addr,
1059		       uint32_t tiling_mode,
1060		       uint32_t stride,
1061		       unsigned long size,
1062		       unsigned long flags)
1063{
1064	if (has_userptr((drm_intel_bufmgr_gem *)bufmgr))
1065		bufmgr->bo_alloc_userptr = drm_intel_gem_bo_alloc_userptr;
1066	else
1067		bufmgr->bo_alloc_userptr = NULL;
1068
1069	return drm_intel_bo_alloc_userptr(bufmgr, name, addr,
1070					  tiling_mode, stride, size, flags);
1071}
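
/*
 * Sketch of a userptr allocation through the public entry point (the
 * 2-page size is illustrative; the memory must be page aligned):
 *
 *	void *ptr;
 *	if (posix_memalign(&ptr, 4096, 2 * 4096) == 0) {
 *		drm_intel_bo *bo =
 *			drm_intel_bo_alloc_userptr(bufmgr, "userptr", ptr,
 *						   I915_TILING_NONE, 0,
 *						   2 * 4096, 0);
 *	}
 *
 * The returned bo is never reusable, and mapping it simply hands back ptr.
 */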
1072
1073/**
1074 * Returns a drm_intel_bo wrapping the given buffer object handle.
1075 *
1076 * This can be used when one application needs to pass a buffer object
1077 * to another.
1078 */
1079drm_public drm_intel_bo *
1080drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr,
1081				  const char *name,
1082				  unsigned int handle)
1083{
1084	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1085	drm_intel_bo_gem *bo_gem;
1086	int ret;
1087	struct drm_gem_open open_arg;
1088	struct drm_i915_gem_get_tiling get_tiling;
1089
1090	/* At the moment most applications only have a few named bo.
1091	 * For instance, in a DRI client only the render buffers passed
1092	 * between X and the client are named. And since X returns the
1093	 * alternating names for the front/back buffer, a lookup in the
1094	 * name hash table quickly finds any existing wrapper.
1095	 */
1096	pthread_mutex_lock(&bufmgr_gem->lock);
1097	HASH_FIND(name_hh, bufmgr_gem->name_table,
1098		  &handle, sizeof(handle), bo_gem);
1099	if (bo_gem) {
1100		drm_intel_gem_bo_reference(&bo_gem->bo);
1101		goto out;
1102	}
1103
1104	memclear(open_arg);
1105	open_arg.name = handle;
1106	ret = drmIoctl(bufmgr_gem->fd,
1107		       DRM_IOCTL_GEM_OPEN,
1108		       &open_arg);
1109	if (ret != 0) {
1110		DBG("Couldn't reference %s handle 0x%08x: %s\n",
1111		    name, handle, strerror(errno));
1112		bo_gem = NULL;
1113		goto out;
1114	}
1115	/* Now see if someone has used a prime handle to get this
1116	 * object from the kernel before, by looking up the returned
1117	 * gem_handle in the handle table.
1118	 */
1119	HASH_FIND(handle_hh, bufmgr_gem->handle_table,
1120		  &open_arg.handle, sizeof(open_arg.handle), bo_gem);
1121	if (bo_gem) {
1122		drm_intel_gem_bo_reference(&bo_gem->bo);
1123		goto out;
1124	}
1125
1126	bo_gem = calloc(1, sizeof(*bo_gem));
1127	if (!bo_gem)
1128		goto out;
1129
1130	atomic_set(&bo_gem->refcount, 1);
1131	DRMINITLISTHEAD(&bo_gem->vma_list);
1132
1133	bo_gem->bo.size = open_arg.size;
1134	bo_gem->bo.offset = 0;
1135	bo_gem->bo.offset64 = 0;
1136	bo_gem->bo.virtual = NULL;
1137	bo_gem->bo.bufmgr = bufmgr;
1138	bo_gem->name = name;
1139	bo_gem->validate_index = -1;
1140	bo_gem->gem_handle = open_arg.handle;
1141	bo_gem->bo.handle = open_arg.handle;
1142	bo_gem->global_name = handle;
1143	bo_gem->reusable = false;
1144
1145	HASH_ADD(handle_hh, bufmgr_gem->handle_table,
1146		 gem_handle, sizeof(bo_gem->gem_handle), bo_gem);
1147	HASH_ADD(name_hh, bufmgr_gem->name_table,
1148		 global_name, sizeof(bo_gem->global_name), bo_gem);
1149
1150	memclear(get_tiling);
1151	get_tiling.handle = bo_gem->gem_handle;
1152	ret = drmIoctl(bufmgr_gem->fd,
1153		       DRM_IOCTL_I915_GEM_GET_TILING,
1154		       &get_tiling);
1155	if (ret != 0)
1156		goto err_unref;
1157
1158	bo_gem->tiling_mode = get_tiling.tiling_mode;
1159	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
1160	/* XXX stride is unknown */
1161	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
1162	DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
1163
1164out:
1165	pthread_mutex_unlock(&bufmgr_gem->lock);
1166	return &bo_gem->bo;
1167
1168err_unref:
1169	drm_intel_gem_bo_free(&bo_gem->bo);
1170	pthread_mutex_unlock(&bufmgr_gem->lock);
1171	return NULL;
1172}
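
/*
 * Sketch of the flink hand-off this function serves (variable names are
 * illustrative). The exporting process publishes a global name:
 *
 *	uint32_t name;
 *	drm_intel_bo_flink(bo, &name);
 *
 * and the importing process, on the same device, wraps it:
 *
 *	drm_intel_bo *shared =
 *		drm_intel_bo_gem_create_from_name(bufmgr, "shared", name);
 */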
1173
1174static void
1175drm_intel_gem_bo_free(drm_intel_bo *bo)
1176{
1177	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1178	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1179	struct drm_gem_close close;
1180	int ret;
1181
1182	DRMLISTDEL(&bo_gem->vma_list);
1183	if (bo_gem->mem_virtual) {
1184		VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0));
1185		drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
1186		bufmgr_gem->vma_count--;
1187	}
1188	if (bo_gem->wc_virtual) {
1189		VG(VALGRIND_FREELIKE_BLOCK(bo_gem->wc_virtual, 0));
1190		drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
1191		bufmgr_gem->vma_count--;
1192	}
1193	if (bo_gem->gtt_virtual) {
1194		drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
1195		bufmgr_gem->vma_count--;
1196	}
1197
1198	if (bo_gem->global_name)
1199		HASH_DELETE(name_hh, bufmgr_gem->name_table, bo_gem);
1200	HASH_DELETE(handle_hh, bufmgr_gem->handle_table, bo_gem);
1201
1202	/* Close this object */
1203	memclear(close);
1204	close.handle = bo_gem->gem_handle;
1205	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close);
1206	if (ret != 0) {
1207		DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
1208		    bo_gem->gem_handle, bo_gem->name, strerror(errno));
1209	}
1210	free(bo);
1211}
1212
1213static void
1214drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo)
1215{
1216#if HAVE_VALGRIND
1217	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1218
1219	if (bo_gem->mem_virtual)
1220		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size);
1221
1222	if (bo_gem->wc_virtual)
1223		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->wc_virtual, bo->size);
1224
1225	if (bo_gem->gtt_virtual)
1226		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size);
1227#endif
1228}
1229
1230/** Frees all cached buffers significantly older than @time. */
1231static void
1232drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
1233{
1234	int i;
1235
1236	if (bufmgr_gem->time == time)
1237		return;
1238
1239	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
1240		struct drm_intel_gem_bo_bucket *bucket =
1241		    &bufmgr_gem->cache_bucket[i];
1242
1243		while (!DRMLISTEMPTY(&bucket->head)) {
1244			drm_intel_bo_gem *bo_gem;
1245
1246			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1247					      bucket->head.next, head);
1248			if (time - bo_gem->free_time <= 1)
1249				break;
1250
1251			DRMLISTDEL(&bo_gem->head);
1252
1253			drm_intel_gem_bo_free(&bo_gem->bo);
1254		}
1255	}
1256
1257	bufmgr_gem->time = time;
1258}
1259
1260static void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem)
1261{
1262	int limit;
1263
1264	DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__,
1265	    bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max);
1266
1267	if (bufmgr_gem->vma_max < 0)
1268		return;
1269
1270	/* We may need to evict a few entries in order to create new mmaps */
1271	limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open;
1272	if (limit < 0)
1273		limit = 0;
1274
1275	while (bufmgr_gem->vma_count > limit) {
1276		drm_intel_bo_gem *bo_gem;
1277
1278		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1279				      bufmgr_gem->vma_cache.next,
1280				      vma_list);
1281		assert(bo_gem->map_count == 0);
1282		DRMLISTDELINIT(&bo_gem->vma_list);
1283
1284		if (bo_gem->mem_virtual) {
1285			drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
1286			bo_gem->mem_virtual = NULL;
1287			bufmgr_gem->vma_count--;
1288		}
1289		if (bo_gem->wc_virtual) {
1290			drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
1291			bo_gem->wc_virtual = NULL;
1292			bufmgr_gem->vma_count--;
1293		}
1294		if (bo_gem->gtt_virtual) {
1295			drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
1296			bo_gem->gtt_virtual = NULL;
1297			bufmgr_gem->vma_count--;
1298		}
1299	}
1300}
1301
1302static void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem,
1303				       drm_intel_bo_gem *bo_gem)
1304{
1305	bufmgr_gem->vma_open--;
1306	DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache);
1307	if (bo_gem->mem_virtual)
1308		bufmgr_gem->vma_count++;
1309	if (bo_gem->wc_virtual)
1310		bufmgr_gem->vma_count++;
1311	if (bo_gem->gtt_virtual)
1312		bufmgr_gem->vma_count++;
1313	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
1314}
1315
1316static void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem,
1317				      drm_intel_bo_gem *bo_gem)
1318{
1319	bufmgr_gem->vma_open++;
1320	DRMLISTDEL(&bo_gem->vma_list);
1321	if (bo_gem->mem_virtual)
1322		bufmgr_gem->vma_count--;
1323	if (bo_gem->wc_virtual)
1324		bufmgr_gem->vma_count--;
1325	if (bo_gem->gtt_virtual)
1326		bufmgr_gem->vma_count--;
1327	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
1328}
1329
1330static void
1331drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
1332{
1333	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1334	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1335	struct drm_intel_gem_bo_bucket *bucket;
1336	int i;
1337
1338	/* Unreference all the target buffers */
1339	for (i = 0; i < bo_gem->reloc_count; i++) {
1340		if (bo_gem->reloc_target_info[i].bo != bo) {
1341			drm_intel_gem_bo_unreference_locked_timed(bo_gem->
1342								  reloc_target_info[i].bo,
1343								  time);
1344		}
1345	}
1346	for (i = 0; i < bo_gem->softpin_target_count; i++)
1347		drm_intel_gem_bo_unreference_locked_timed(bo_gem->softpin_target[i],
1348								  time);
1349	bo_gem->kflags = 0;
1350	bo_gem->reloc_count = 0;
1351	bo_gem->used_as_reloc_target = false;
1352	bo_gem->softpin_target_count = 0;
1353
1354	DBG("bo_unreference final: %d (%s)\n",
1355	    bo_gem->gem_handle, bo_gem->name);
1356
1357	/* release memory associated with this object */
1358	if (bo_gem->reloc_target_info) {
1359		free(bo_gem->reloc_target_info);
1360		bo_gem->reloc_target_info = NULL;
1361	}
1362	if (bo_gem->relocs) {
1363		free(bo_gem->relocs);
1364		bo_gem->relocs = NULL;
1365	}
1366	if (bo_gem->softpin_target) {
1367		free(bo_gem->softpin_target);
1368		bo_gem->softpin_target = NULL;
1369		bo_gem->softpin_target_size = 0;
1370	}
1371
1372	/* Clear any left-over mappings */
1373	if (bo_gem->map_count) {
1374		DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count);
1375		bo_gem->map_count = 0;
1376		drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1377		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1378	}
1379
1380	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
1381	/* Put the buffer into our internal cache for reuse if we can. */
1382	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
1383	    drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
1384					      I915_MADV_DONTNEED)) {
1385		bo_gem->free_time = time;
1386
1387		bo_gem->name = NULL;
1388		bo_gem->validate_index = -1;
1389
1390		DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
1391	} else {
1392		drm_intel_gem_bo_free(bo);
1393	}
1394}
1395
1396static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
1397						      time_t time)
1398{
1399	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1400
1401	assert(atomic_read(&bo_gem->refcount) > 0);
1402	if (atomic_dec_and_test(&bo_gem->refcount))
1403		drm_intel_gem_bo_unreference_final(bo, time);
1404}
1405
1406static void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
1407{
1408	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1409
1410	assert(atomic_read(&bo_gem->refcount) > 0);
1411
1412	if (atomic_add_unless(&bo_gem->refcount, -1, 1)) {
1413		drm_intel_bufmgr_gem *bufmgr_gem =
1414		    (drm_intel_bufmgr_gem *) bo->bufmgr;
1415		struct timespec time;
1416
1417		clock_gettime(CLOCK_MONOTONIC, &time);
1418
1419		pthread_mutex_lock(&bufmgr_gem->lock);
1420
1421		if (atomic_dec_and_test(&bo_gem->refcount)) {
1422			drm_intel_gem_bo_unreference_final(bo, time.tv_sec);
1423			drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec);
1424		}
1425
1426		pthread_mutex_unlock(&bufmgr_gem->lock);
1427	}
1428}
1429
1430static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
1431{
1432	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1433	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1434	struct drm_i915_gem_set_domain set_domain;
1435	int ret;
1436
1437	if (bo_gem->is_userptr) {
1438		/* Return the same user ptr */
1439		bo->virtual = bo_gem->user_virtual;
1440		return 0;
1441	}
1442
1443	pthread_mutex_lock(&bufmgr_gem->lock);
1444
1445	if (bo_gem->map_count++ == 0)
1446		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
1447
1448	if (!bo_gem->mem_virtual) {
1449		struct drm_i915_gem_mmap mmap_arg;
1450
1451		DBG("bo_map: %d (%s), map_count=%d\n",
1452		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1453
1454		memclear(mmap_arg);
1455		mmap_arg.handle = bo_gem->gem_handle;
1456		mmap_arg.size = bo->size;
1457		ret = drmIoctl(bufmgr_gem->fd,
1458			       DRM_IOCTL_I915_GEM_MMAP,
1459			       &mmap_arg);
1460		if (ret != 0) {
1461			ret = -errno;
1462			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1463			    __FILE__, __LINE__, bo_gem->gem_handle,
1464			    bo_gem->name, strerror(errno));
1465			if (--bo_gem->map_count == 0)
1466				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1467			pthread_mutex_unlock(&bufmgr_gem->lock);
1468			return ret;
1469		}
1470		VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
1471		bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
1472	}
1473	DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1474	    bo_gem->mem_virtual);
1475	bo->virtual = bo_gem->mem_virtual;
1476
1477	memclear(set_domain);
1478	set_domain.handle = bo_gem->gem_handle;
1479	set_domain.read_domains = I915_GEM_DOMAIN_CPU;
1480	if (write_enable)
1481		set_domain.write_domain = I915_GEM_DOMAIN_CPU;
1482	else
1483		set_domain.write_domain = 0;
1484	ret = drmIoctl(bufmgr_gem->fd,
1485		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1486		       &set_domain);
1487	if (ret != 0) {
1488		DBG("%s:%d: Error setting to CPU domain %d: %s\n",
1489		    __FILE__, __LINE__, bo_gem->gem_handle,
1490		    strerror(errno));
1491	}
1492
1493	if (write_enable)
1494		bo_gem->mapped_cpu_write = true;
1495
1496	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1497	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size));
1498	pthread_mutex_unlock(&bufmgr_gem->lock);
1499
1500	return 0;
1501}
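
/*
 * Typical CPU-map usage of the path above (illustrative sketch; data and
 * data_size are caller-provided):
 *
 *	if (drm_intel_bo_map(bo, 1) == 0) {
 *		memcpy(bo->virtual, data, data_size);
 *		drm_intel_bo_unmap(bo);
 *	}
 *
 * The CPU mapping itself is cached in bo_gem->mem_virtual across
 * map/unmap cycles; only the vma accounting and domain tracking change.
 */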
1502
1503static int
1504map_gtt(drm_intel_bo *bo)
1505{
1506	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1507	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1508	int ret;
1509
1510	if (bo_gem->is_userptr)
1511		return -EINVAL;
1512
1513	if (bo_gem->map_count++ == 0)
1514		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
1515
1516	/* Get a mapping of the buffer if we haven't before. */
1517	if (bo_gem->gtt_virtual == NULL) {
1518		struct drm_i915_gem_mmap_gtt mmap_arg;
1519
1520		DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
1521		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1522
1523		memclear(mmap_arg);
1524		mmap_arg.handle = bo_gem->gem_handle;
1525
1526		/* Get the fake offset back... */
1527		ret = drmIoctl(bufmgr_gem->fd,
1528			       DRM_IOCTL_I915_GEM_MMAP_GTT,
1529			       &mmap_arg);
1530		if (ret != 0) {
1531			ret = -errno;
1532			DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
1533			    __FILE__, __LINE__,
1534			    bo_gem->gem_handle, bo_gem->name,
1535			    strerror(errno));
1536			if (--bo_gem->map_count == 0)
1537				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1538			return ret;
1539		}
1540
1541		/* and mmap it */
1542		 bo_gem->gtt_virtual = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
1543						MAP_SHARED, bufmgr_gem->fd,
1544						mmap_arg.offset);
1545		if (bo_gem->gtt_virtual == MAP_FAILED) {
1546			bo_gem->gtt_virtual = NULL;
1547			ret = -errno;
1548			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1549			    __FILE__, __LINE__,
1550			    bo_gem->gem_handle, bo_gem->name,
1551			    strerror(errno));
1552			if (--bo_gem->map_count == 0)
1553				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1554			return ret;
1555		}
1556	}
1557
1558	bo->virtual = bo_gem->gtt_virtual;
1559
1560	DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1561	    bo_gem->gtt_virtual);
1562
1563	return 0;
1564}
1565
1566drm_public int
1567drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
1568{
1569	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1570	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1571	struct drm_i915_gem_set_domain set_domain;
1572	int ret;
1573
1574	pthread_mutex_lock(&bufmgr_gem->lock);
1575
1576	ret = map_gtt(bo);
1577	if (ret) {
1578		pthread_mutex_unlock(&bufmgr_gem->lock);
1579		return ret;
1580	}
1581
1582	/* Now move it to the GTT domain so that the GPU and CPU
1583	 * caches are flushed and the GPU isn't actively using the
1584	 * buffer.
1585	 *
1586	 * The pagefault handler does this domain change for us when
1587	 * it has unbound the BO from the GTT, but it's up to us to
1588	 * tell it when we're about to use things if we had done
1589	 * rendering and it still happens to be bound to the GTT.
1590	 */
1591	memclear(set_domain);
1592	set_domain.handle = bo_gem->gem_handle;
1593	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1594	set_domain.write_domain = I915_GEM_DOMAIN_GTT;
1595	ret = drmIoctl(bufmgr_gem->fd,
1596		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1597		       &set_domain);
1598	if (ret != 0) {
1599		DBG("%s:%d: Error setting domain %d: %s\n",
1600		    __FILE__, __LINE__, bo_gem->gem_handle,
1601		    strerror(errno));
1602	}
1603
1604	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1605	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1606	pthread_mutex_unlock(&bufmgr_gem->lock);
1607
1608	return 0;
1609}
1610
1611/**
1612 * Performs a mapping of the buffer object like the normal GTT
1613 * mapping, but avoids waiting for the GPU to be done reading from or
1614 * rendering to the buffer.
1615 *
1616 * This is used in the implementation of GL_ARB_map_buffer_range: The
1617 * user asks to create a buffer, then does a mapping, fills some
1618 * space, runs a drawing command, then asks to map it again without
1619 * synchronizing because it guarantees that it won't write over the
1620 * data that the GPU is busy using (or, more specifically, that if it
1621 * does write over the data, it acknowledges that rendering is
1622 * undefined).
1623 */
1624
1625drm_public int
1626drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
1627{
1628	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1629#if HAVE_VALGRIND
1630	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1631#endif
1632	int ret;
1633
1634	/* If the CPU cache isn't coherent with the GTT, then use a
1635	 * regular synchronized mapping.  The problem is that we don't
1636	 * track where the buffer was last used on the CPU side in
1637	 * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so
1638	 * we would potentially corrupt the buffer even when the user
1639	 * does reasonable things.
1640	 */
1641	if (!bufmgr_gem->has_llc)
1642		return drm_intel_gem_bo_map_gtt(bo);
1643
1644	pthread_mutex_lock(&bufmgr_gem->lock);
1645
1646	ret = map_gtt(bo);
1647	if (ret == 0) {
1648		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1649		VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1650	}
1651
1652	pthread_mutex_unlock(&bufmgr_gem->lock);
1653
1654	return ret;
1655}
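
/*
 * Sketch of the GL_ARB_map_buffer_range-style pattern described above
 * (write_offset, src and len are illustrative, caller-provided values):
 *
 *	drm_intel_gem_bo_map_unsynchronized(bo);
 *	memcpy((char *)bo->virtual + write_offset, src, len);
 *	drm_intel_gem_bo_unmap_gtt(bo);
 *
 * The caller must guarantee that the written range is not concurrently
 * in use by the GPU.
 */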
1656
1657static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
1658{
1659	drm_intel_bufmgr_gem *bufmgr_gem;
1660	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1661	int ret = 0;
1662
1663	if (bo == NULL)
1664		return 0;
1665
1666	if (bo_gem->is_userptr)
1667		return 0;
1668
1669	bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1670
1671	pthread_mutex_lock(&bufmgr_gem->lock);
1672
1673	if (bo_gem->map_count <= 0) {
1674		DBG("attempted to unmap an unmapped bo\n");
1675		pthread_mutex_unlock(&bufmgr_gem->lock);
1676		/* Preserve the old behaviour of just treating this as a
1677		 * no-op rather than reporting the error.
1678		 */
1679		return 0;
1680	}
1681
1682	if (bo_gem->mapped_cpu_write) {
1683		struct drm_i915_gem_sw_finish sw_finish;
1684
1685		/* Cause a flush to happen if the buffer's pinned for
1686		 * scanout, so the results show up in a timely manner.
1687		 * Unlike GTT set domains, this only does work if the
1688		 * buffer is scanout-related.
1689		 */
1690		memclear(sw_finish);
1691		sw_finish.handle = bo_gem->gem_handle;
1692		ret = drmIoctl(bufmgr_gem->fd,
1693			       DRM_IOCTL_I915_GEM_SW_FINISH,
1694			       &sw_finish);
1695		ret = ret == -1 ? -errno : 0;
1696
1697		bo_gem->mapped_cpu_write = false;
1698	}
1699
1700	/* We need to unmap after every invocation, as we cannot track
1701	 * an open vma for every bo: that would exhaust the system
1702	 * limits and cause later failures.
1703	 */
1704	if (--bo_gem->map_count == 0) {
1705		drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1706		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1707		bo->virtual = NULL;
1708	}
1709	pthread_mutex_unlock(&bufmgr_gem->lock);
1710
1711	return ret;
1712}
1713
1714drm_public int
1715drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
1716{
1717	return drm_intel_gem_bo_unmap(bo);
1718}
1719
1720static int
1721drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
1722			 unsigned long size, const void *data)
1723{
1724	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1725	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1726	struct drm_i915_gem_pwrite pwrite;
1727	int ret;
1728
1729	if (bo_gem->is_userptr)
1730		return -EINVAL;
1731
1732	memclear(pwrite);
1733	pwrite.handle = bo_gem->gem_handle;
1734	pwrite.offset = offset;
1735	pwrite.size = size;
1736	pwrite.data_ptr = (uint64_t) (uintptr_t) data;
1737	ret = drmIoctl(bufmgr_gem->fd,
1738		       DRM_IOCTL_I915_GEM_PWRITE,
1739		       &pwrite);
1740	if (ret != 0) {
1741		ret = -errno;
1742		DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
1743		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1744		    (int)size, strerror(errno));
1745	}
1746
1747	return ret;
1748}
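
/*
 * Sketch of an upload through the pwrite path above (data and data_size
 * are illustrative, caller-provided values):
 *
 *	drm_intel_bo_subdata(bo, 0, data_size, data);
 *
 * Unlike the map paths, this never leaves a CPU mapping open on the
 * object, so it does not consume a vma cache slot.
 */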
1749
1750static int
1751drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id)
1752{
1753	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1754	struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id;
1755	int ret;
1756
1757	memclear(get_pipe_from_crtc_id);
1758	get_pipe_from_crtc_id.crtc_id = crtc_id;
1759	ret = drmIoctl(bufmgr_gem->fd,
1760		       DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
1761		       &get_pipe_from_crtc_id);
1762	if (ret != 0) {
1763		/* We return -1 here to signal that we don't
1764		 * know which pipe is associated with this crtc.
1765		 * This lets the caller know that this information
1766		 * isn't available; using the wrong pipe for
1767		 * vblank waiting can cause the chipset to lock up
1768		 */
1769		return -1;
1770	}
1771
1772	return get_pipe_from_crtc_id.pipe;
1773}
1774
1775static int
1776drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
1777			     unsigned long size, void *data)
1778{
1779	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1780	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1781	struct drm_i915_gem_pread pread;
1782	int ret;
1783
1784	if (bo_gem->is_userptr)
1785		return -EINVAL;
1786
1787	memclear(pread);
1788	pread.handle = bo_gem->gem_handle;
1789	pread.offset = offset;
1790	pread.size = size;
1791	pread.data_ptr = (uint64_t) (uintptr_t) data;
1792	ret = drmIoctl(bufmgr_gem->fd,
1793		       DRM_IOCTL_I915_GEM_PREAD,
1794		       &pread);
1795	if (ret != 0) {
1796		ret = -errno;
1797		DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
1798		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1799		    (int)size, strerror(errno));
1800	}
1801
1802	return ret;
1803}
1804
1805/** Waits for all GPU rendering with the object to have completed. */
1806static void
1807drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
1808{
1809	drm_intel_gem_bo_start_gtt_access(bo, 1);
1810}
1811
1812/**
1813 * Waits on a BO for the given amount of time.
1814 *
1815 * @bo: buffer object to wait for
1816 * @timeout_ns: amount of time to wait in nanoseconds.
1817 *   If value is less than 0, an infinite wait will occur.
1818 *
1819 * Returns 0 if the wait was successful, i.e. the last batch referencing the
1820 * object completed within the allotted time; otherwise a negative return
1821 * value describes the error. Of particular interest is -ETIME, returned when
1822 * the wait fails to yield the desired result.
1823 *
1824 * Similar to drm_intel_gem_bo_wait_rendering except a timeout parameter allows
1825 * the operation to give up after a certain amount of time. Another subtle
1826 * difference is that the internal locking semantics differ (this variant does
1827 * not hold the lock for the duration of the wait), which makes the wait subject
1828 * to a larger userspace race window.
1829 *
1830 * The implementation shall wait until the object is no longer actively
1831 * referenced within a batch buffer at the time of the call. The wait does
1832 * not guarantee that the buffer will not be re-issued via another thread or a
1833 * flinked handle. Userspace must make sure this race does not occur if such
1834 * precision is important.
1835 *
1836 * Note that some kernels have broken the promise of an infinite wait for
1837 * negative values; upgrade to the latest stable kernel if this is the case.
1838 */
1839drm_public int
1840drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns)
1841{
1842	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1843	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1844	struct drm_i915_gem_wait wait;
1845	int ret;
1846
1847	if (!bufmgr_gem->has_wait_timeout) {
1848		DBG("%s:%d: Timed wait is not supported. Falling back to "
1849		    "infinite wait\n", __FILE__, __LINE__);
1850		if (timeout_ns) {
1851			drm_intel_gem_bo_wait_rendering(bo);
1852			return 0;
1853		} else {
1854			return drm_intel_gem_bo_busy(bo) ? -ETIME : 0;
1855		}
1856	}
1857
1858	memclear(wait);
1859	wait.bo_handle = bo_gem->gem_handle;
1860	wait.timeout_ns = timeout_ns;
1861	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
1862	if (ret == -1)
1863		return -errno;
1864
1865	return ret;
1866}
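/*
 * Example (editor's sketch, not part of the original file): a bounded
 * wait with a one-millisecond budget that falls back to the blocking
 * wait when the kernel reports -ETIME.  The bo is assumed to have been
 * referenced by an earlier execbuffer.
 *
 *	if (drm_intel_gem_bo_wait(bo, 1000000) == -ETIME)
 *		drm_intel_bo_wait_rendering(bo);
 */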
1867
1868/**
1869 * Sets the object to the GTT read and possibly write domain, used by the X
1870 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
1871 *
1872 * In combination with drm_intel_gem_bo_pin() and manual fence management, we
1873 * can do tiled pixmaps this way.
1874 */
1875drm_public void
1876drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
1877{
1878	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1879	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1880	struct drm_i915_gem_set_domain set_domain;
1881	int ret;
1882
1883	memclear(set_domain);
1884	set_domain.handle = bo_gem->gem_handle;
1885	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1886	set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
1887	ret = drmIoctl(bufmgr_gem->fd,
1888		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1889		       &set_domain);
1890	if (ret != 0) {
1891		DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
1892		    __FILE__, __LINE__, bo_gem->gem_handle,
1893		    set_domain.read_domains, set_domain.write_domain,
1894		    strerror(errno));
1895	}
1896}
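/*
 * Example (editor's sketch, not part of the original file): a 2D driver
 * moving a pixmap bo into the GTT write domain before scribbling on it
 * through a GTT mapping.  pixmap_bo and gtt_ptr are assumed to come from
 * the caller's earlier allocation and mapping.
 *
 *	drm_intel_gem_bo_start_gtt_access(pixmap_bo, 1);
 *	memset(gtt_ptr, 0, pixmap_bo->size);
 */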
1897
1898static void
1899drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
1900{
1901	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1902	struct drm_gem_close close_bo;
1903	int i, ret;
1904
1905	free(bufmgr_gem->exec2_objects);
1906	free(bufmgr_gem->exec_objects);
1907	free(bufmgr_gem->exec_bos);
1908
1909	pthread_mutex_destroy(&bufmgr_gem->lock);
1910
1911	/* Free any cached buffer objects we were going to reuse */
1912	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
1913		struct drm_intel_gem_bo_bucket *bucket =
1914		    &bufmgr_gem->cache_bucket[i];
1915		drm_intel_bo_gem *bo_gem;
1916
1917		while (!DRMLISTEMPTY(&bucket->head)) {
1918			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1919					      bucket->head.next, head);
1920			DRMLISTDEL(&bo_gem->head);
1921
1922			drm_intel_gem_bo_free(&bo_gem->bo);
1923		}
1924	}
1925
1926	/* Release userptr bo kept hanging around for optimisation. */
1927	if (bufmgr_gem->userptr_active.ptr) {
1928		memclear(close_bo);
1929		close_bo.handle = bufmgr_gem->userptr_active.handle;
1930		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close_bo);
1931		free(bufmgr_gem->userptr_active.ptr);
1932		if (ret)
1933			fprintf(stderr,
1934				"Failed to release test userptr object! (%d) "
1935				"i915 kernel driver may not be sane!\n", errno);
1936	}
1937
1938	free(bufmgr);
1939}
1940
1941/**
1942 * Adds the target buffer to the validation list and adds the relocation
1943 * to the reloc_buffer's relocation list.
1944 *
1945 * The relocation entry at the given offset must already contain the
1946 * precomputed relocation value, because the kernel will optimize out
1947 * the relocation entry write when the buffer hasn't moved from the
1948 * last known offset in target_bo.
1949 */
1950static int
1951do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
1952		 drm_intel_bo *target_bo, uint32_t target_offset,
1953		 uint32_t read_domains, uint32_t write_domain,
1954		 bool need_fence)
1955{
1956	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1957	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1958	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
1959	bool fenced_command;
1960
1961	if (bo_gem->has_error)
1962		return -ENOMEM;
1963
1964	if (target_bo_gem->has_error) {
1965		bo_gem->has_error = true;
1966		return -ENOMEM;
1967	}
1968
1969	/* We never use HW fences for rendering on 965+ */
1970	if (bufmgr_gem->gen >= 4)
1971		need_fence = false;
1972
1973	fenced_command = need_fence;
1974	if (target_bo_gem->tiling_mode == I915_TILING_NONE)
1975		need_fence = false;
1976
1977	/* Create a new relocation list if needed */
1978	if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo))
1979		return -ENOMEM;
1980
1981	/* Check overflow */
1982	assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);
1983
1984	/* Check args */
1985	assert(offset <= bo->size - 4);
1986	assert((write_domain & (write_domain - 1)) == 0);
1987
1988	/* An object needing a fence is a tiled buffer, so it won't have
1989	 * relocs to other buffers.
1990	 */
1991	if (need_fence) {
1992		assert(target_bo_gem->reloc_count == 0);
1993		target_bo_gem->reloc_tree_fences = 1;
1994	}
1995
1996	/* Make sure that we're not adding a reloc to something whose size has
1997	 * already been accounted for.
1998	 */
1999	assert(!bo_gem->used_as_reloc_target);
2000	if (target_bo_gem != bo_gem) {
2001		target_bo_gem->used_as_reloc_target = true;
2002		bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
2003		bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
2004	}
2005
2006	bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
2007	if (target_bo != bo)
2008		drm_intel_gem_bo_reference(target_bo);
2009	if (fenced_command)
2010		bo_gem->reloc_target_info[bo_gem->reloc_count].flags =
2011			DRM_INTEL_RELOC_FENCE;
2012	else
2013		bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
2014
2015	bo_gem->relocs[bo_gem->reloc_count].offset = offset;
2016	bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
2017	bo_gem->relocs[bo_gem->reloc_count].target_handle =
2018	    target_bo_gem->gem_handle;
2019	bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
2020	bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
2021	bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64;
2022	bo_gem->reloc_count++;
2023
2024	return 0;
2025}
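/*
 * Example (editor's sketch, not part of the original file): as the
 * comment above requires, the caller writes the presumed address into
 * the batch first and then records the relocation at the same offset so
 * the kernel can patch it if the target moves.  batch_ptr, batch_offset
 * and delta are assumed to track the CPU mapping and current write
 * position of batch_bo.
 *
 *	batch_ptr[batch_offset / 4] =
 *		(uint32_t)(target_bo->offset64 + delta);
 *	drm_intel_bo_emit_reloc(batch_bo, batch_offset, target_bo, delta,
 *				I915_GEM_DOMAIN_RENDER, 0);
 *	batch_offset += 4;
 */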
2026
2027static void
2028drm_intel_gem_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable)
2029{
2030	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2031
2032	if (enable)
2033		bo_gem->kflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
2034	else
2035		bo_gem->kflags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
2036}
2037
2038static int
2039drm_intel_gem_bo_add_softpin_target(drm_intel_bo *bo, drm_intel_bo *target_bo)
2040{
2041	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2042	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2043	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
2044	if (bo_gem->has_error)
2045		return -ENOMEM;
2046
2047	if (target_bo_gem->has_error) {
2048		bo_gem->has_error = true;
2049		return -ENOMEM;
2050	}
2051
2052	if (!(target_bo_gem->kflags & EXEC_OBJECT_PINNED))
2053		return -EINVAL;
2054	if (target_bo_gem == bo_gem)
2055		return -EINVAL;
2056
2057	if (bo_gem->softpin_target_count == bo_gem->softpin_target_size) {
2058		int new_size = bo_gem->softpin_target_size * 2;
2059		if (new_size == 0)
2060			new_size = bufmgr_gem->max_relocs;
2061
2062		bo_gem->softpin_target = realloc(bo_gem->softpin_target, new_size *
2063				sizeof(drm_intel_bo *));
2064		if (!bo_gem->softpin_target)
2065			return -ENOMEM;
2066
2067		bo_gem->softpin_target_size = new_size;
2068	}
2069	bo_gem->softpin_target[bo_gem->softpin_target_count] = target_bo;
2070	drm_intel_gem_bo_reference(target_bo);
2071	bo_gem->softpin_target_count++;
2072
2073	return 0;
2074}
2075
2076static int
2077drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
2078			    drm_intel_bo *target_bo, uint32_t target_offset,
2079			    uint32_t read_domains, uint32_t write_domain)
2080{
2081	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
2082	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *)target_bo;
2083
2084	if (target_bo_gem->kflags & EXEC_OBJECT_PINNED)
2085		return drm_intel_gem_bo_add_softpin_target(bo, target_bo);
2086	else
2087		return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
2088					read_domains, write_domain,
2089					!bufmgr_gem->fenced_relocs);
2090}
2091
2092static int
2093drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
2094				  drm_intel_bo *target_bo,
2095				  uint32_t target_offset,
2096				  uint32_t read_domains, uint32_t write_domain)
2097{
2098	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
2099				read_domains, write_domain, true);
2100}
2101
2102drm_public int
2103drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo)
2104{
2105	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2106
2107	return bo_gem->reloc_count;
2108}
2109
2110/**
2111 * Removes existing relocation entries in the BO after "start".
2112 *
2113 * This allows a user to avoid a two-step process for state setup with
2114 * counting up all the buffer objects and doing a
2115 * drm_intel_bufmgr_check_aperture_space() before emitting any of the
2116 * relocations for the state setup.  Instead, save the state of the
2117 * batchbuffer including drm_intel_gem_bo_get_reloc_count(), emit all the
2118 * state, and then check if it still fits in the aperture.
2119 *
2120 * Any further drm_intel_bufmgr_check_aperture_space() queries
2121 * involving this buffer in the tree are undefined after this call.
2122 *
2123 * This also removes all softpinned targets being referenced by the BO.
2124 */
2125drm_public void
2126drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start)
2127{
2128	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2129	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2130	int i;
2131	struct timespec time;
2132
2133	clock_gettime(CLOCK_MONOTONIC, &time);
2134
2135	assert(bo_gem->reloc_count >= start);
2136
2137	/* Unreference the cleared target buffers */
2138	pthread_mutex_lock(&bufmgr_gem->lock);
2139
2140	for (i = start; i < bo_gem->reloc_count; i++) {
2141		drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo;
2142		if (&target_bo_gem->bo != bo) {
2143			bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences;
2144			drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo,
2145								  time.tv_sec);
2146		}
2147	}
2148	bo_gem->reloc_count = start;
2149
2150	for (i = 0; i < bo_gem->softpin_target_count; i++) {
2151		drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->softpin_target[i];
2152		drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, time.tv_sec);
2153	}
2154	bo_gem->softpin_target_count = 0;
2155
2156	pthread_mutex_unlock(&bufmgr_gem->lock);
2157
2158}
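/*
 * Example (editor's sketch, not part of the original file): the
 * speculative pattern described above.  State is emitted first, the
 * aperture is checked afterwards, and the relocations are rolled back
 * if the batch no longer fits.  emit_state() and flush_batch() are
 * hypothetical helpers.
 *
 *	int saved = drm_intel_gem_bo_get_reloc_count(batch_bo);
 *	emit_state(batch_bo);
 *	if (drm_intel_bufmgr_check_aperture_space(&batch_bo, 1) != 0) {
 *		drm_intel_gem_bo_clear_relocs(batch_bo, saved);
 *		flush_batch();
 *	}
 */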
2159
2160/**
2161 * Walk the tree of relocations rooted at BO and accumulate the list of
2162 * validations to be performed and update the relocation buffers with
2163 * index values into the validation list.
2164 */
2165static void
2166drm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
2167{
2168	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2169	int i;
2170
2171	if (bo_gem->relocs == NULL)
2172		return;
2173
2174	for (i = 0; i < bo_gem->reloc_count; i++) {
2175		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
2176
2177		if (target_bo == bo)
2178			continue;
2179
2180		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
2181
2182		/* Continue walking the tree depth-first. */
2183		drm_intel_gem_bo_process_reloc(target_bo);
2184
2185		/* Add the target to the validate list */
2186		drm_intel_add_validate_buffer(target_bo);
2187	}
2188}
2189
2190static void
2191drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
2192{
2193	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
2194	int i;
2195
2196	if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL)
2197		return;
2198
2199	for (i = 0; i < bo_gem->reloc_count; i++) {
2200		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
2201		int need_fence;
2202
2203		if (target_bo == bo)
2204			continue;
2205
2206		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
2207
2208		/* Continue walking the tree depth-first. */
2209		drm_intel_gem_bo_process_reloc2(target_bo);
2210
2211		need_fence = (bo_gem->reloc_target_info[i].flags &
2212			      DRM_INTEL_RELOC_FENCE);
2213
2214		/* Add the target to the validate list */
2215		drm_intel_add_validate_buffer2(target_bo, need_fence);
2216	}
2217
2218	for (i = 0; i < bo_gem->softpin_target_count; i++) {
2219		drm_intel_bo *target_bo = bo_gem->softpin_target[i];
2220
2221		if (target_bo == bo)
2222			continue;
2223
2224		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
2225		drm_intel_gem_bo_process_reloc2(target_bo);
2226		drm_intel_add_validate_buffer2(target_bo, false);
2227	}
2228}
2229
2230
2231static void
2232drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
2233{
2234	int i;
2235
2236	for (i = 0; i < bufmgr_gem->exec_count; i++) {
2237		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
2238		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2239
2240		/* Update the buffer offset */
2241		if (bufmgr_gem->exec_objects[i].offset != bo->offset64) {
2242			DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
2243			    bo_gem->gem_handle, bo_gem->name,
2244			    upper_32_bits(bo->offset64),
2245			    lower_32_bits(bo->offset64),
2246			    upper_32_bits(bufmgr_gem->exec_objects[i].offset),
2247			    lower_32_bits(bufmgr_gem->exec_objects[i].offset));
2248			bo->offset64 = bufmgr_gem->exec_objects[i].offset;
2249			bo->offset = bufmgr_gem->exec_objects[i].offset;
2250		}
2251	}
2252}
2253
2254static void
2255drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
2256{
2257	int i;
2258
2259	for (i = 0; i < bufmgr_gem->exec_count; i++) {
2260		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
2261		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
2262
2263		/* Update the buffer offset */
2264		if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) {
2265			/* If we see a softpinned object here, the kernel has
2266			 * relocated it, which indicates a programming error.
2267			 */
2268			assert(!(bo_gem->kflags & EXEC_OBJECT_PINNED));
2269			DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
2270			    bo_gem->gem_handle, bo_gem->name,
2271			    upper_32_bits(bo->offset64),
2272			    lower_32_bits(bo->offset64),
2273			    upper_32_bits(bufmgr_gem->exec2_objects[i].offset),
2274			    lower_32_bits(bufmgr_gem->exec2_objects[i].offset));
2275			bo->offset64 = bufmgr_gem->exec2_objects[i].offset;
2276			bo->offset = bufmgr_gem->exec2_objects[i].offset;
2277		}
2278	}
2279}
2280
2281drm_public void
2282drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo,
2283			      int x1, int y1, int width, int height,
2284			      enum aub_dump_bmp_format format,
2285			      int pitch, int offset)
2286{
2287}
2288
2289static int
2290drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
2291		      drm_clip_rect_t * cliprects, int num_cliprects, int DR4)
2292{
2293	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2294	struct drm_i915_gem_execbuffer execbuf;
2295	int ret, i;
2296
2297	if (to_bo_gem(bo)->has_error)
2298		return -ENOMEM;
2299
2300	pthread_mutex_lock(&bufmgr_gem->lock);
2301	/* Update indices and set up the validate list. */
2302	drm_intel_gem_bo_process_reloc(bo);
2303
2304	/* Add the batch buffer to the validation list.  There are no
2305	 * relocations pointing to it.
2306	 */
2307	drm_intel_add_validate_buffer(bo);
2308
2309	memclear(execbuf);
2310	execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects;
2311	execbuf.buffer_count = bufmgr_gem->exec_count;
2312	execbuf.batch_start_offset = 0;
2313	execbuf.batch_len = used;
2314	execbuf.cliprects_ptr = (uintptr_t) cliprects;
2315	execbuf.num_cliprects = num_cliprects;
2316	execbuf.DR1 = 0;
2317	execbuf.DR4 = DR4;
2318
2319	ret = drmIoctl(bufmgr_gem->fd,
2320		       DRM_IOCTL_I915_GEM_EXECBUFFER,
2321		       &execbuf);
2322	if (ret != 0) {
2323		ret = -errno;
2324		if (errno == ENOSPC) {
2325			DBG("Execbuffer fails to pin. "
2326			    "Estimate: %u. Actual: %u. Available: %u\n",
2327			    drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
2328							       bufmgr_gem->
2329							       exec_count),
2330			    drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
2331							      bufmgr_gem->
2332							      exec_count),
2333			    (unsigned int)bufmgr_gem->gtt_size);
2334		}
2335	}
2336	drm_intel_update_buffer_offsets(bufmgr_gem);
2337
2338	if (bufmgr_gem->bufmgr.debug)
2339		drm_intel_gem_dump_validation_list(bufmgr_gem);
2340
2341	for (i = 0; i < bufmgr_gem->exec_count; i++) {
2342		drm_intel_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]);
2343
2344		bo_gem->idle = false;
2345
2346		/* Disconnect the buffer from the validate list */
2347		bo_gem->validate_index = -1;
2348		bufmgr_gem->exec_bos[i] = NULL;
2349	}
2350	bufmgr_gem->exec_count = 0;
2351	pthread_mutex_unlock(&bufmgr_gem->lock);
2352
2353	return ret;
2354}
2355
2356static int
2357do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx,
2358	 drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
2359	 int in_fence, int *out_fence,
2360	 unsigned int flags)
2361{
2362	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
2363	struct drm_i915_gem_execbuffer2 execbuf;
2364	int ret = 0;
2365	int i;
2366
2367	if (to_bo_gem(bo)->has_error)
2368		return -ENOMEM;
2369
2370	switch (flags & 0x7) {
2371	default:
2372		return -EINVAL;
2373	case I915_EXEC_BLT:
2374		if (!bufmgr_gem->has_blt)
2375			return -EINVAL;
2376		break;
2377	case I915_EXEC_BSD:
2378		if (!bufmgr_gem->has_bsd)
2379			return -EINVAL;
2380		break;
2381	case I915_EXEC_VEBOX:
2382		if (!bufmgr_gem->has_vebox)
2383			return -EINVAL;
2384		break;
2385	case I915_EXEC_RENDER:
2386	case I915_EXEC_DEFAULT:
2387		break;
2388	}
2389
2390	pthread_mutex_lock(&bufmgr_gem->lock);
2391	/* Update indices and set up the validate list. */
2392	drm_intel_gem_bo_process_reloc2(bo);
2393
2394	/* Add the batch buffer to the validation list.  There are no relocations
2395	 * pointing to it.
2396	 */
2397	drm_intel_add_validate_buffer2(bo, 0);
2398
2399	memclear(execbuf);
2400	execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
2401	execbuf.buffer_count = bufmgr_gem->exec_count;
2402	execbuf.batch_start_offset = 0;
2403	execbuf.batch_len = used;
2404	execbuf.cliprects_ptr = (uintptr_t)cliprects;
2405	execbuf.num_cliprects = num_cliprects;
2406	execbuf.DR1 = 0;
2407	execbuf.DR4 = DR4;
2408	execbuf.flags = flags;
2409	if (ctx == NULL)
2410		i915_execbuffer2_set_context_id(execbuf, 0);
2411	else
2412		i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id);
2413	execbuf.rsvd2 = 0;
2414	if (in_fence != -1) {
2415		execbuf.rsvd2 = in_fence;
2416		execbuf.flags |= I915_EXEC_FENCE_IN;
2417	}
2418	if (out_fence != NULL) {
2419		*out_fence = -1;
2420		execbuf.flags |= I915_EXEC_FENCE_OUT;
2421	}
2422
2423	if (bufmgr_gem->no_exec)
2424		goto skip_execution;
2425
2426	ret = drmIoctl(bufmgr_gem->fd,
2427		       DRM_IOCTL_I915_GEM_EXECBUFFER2_WR,
2428		       &execbuf);
2429	if (ret != 0) {
2430		ret = -errno;
2431		if (ret == -ENOSPC) {
2432			DBG("Execbuffer fails to pin. "
2433			    "Estimate: %u. Actual: %u. Available: %u\n",
2434			    drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
2435							       bufmgr_gem->exec_count),
2436			    drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
2437							      bufmgr_gem->exec_count),
2438			    (unsigned int) bufmgr_gem->gtt_size);
2439		}
2440	}
2441	drm_intel_update_buffer_offsets2(bufmgr_gem);
2442
2443	if (ret == 0 && out_fence != NULL)
2444		*out_fence = execbuf.rsvd2 >> 32;
2445
2446skip_execution:
2447	if (bufmgr_gem->bufmgr.debug)
2448		drm_intel_gem_dump_validation_list(bufmgr_gem);
2449
2450	for (i = 0; i < bufmgr_gem->exec_count; i++) {
2451		drm_intel_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]);
2452
2453		bo_gem->idle = false;
2454
2455		/* Disconnect the buffer from the validate list */
2456		bo_gem->validate_index = -1;
2457		bufmgr_gem->exec_bos[i] = NULL;
2458	}
2459	bufmgr_gem->exec_count = 0;
2460	pthread_mutex_unlock(&bufmgr_gem->lock);
2461
2462	return ret;
2463}
2464
2465static int
2466drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used,
2467		       drm_clip_rect_t *cliprects, int num_cliprects,
2468		       int DR4)
2469{
2470	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
2471			-1, NULL, I915_EXEC_RENDER);
2472}
2473
2474static int
2475drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
2476			drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
2477			unsigned int flags)
2478{
2479	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
2480			-1, NULL, flags);
2481}
2482
2483drm_public int
2484drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx,
2485			      int used, unsigned int flags)
2486{
2487	return do_exec2(bo, used, ctx, NULL, 0, 0, -1, NULL, flags);
2488}
2489
2490drm_public int
2491drm_intel_gem_bo_fence_exec(drm_intel_bo *bo,
2492			    drm_intel_context *ctx,
2493			    int used,
2494			    int in_fence,
2495			    int *out_fence,
2496			    unsigned int flags)
2497{
2498	return do_exec2(bo, used, ctx, NULL, 0, 0, in_fence, out_fence, flags);
2499}
2500
2501static int
2502drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
2503{
2504	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2505	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2506	struct drm_i915_gem_pin pin;
2507	int ret;
2508
2509	memclear(pin);
2510	pin.handle = bo_gem->gem_handle;
2511	pin.alignment = alignment;
2512
2513	ret = drmIoctl(bufmgr_gem->fd,
2514		       DRM_IOCTL_I915_GEM_PIN,
2515		       &pin);
2516	if (ret != 0)
2517		return -errno;
2518
2519	bo->offset64 = pin.offset;
2520	bo->offset = pin.offset;
2521	return 0;
2522}
2523
2524static int
2525drm_intel_gem_bo_unpin(drm_intel_bo *bo)
2526{
2527	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2528	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2529	struct drm_i915_gem_unpin unpin;
2530	int ret;
2531
2532	memclear(unpin);
2533	unpin.handle = bo_gem->gem_handle;
2534
2535	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
2536	if (ret != 0)
2537		return -errno;
2538
2539	return 0;
2540}
2541
2542static int
2543drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
2544				     uint32_t tiling_mode,
2545				     uint32_t stride)
2546{
2547	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2548	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2549	struct drm_i915_gem_set_tiling set_tiling;
2550	int ret;
2551
2552	if (bo_gem->global_name == 0 &&
2553	    tiling_mode == bo_gem->tiling_mode &&
2554	    stride == bo_gem->stride)
2555		return 0;
2556
2557	memset(&set_tiling, 0, sizeof(set_tiling));
2558	do {
2559		/* set_tiling is slightly broken and overwrites the
2560		 * input on the error path, so we have to open code
2561		 * drmIoctl.
2562		 */
2563		set_tiling.handle = bo_gem->gem_handle;
2564		set_tiling.tiling_mode = tiling_mode;
2565		set_tiling.stride = stride;
2566
2567		ret = ioctl(bufmgr_gem->fd,
2568			    DRM_IOCTL_I915_GEM_SET_TILING,
2569			    &set_tiling);
2570	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
2571	if (ret == -1)
2572		return -errno;
2573
2574	bo_gem->tiling_mode = set_tiling.tiling_mode;
2575	bo_gem->swizzle_mode = set_tiling.swizzle_mode;
2576	bo_gem->stride = set_tiling.stride;
2577	return 0;
2578}
2579
2580static int
2581drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
2582			    uint32_t stride)
2583{
2584	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2585	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2586	int ret;
2587
2588	/* Tiling with userptr surfaces is not supported
2589	 * on all hardware, so refuse it for the time being.
2590	 */
2591	if (bo_gem->is_userptr)
2592		return -EINVAL;
2593
2594	/* Linear buffers have no stride. By ensuring that we only ever use
2595	 * stride 0 with linear buffers, we simplify our code.
2596	 */
2597	if (*tiling_mode == I915_TILING_NONE)
2598		stride = 0;
2599
2600	ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride);
2601	if (ret == 0)
2602		drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
2603
2604	*tiling_mode = bo_gem->tiling_mode;
2605	return ret;
2606}
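/*
 * Example (editor's sketch, not part of the original file): requesting X
 * tiling through the public wrapper.  The kernel may demote the request,
 * so the caller re-reads the tiling mode afterwards; the pitch value is
 * an assumption.
 *
 *	uint32_t tiling = I915_TILING_X;
 *	if (drm_intel_bo_set_tiling(bo, &tiling, 4096) == 0 &&
 *	    tiling != I915_TILING_X)
 *		fprintf(stderr, "tiling request demoted to %u\n", tiling);
 */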
2607
2608static int
2609drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
2610			    uint32_t * swizzle_mode)
2611{
2612	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2613
2614	*tiling_mode = bo_gem->tiling_mode;
2615	*swizzle_mode = bo_gem->swizzle_mode;
2616	return 0;
2617}
2618
2619static int
2620drm_intel_gem_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset)
2621{
2622	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2623
2624	bo->offset64 = offset;
2625	bo->offset = offset;
2626	bo_gem->kflags |= EXEC_OBJECT_PINNED;
2627
2628	return 0;
2629}
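/*
 * Example (editor's sketch, not part of the original file): pinning a bo
 * at a caller-chosen GPU address via the public wrappers, assuming the
 * kernel advertises EXEC_SOFTPIN (and, for addresses above 4GiB, a
 * 48-bit PPGTT).  Once pinned, drm_intel_bo_emit_reloc() against the bo
 * is routed to the softpin target list instead of emitting a kernel
 * relocation.
 *
 *	drm_intel_bo_set_softpin_offset(target_bo, 0x10000000ull);
 *	drm_intel_bo_use_48b_address_range(target_bo, 1);
 *	drm_intel_bo_emit_reloc(batch_bo, batch_offset, target_bo, 0,
 *				I915_GEM_DOMAIN_RENDER, 0);
 */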
2630
2631drm_public drm_intel_bo *
2632drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size)
2633{
2634	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
2635	int ret;
2636	uint32_t handle;
2637	drm_intel_bo_gem *bo_gem;
2638	struct drm_i915_gem_get_tiling get_tiling;
2639
2640	pthread_mutex_lock(&bufmgr_gem->lock);
2641	ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle);
2642	if (ret) {
2643		DBG("create_from_prime: failed to obtain handle from fd: %s\n", strerror(errno));
2644		pthread_mutex_unlock(&bufmgr_gem->lock);
2645		return NULL;
2646	}
2647
2648	/*
2649	 * See if the kernel has already returned this buffer to us. Just as
2650	 * for named buffers, we must not create two bos pointing at the same
2651	 * kernel object.
2652	 */
2653	HASH_FIND(handle_hh, bufmgr_gem->handle_table,
2654		  &handle, sizeof(handle), bo_gem);
2655	if (bo_gem) {
2656		drm_intel_gem_bo_reference(&bo_gem->bo);
2657		goto out;
2658	}
2659
2660	bo_gem = calloc(1, sizeof(*bo_gem));
2661	if (!bo_gem)
2662		goto out;
2663
2664	atomic_set(&bo_gem->refcount, 1);
2665	DRMINITLISTHEAD(&bo_gem->vma_list);
2666
2667	/* Determine size of bo.  The fd-to-handle ioctl really should
2668	 * return the size, but it doesn't.  If we have kernel 3.12 or
2669	 * later, we can lseek on the prime fd to get the size.  Older
2670	 * kernels will just fail, in which case we fall back to the
2671	 * provided (estimated or guessed) size. */
2672	ret = lseek(prime_fd, 0, SEEK_END);
2673	if (ret != -1)
2674		bo_gem->bo.size = ret;
2675	else
2676		bo_gem->bo.size = size;
2677
2678	bo_gem->bo.handle = handle;
2679	bo_gem->bo.bufmgr = bufmgr;
2680
2681	bo_gem->gem_handle = handle;
2682	HASH_ADD(handle_hh, bufmgr_gem->handle_table,
2683		 gem_handle, sizeof(bo_gem->gem_handle), bo_gem);
2684
2685	bo_gem->name = "prime";
2686	bo_gem->validate_index = -1;
2687	bo_gem->reloc_tree_fences = 0;
2688	bo_gem->used_as_reloc_target = false;
2689	bo_gem->has_error = false;
2690	bo_gem->reusable = false;
2691
2692	memclear(get_tiling);
2693	get_tiling.handle = bo_gem->gem_handle;
2694	if (drmIoctl(bufmgr_gem->fd,
2695		     DRM_IOCTL_I915_GEM_GET_TILING,
2696		     &get_tiling))
2697		goto err;
2698
2699	bo_gem->tiling_mode = get_tiling.tiling_mode;
2700	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
2701	/* XXX stride is unknown */
2702	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
2703
2704out:
2705	pthread_mutex_unlock(&bufmgr_gem->lock);
2706	return &bo_gem->bo;
2707
2708err:
2709	drm_intel_gem_bo_free(&bo_gem->bo);
2710	pthread_mutex_unlock(&bufmgr_gem->lock);
2711	return NULL;
2712}
2713
2714drm_public int
2715drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd)
2716{
2717	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2718	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2719
2720	if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle,
2721			       DRM_CLOEXEC, prime_fd) != 0)
2722		return -errno;
2723
2724	bo_gem->reusable = false;
2725
2726	return 0;
2727}
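/*
 * Example (editor's sketch, not part of the original file): sharing a bo
 * through a dma-buf fd and importing it back.  Closing the fd after the
 * import is fine because GEM keeps its own reference; on old kernels the
 * importer falls back to the size passed in.
 *
 *	int fd;
 *	if (drm_intel_bo_gem_export_to_prime(bo, &fd) == 0) {
 *		drm_intel_bo *imported =
 *			drm_intel_bo_gem_create_from_prime(bufmgr, fd, bo->size);
 *		close(fd);
 *	}
 */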
2728
2729static int
2730drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name)
2731{
2732	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2733	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2734
2735	if (!bo_gem->global_name) {
2736		struct drm_gem_flink flink;
2737
2738		memclear(flink);
2739		flink.handle = bo_gem->gem_handle;
2740		if (drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink))
2741			return -errno;
2742
2743		pthread_mutex_lock(&bufmgr_gem->lock);
2744		if (!bo_gem->global_name) {
2745			bo_gem->global_name = flink.name;
2746			bo_gem->reusable = false;
2747
2748			HASH_ADD(name_hh, bufmgr_gem->name_table,
2749				 global_name, sizeof(bo_gem->global_name),
2750				 bo_gem);
2751		}
2752		pthread_mutex_unlock(&bufmgr_gem->lock);
2753	}
2754
2755	*name = bo_gem->global_name;
2756	return 0;
2757}
2758
2759/**
2760 * Enables unlimited caching of buffer objects for reuse.
2761 *
2762 * This is potentially very memory expensive, as the cache at each bucket
2763 * size is only bounded by how many buffers of that size we've managed to have
2764 * in flight at once.
2765 */
2766drm_public void
2767drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr)
2768{
2769	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
2770
2771	bufmgr_gem->bo_reuse = true;
2772}
2773
2774/**
2775 * Disables implicit synchronisation before executing the bo
2776 *
2777 * This will cause rendering corruption unless you correctly manage explicit
2778 * fences for all rendering involving this buffer - including use by others.
2779 * Disabling the implicit serialisation is only required if that serialisation
2780 * is too coarse (for example, you have split the buffer into many
2781 * non-overlapping regions and are sharing the whole buffer between concurrent
2782 * independent command streams).
2783 *
2784 * Note the kernel must advertise support via I915_PARAM_HAS_EXEC_ASYNC,
2785 * which can be checked using drm_intel_bufmgr_can_disable_implicit_sync,
2786 * or subsequent execbufs involving the bo will generate EINVAL.
2787 */
2788drm_public void
2789drm_intel_gem_bo_disable_implicit_sync(drm_intel_bo *bo)
2790{
2791	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2792
2793	bo_gem->kflags |= EXEC_OBJECT_ASYNC;
2794}
2795
2796/**
2797 * Enables implicit synchronisation before executing the bo
2798 *
2799 * This is the default behaviour of the kernel, to wait upon prior writes
2800 * completing on the object before rendering with it, or to wait for prior
2801 * reads to complete before writing into the object.
2802 * drm_intel_gem_bo_disable_implicit_sync() can stop this behaviour, telling
2803 * the kernel never to insert a stall before using the object. Then this
2804 * function can be used to restore the implicit sync before subsequent
2805 * rendering.
2806 */
2807drm_public void
2808drm_intel_gem_bo_enable_implicit_sync(drm_intel_bo *bo)
2809{
2810	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2811
2812	bo_gem->kflags &= ~EXEC_OBJECT_ASYNC;
2813}
2814
2815/**
2816 * Query whether the kernel supports disabling of its implicit synchronisation
2817 * before execbuf. See drm_intel_gem_bo_disable_implicit_sync()
2818 */
2819drm_public int
2820drm_intel_bufmgr_gem_can_disable_implicit_sync(drm_intel_bufmgr *bufmgr)
2821{
2822	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
2823
2824	return bufmgr_gem->has_exec_async;
2825}
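/*
 * Example (editor's sketch, not part of the original file): opting a
 * shared bo out of implicit synchronisation when the kernel advertises
 * the capability, and handing synchronisation over to explicit fences.
 * shared_bo, batch_bo, ctx and used are assumed to come from the caller.
 *
 *	if (drm_intel_bufmgr_gem_can_disable_implicit_sync(bufmgr)) {
 *		int out_fence = -1;
 *
 *		drm_intel_gem_bo_disable_implicit_sync(shared_bo);
 *		drm_intel_gem_bo_fence_exec(batch_bo, ctx, used,
 *					    -1, &out_fence, I915_EXEC_RENDER);
 *		// out_fence must now be waited on or passed along explicitly
 *	}
 */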
2826
2827/**
2828 * Enable use of fenced reloc type.
2829 *
2830 * New code should enable this to avoid unnecessary fence register
2831 * allocation.  If this option is not enabled, all relocs will have a fence
2832 * register allocated.
2833 */
2834drm_public void
2835drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr)
2836{
2837	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
2838
2839	if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2)
2840		bufmgr_gem->fenced_relocs = true;
2841}
2842
2843/**
2844 * Return the additional aperture space required by the tree of buffer objects
2845 * rooted at bo.
2846 */
2847static int
2848drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
2849{
2850	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2851	int i;
2852	int total = 0;
2853
2854	if (bo == NULL || bo_gem->included_in_check_aperture)
2855		return 0;
2856
2857	total += bo->size;
2858	bo_gem->included_in_check_aperture = true;
2859
2860	for (i = 0; i < bo_gem->reloc_count; i++)
2861		total +=
2862		    drm_intel_gem_bo_get_aperture_space(bo_gem->
2863							reloc_target_info[i].bo);
2864
2865	return total;
2866}
2867
2868/**
2869 * Count the number of buffers in this list that need a fence reg
2870 *
2871 * If the count is greater than the number of available regs, we'll have
2872 * to ask the caller to resubmit a batch with fewer tiled buffers.
2873 *
2874 * This function over-counts if the same buffer is used multiple times.
2875 */
2876static unsigned int
2877drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count)
2878{
2879	int i;
2880	unsigned int total = 0;
2881
2882	for (i = 0; i < count; i++) {
2883		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
2884
2885		if (bo_gem == NULL)
2886			continue;
2887
2888		total += bo_gem->reloc_tree_fences;
2889	}
2890	return total;
2891}
2892
2893/**
2894 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready
2895 * for the next drm_intel_bufmgr_check_aperture_space() call.
2896 */
2897static void
2898drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
2899{
2900	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2901	int i;
2902
2903	if (bo == NULL || !bo_gem->included_in_check_aperture)
2904		return;
2905
2906	bo_gem->included_in_check_aperture = false;
2907
2908	for (i = 0; i < bo_gem->reloc_count; i++)
2909		drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->
2910							   reloc_target_info[i].bo);
2911}
2912
2913/**
2914 * Return a conservative estimate for the amount of aperture required
2915 * for a collection of buffers. This may double-count some buffers.
2916 */
2917static unsigned int
2918drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count)
2919{
2920	int i;
2921	unsigned int total = 0;
2922
2923	for (i = 0; i < count; i++) {
2924		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
2925		if (bo_gem != NULL)
2926			total += bo_gem->reloc_tree_size;
2927	}
2928	return total;
2929}
2930
2931/**
2932 * Return the amount of aperture needed for a collection of buffers.
2933 * This avoids double counting any buffers, at the cost of looking
2934 * at every buffer in the set.
2935 */
2936static unsigned int
2937drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count)
2938{
2939	int i;
2940	unsigned int total = 0;
2941
2942	for (i = 0; i < count; i++) {
2943		total += drm_intel_gem_bo_get_aperture_space(bo_array[i]);
2944		/* For the first buffer object in the array, we get an
2945		 * accurate count back for its reloc_tree size (since nothing
2946		 * had been flagged as being counted yet).  We can save that
2947		 * value out as a more conservative reloc_tree_size that
2948		 * avoids double-counting target buffers.  Since the first
2949		 * buffer happens to usually be the batch buffer in our
2950		 * callers, this can pull us back from doing the tree
2951		 * walk on every new batch emit.
2952		 */
2953		if (i == 0) {
2954			drm_intel_bo_gem *bo_gem =
2955			    (drm_intel_bo_gem *) bo_array[i];
2956			bo_gem->reloc_tree_size = total;
2957		}
2958	}
2959
2960	for (i = 0; i < count; i++)
2961		drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]);
2962	return total;
2963}
2964
2965/**
2966 * Return -1 if the batchbuffer should be flushed before attempting to
2967 * emit rendering referencing the buffers pointed to by bo_array.
2968 *
2969 * This is required because if we try to emit a batchbuffer with relocations
2970 * to a tree of buffers that won't simultaneously fit in the aperture,
2971 * the rendering will return an error at a point where the software is not
2972 * prepared to recover from it.
2973 *
2974 * However, we also want to emit the batchbuffer significantly before we reach
2975 * the limit, as a series of batchbuffers each of which references buffers
2976 * covering almost all of the aperture means that at each emit we end up
2977 * waiting to evict a buffer from the last rendering, and we get synchronous
2978 * performance.  By emitting smaller batchbuffers, we eat some CPU overhead to
2979 * get better parallelism.
2980 */
2981static int
2982drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
2983{
2984	drm_intel_bufmgr_gem *bufmgr_gem =
2985	    (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr;
2986	unsigned int total = 0;
2987	unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
2988	int total_fences;
2989
2990	/* Check for fence reg constraints if necessary */
2991	if (bufmgr_gem->available_fences) {
2992		total_fences = drm_intel_gem_total_fences(bo_array, count);
2993		if (total_fences > bufmgr_gem->available_fences)
2994			return -ENOSPC;
2995	}
2996
2997	total = drm_intel_gem_estimate_batch_space(bo_array, count);
2998
2999	if (total > threshold)
3000		total = drm_intel_gem_compute_batch_space(bo_array, count);
3001
3002	if (total > threshold) {
3003		DBG("check_space: overflowed available aperture, "
3004		    "%dkb vs %dkb\n",
3005		    total / 1024, (int)bufmgr_gem->gtt_size / 1024);
3006		return -ENOSPC;
3007	} else {
3008		DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024,
3009		    (int)bufmgr_gem->gtt_size / 1024);
3010		return 0;
3011	}
3012}
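/*
 * Example (editor's sketch, not part of the original file): checking the
 * batch and the buffers it is about to reference before emitting more
 * state, and flushing early on -ENOSPC.  flush_batch() is a hypothetical
 * helper.
 *
 *	drm_intel_bo *bos[] = { batch_bo, src_bo, dst_bo };
 *	if (drm_intel_bufmgr_check_aperture_space(bos, 3) == -ENOSPC)
 *		flush_batch();
 */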
3013
3014/*
3015 * Disable buffer reuse for objects which are shared with the kernel
3016 * as scanout buffers
3017 */
3018static int
3019drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo)
3020{
3021	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3022
3023	bo_gem->reusable = false;
3024	return 0;
3025}
3026
3027static int
3028drm_intel_gem_bo_is_reusable(drm_intel_bo *bo)
3029{
3030	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3031
3032	return bo_gem->reusable;
3033}
3034
3035static int
3036_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
3037{
3038	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3039	int i;
3040
3041	for (i = 0; i < bo_gem->reloc_count; i++) {
3042		if (bo_gem->reloc_target_info[i].bo == target_bo)
3043			return 1;
3044		if (bo == bo_gem->reloc_target_info[i].bo)
3045			continue;
3046		if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo,
3047						target_bo))
3048			return 1;
3049	}
3050
3051	for (i = 0; i < bo_gem->softpin_target_count; i++) {
3052		if (bo_gem->softpin_target[i] == target_bo)
3053			return 1;
3054		if (_drm_intel_gem_bo_references(bo_gem->softpin_target[i], target_bo))
3055			return 1;
3056	}
3057
3058	return 0;
3059}
3060
3061/** Return true if target_bo is referenced by bo's relocation tree. */
3062static int
3063drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
3064{
3065	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
3066
3067	if (bo == NULL || target_bo == NULL)
3068		return 0;
3069	if (target_bo_gem->used_as_reloc_target)
3070		return _drm_intel_gem_bo_references(bo, target_bo);
3071	return 0;
3072}
3073
3074static void
3075add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
3076{
3077	unsigned int i = bufmgr_gem->num_buckets;
3078
3079	assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));
3080
3081	DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
3082	bufmgr_gem->cache_bucket[i].size = size;
3083	bufmgr_gem->num_buckets++;
3084}
3085
3086static void
3087init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
3088{
3089	unsigned long size, cache_max_size = 64 * 1024 * 1024;
3090
3091	/* OK, so power of two buckets was too wasteful of memory.
3092	 * Give 3 other sizes between each power of two, to hopefully
3093	 * cover things accurately enough.  (The alternative is
3094	 * probably to just go for exact matching of sizes, and assume
3095	 * that for things like composited window resize the tiled
3096	 * width/height alignment and rounding of sizes to pages will
3097	 * get us useful cache hit rates anyway)
3098	 */
3099	add_bucket(bufmgr_gem, 4096);
3100	add_bucket(bufmgr_gem, 4096 * 2);
3101	add_bucket(bufmgr_gem, 4096 * 3);
3102
3103	/* Initialize the linked lists for BO reuse cache. */
3104	for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
3105		add_bucket(bufmgr_gem, size);
3106
3107		add_bucket(bufmgr_gem, size + size * 1 / 4);
3108		add_bucket(bufmgr_gem, size + size * 2 / 4);
3109		add_bucket(bufmgr_gem, size + size * 3 / 4);
3110	}
3111}
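/*
 * Editor's note: the buckets above work out to 1, 2 and 3 pages, then for
 * each power of two up to the 64MB cap the power itself plus three evenly
 * spaced sizes before the next one, i.e. 4, 5, 6, 7, 8, 10, 12, 14, 16,
 * 20, 24, 28 pages and so on.
 */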
3112
3113drm_public void
3114drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit)
3115{
3116	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3117
3118	bufmgr_gem->vma_max = limit;
3119
3120	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
3121}
3122
3123static int
3124parse_devid_override(const char *devid_override)
3125{
3126	static const struct {
3127		const char *name;
3128		int pci_id;
3129	} name_map[] = {
3130		{ "brw", PCI_CHIP_I965_GM },
3131		{ "g4x", PCI_CHIP_GM45_GM },
3132		{ "ilk", PCI_CHIP_ILD_G },
3133		{ "snb", PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS },
3134		{ "ivb", PCI_CHIP_IVYBRIDGE_S_GT2 },
3135		{ "hsw", PCI_CHIP_HASWELL_CRW_E_GT3 },
3136		{ "byt", PCI_CHIP_VALLEYVIEW_3 },
3137		{ "bdw", 0x1620 | BDW_ULX },
3138		{ "skl", PCI_CHIP_SKYLAKE_DT_GT2 },
3139		{ "kbl", PCI_CHIP_KABYLAKE_DT_GT2 },
3140	};
3141	unsigned int i;
3142
3143	for (i = 0; i < ARRAY_SIZE(name_map); i++) {
3144		if (!strcmp(name_map[i].name, devid_override))
3145			return name_map[i].pci_id;
3146	}
3147
3148	return strtod(devid_override, NULL);
3149}
3150
3151/**
3152 * Get the PCI ID for the device.  This can be overridden by setting the
3153 * INTEL_DEVID_OVERRIDE environment variable to the desired ID.
3154 */
3155static int
3156get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem)
3157{
3158	char *devid_override;
3159	int devid = 0;
3160	int ret;
3161	drm_i915_getparam_t gp;
3162
3163	if (geteuid() == getuid()) {
3164		devid_override = getenv("INTEL_DEVID_OVERRIDE");
3165		if (devid_override) {
3166			bufmgr_gem->no_exec = true;
3167			return parse_devid_override(devid_override);
3168		}
3169	}
3170
3171	memclear(gp);
3172	gp.param = I915_PARAM_CHIPSET_ID;
3173	gp.value = &devid;
3174	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3175	if (ret) {
3176		fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
3177		fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
3178	}
3179	return devid;
3180}
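/*
 * Example (editor's sketch, not part of the original file): overriding
 * the detected device for testing.  Any name from the table in
 * parse_devid_override() (or a numeric PCI id) works; execution is then
 * disabled via no_exec since the real hardware may not match.
 *
 *	$ INTEL_DEVID_OVERRIDE=skl ./my_test
 */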
3181
3182drm_public int
3183drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr)
3184{
3185	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3186
3187	return bufmgr_gem->pci_device;
3188}
3189
3190/**
3191 * Sets the AUB filename.
3192 *
3193 * This function has to be called before drm_intel_bufmgr_gem_set_aub_dump()
3194 * for it to have any effect.
3195 */
3196drm_public void
3197drm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr,
3198				      const char *filename)
3199{
3200}
3201
3202/**
3203 * Sets up AUB dumping.
3204 *
3205 * This is a trace file format that can be used with the simulator.
3206 * Packets are emitted in a format somewhat like GPU command packets.
3207 * You can set up a GTT and upload your objects into the referenced
3208 * space, then send off batchbuffers and get BMPs out the other end.
3209 */
3210drm_public void
3211drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable)
3212{
3213	fprintf(stderr, "libdrm aub dumping is deprecated.\n\n"
3214		"Use intel_aubdump from intel-gpu-tools instead.  Install intel-gpu-tools,\n"
3215		"then run (for example)\n\n"
3216		"\t$ intel_aubdump --output=trace.aub glxgears -geometry 500x500\n\n"
3217		"See the intel_aubdump man page for more details.\n");
3218}
3219
3220drm_public drm_intel_context *
3221drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr)
3222{
3223	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3224	struct drm_i915_gem_context_create create;
3225	drm_intel_context *context = NULL;
3226	int ret;
3227
3228	context = calloc(1, sizeof(*context));
3229	if (!context)
3230		return NULL;
3231
3232	memclear(create);
3233	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
3234	if (ret != 0) {
3235		DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n",
3236		    strerror(errno));
3237		free(context);
3238		return NULL;
3239	}
3240
3241	context->ctx_id = create.ctx_id;
3242	context->bufmgr = bufmgr;
3243
3244	return context;
3245}
3246
3247drm_public int
3248drm_intel_gem_context_get_id(drm_intel_context *ctx, uint32_t *ctx_id)
3249{
3250	if (ctx == NULL)
3251		return -EINVAL;
3252
3253	*ctx_id = ctx->ctx_id;
3254
3255	return 0;
3256}
3257
3258drm_public void
3259drm_intel_gem_context_destroy(drm_intel_context *ctx)
3260{
3261	drm_intel_bufmgr_gem *bufmgr_gem;
3262	struct drm_i915_gem_context_destroy destroy;
3263	int ret;
3264
3265	if (ctx == NULL)
3266		return;
3267
3268	memclear(destroy);
3269
3270	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
3271	destroy.ctx_id = ctx->ctx_id;
3272	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY,
3273		       &destroy);
3274	if (ret != 0)
3275		fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
3276			strerror(errno));
3277
3278	free(ctx);
3279}
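/*
 * Example (editor's sketch, not part of the original file): running a
 * batch in its own hardware context so its GPU state and reset statistics
 * are kept separate from other users of the fd.  batch_bo and used are
 * assumed to come from the caller.
 *
 *	drm_intel_context *ctx = drm_intel_gem_context_create(bufmgr);
 *	if (ctx) {
 *		drm_intel_gem_bo_context_exec(batch_bo, ctx, used,
 *					      I915_EXEC_RENDER);
 *		drm_intel_gem_context_destroy(ctx);
 *	}
 */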
3280
3281drm_public int
3282drm_intel_get_reset_stats(drm_intel_context *ctx,
3283			  uint32_t *reset_count,
3284			  uint32_t *active,
3285			  uint32_t *pending)
3286{
3287	drm_intel_bufmgr_gem *bufmgr_gem;
3288	struct drm_i915_reset_stats stats;
3289	int ret;
3290
3291	if (ctx == NULL)
3292		return -EINVAL;
3293
3294	memclear(stats);
3295
3296	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
3297	stats.ctx_id = ctx->ctx_id;
3298	ret = drmIoctl(bufmgr_gem->fd,
3299		       DRM_IOCTL_I915_GET_RESET_STATS,
3300		       &stats);
3301	if (ret == 0) {
3302		if (reset_count != NULL)
3303			*reset_count = stats.reset_count;
3304
3305		if (active != NULL)
3306			*active = stats.batch_active;
3307
3308		if (pending != NULL)
3309			*pending = stats.batch_pending;
3310	}
3311
3312	return ret;
3313}
3314
3315drm_public int
3316drm_intel_reg_read(drm_intel_bufmgr *bufmgr,
3317		   uint32_t offset,
3318		   uint64_t *result)
3319{
3320	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3321	struct drm_i915_reg_read reg_read;
3322	int ret;
3323
3324	memclear(reg_read);
3325	reg_read.offset = offset;
3326
3327	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
3328
3329	*result = reg_read.val;
3330	return ret;
3331}
3332
3333drm_public int
3334drm_intel_get_subslice_total(int fd, unsigned int *subslice_total)
3335{
3336	drm_i915_getparam_t gp;
3337	int ret;
3338
3339	memclear(gp);
3340	gp.value = (int*)subslice_total;
3341	gp.param = I915_PARAM_SUBSLICE_TOTAL;
3342	ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
3343	if (ret)
3344		return -errno;
3345
3346	return 0;
3347}
3348
3349drm_public int
3350drm_intel_get_eu_total(int fd, unsigned int *eu_total)
3351{
3352	drm_i915_getparam_t gp;
3353	int ret;
3354
3355	memclear(gp);
3356	gp.value = (int*)eu_total;
3357	gp.param = I915_PARAM_EU_TOTAL;
3358	ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
3359	if (ret)
3360		return -errno;
3361
3362	return 0;
3363}
3364
3365drm_public int
3366drm_intel_get_pooled_eu(int fd)
3367{
3368	drm_i915_getparam_t gp;
3369	int ret = -1;
3370
3371	memclear(gp);
3372	gp.param = I915_PARAM_HAS_POOLED_EU;
3373	gp.value = &ret;
3374	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
3375		return -errno;
3376
3377	return ret;
3378}
3379
3380drm_public int
3381drm_intel_get_min_eu_in_pool(int fd)
3382{
3383	drm_i915_getparam_t gp;
3384	int ret = -1;
3385
3386	memclear(gp);
3387	gp.param = I915_PARAM_MIN_EU_IN_POOL;
3388	gp.value = &ret;
3389	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
3390		return -errno;
3391
3392	return ret;
3393}
3394
3395/**
3396 * Annotate the given bo for use in aub dumping.
3397 *
3398 * \param annotations is an array of drm_intel_aub_annotation objects
3399 * describing the type of data in various sections of the bo.  Each
3400 * element of the array specifies the type and subtype of a section of
3401 * the bo, and the past-the-end offset of that section.  The elements
3402 * of \c annotations must be sorted so that ending_offset is
3403 * increasing.
3404 *
3405 * \param count is the number of elements in the \c annotations array.
3406 * If \c count is zero, then \c annotations will not be dereferenced.
3407 *
3408 * Annotations are copied into a private data structure, so caller may
3409 * re-use the memory pointed to by \c annotations after the call
3410 * returns.
3411 *
3412 * Annotations are stored for the lifetime of the bo; to reset to the
3413 * default state (no annotations), call this function with a \c count
3414 * of zero.
3415 */
3416drm_public void drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo,
3417					 drm_intel_aub_annotation *annotations,
3418					 unsigned count)
3419{
3420}
3421
3422static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER;
3423static drmMMListHead bufmgr_list = { &bufmgr_list, &bufmgr_list };
3424
3425static drm_intel_bufmgr_gem *
3426drm_intel_bufmgr_gem_find(int fd)
3427{
3428	drm_intel_bufmgr_gem *bufmgr_gem;
3429
3430	DRMLISTFOREACHENTRY(bufmgr_gem, &bufmgr_list, managers) {
3431		if (bufmgr_gem->fd == fd) {
3432			atomic_inc(&bufmgr_gem->refcount);
3433			return bufmgr_gem;
3434		}
3435	}
3436
3437	return NULL;
3438}
3439
3440static void
3441drm_intel_bufmgr_gem_unref(drm_intel_bufmgr *bufmgr)
3442{
3443	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3444
3445	if (atomic_add_unless(&bufmgr_gem->refcount, -1, 1)) {
3446		pthread_mutex_lock(&bufmgr_list_mutex);
3447
3448		if (atomic_dec_and_test(&bufmgr_gem->refcount)) {
3449			DRMLISTDEL(&bufmgr_gem->managers);
3450			drm_intel_bufmgr_gem_destroy(bufmgr);
3451		}
3452
3453		pthread_mutex_unlock(&bufmgr_list_mutex);
3454	}
3455}
3456
3457drm_public void *drm_intel_gem_bo_map__gtt(drm_intel_bo *bo)
3458{
3459	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
3460	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3461
3462	if (bo_gem->gtt_virtual)
3463		return bo_gem->gtt_virtual;
3464
3465	if (bo_gem->is_userptr)
3466		return NULL;
3467
3468	pthread_mutex_lock(&bufmgr_gem->lock);
3469	if (bo_gem->gtt_virtual == NULL) {
3470		struct drm_i915_gem_mmap_gtt mmap_arg;
3471		void *ptr;
3472
3473		DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
3474		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
3475
3476		if (bo_gem->map_count++ == 0)
3477			drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
3478
3479		memclear(mmap_arg);
3480		mmap_arg.handle = bo_gem->gem_handle;
3481
3482		/* Get the fake offset back... */
3483		ptr = MAP_FAILED;
3484		if (drmIoctl(bufmgr_gem->fd,
3485			     DRM_IOCTL_I915_GEM_MMAP_GTT,
3486			     &mmap_arg) == 0) {
3487			/* and mmap it */
3488			ptr = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
3489				       MAP_SHARED, bufmgr_gem->fd,
3490				       mmap_arg.offset);
3491		}
3492		if (ptr == MAP_FAILED) {
3493			if (--bo_gem->map_count == 0)
3494				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
3495			ptr = NULL;
3496		}
3497
3498		bo_gem->gtt_virtual = ptr;
3499	}
3500	pthread_mutex_unlock(&bufmgr_gem->lock);
3501
3502	return bo_gem->gtt_virtual;
3503}
3504
3505drm_public void *drm_intel_gem_bo_map__cpu(drm_intel_bo *bo)
3506{
3507	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
3508	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3509
3510	if (bo_gem->mem_virtual)
3511		return bo_gem->mem_virtual;
3512
3513	if (bo_gem->is_userptr) {
3514		/* Return the same user ptr */
3515		return bo_gem->user_virtual;
3516	}
3517
3518	pthread_mutex_lock(&bufmgr_gem->lock);
3519	if (!bo_gem->mem_virtual) {
3520		struct drm_i915_gem_mmap mmap_arg;
3521
3522		if (bo_gem->map_count++ == 0)
3523			drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
3524
3525		DBG("bo_map: %d (%s), map_count=%d\n",
3526		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
3527
3528		memclear(mmap_arg);
3529		mmap_arg.handle = bo_gem->gem_handle;
3530		mmap_arg.size = bo->size;
3531		if (drmIoctl(bufmgr_gem->fd,
3532			     DRM_IOCTL_I915_GEM_MMAP,
3533			     &mmap_arg)) {
3534			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
3535			    __FILE__, __LINE__, bo_gem->gem_handle,
3536			    bo_gem->name, strerror(errno));
3537			if (--bo_gem->map_count == 0)
3538				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
3539		} else {
3540			VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
3541			bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
3542		}
3543	}
3544	pthread_mutex_unlock(&bufmgr_gem->lock);
3545
3546	return bo_gem->mem_virtual;
3547}
3548
3549drm_public void *drm_intel_gem_bo_map__wc(drm_intel_bo *bo)
3550{
3551	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
3552	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3553
3554	if (bo_gem->wc_virtual)
3555		return bo_gem->wc_virtual;
3556
3557	if (bo_gem->is_userptr)
3558		return NULL;
3559
3560	pthread_mutex_lock(&bufmgr_gem->lock);
3561	if (!bo_gem->wc_virtual) {
3562		struct drm_i915_gem_mmap mmap_arg;
3563
3564		if (bo_gem->map_count++ == 0)
3565			drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
3566
3567		DBG("bo_map: %d (%s), map_count=%d\n",
3568		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
3569
3570		memclear(mmap_arg);
3571		mmap_arg.handle = bo_gem->gem_handle;
3572		mmap_arg.size = bo->size;
3573		mmap_arg.flags = I915_MMAP_WC;
3574		if (drmIoctl(bufmgr_gem->fd,
3575			     DRM_IOCTL_I915_GEM_MMAP,
3576			     &mmap_arg)) {
3577			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
3578			    __FILE__, __LINE__, bo_gem->gem_handle,
3579			    bo_gem->name, strerror(errno));
3580			if (--bo_gem->map_count == 0)
3581				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
3582		} else {
3583			VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
3584			bo_gem->wc_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
3585		}
3586	}
3587	pthread_mutex_unlock(&bufmgr_gem->lock);
3588
3589	return bo_gem->wc_virtual;
3590}
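/*
 * Example (editor's sketch, not part of the original file): the three
 * map helpers above simply cache a mapping on the bo without any domain
 * tracking, so a caller might choose between them roughly like this,
 * with llc assumed to come from an I915_PARAM_HAS_LLC query.
 *
 *	void *ptr = llc ? drm_intel_gem_bo_map__cpu(bo)
 *			: drm_intel_gem_bo_map__wc(bo);
 *	if (!ptr)
 *		ptr = drm_intel_gem_bo_map__gtt(bo);
 */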
3591
3592/**
3593 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
3594 * and manage buffer objects.
3595 *
3596 * \param fd File descriptor of the opened DRM device.
3597 */
3598drm_public drm_intel_bufmgr *
3599drm_intel_bufmgr_gem_init(int fd, int batch_size)
3600{
3601	drm_intel_bufmgr_gem *bufmgr_gem;
3602	struct drm_i915_gem_get_aperture aperture;
3603	drm_i915_getparam_t gp;
3604	int ret, tmp;
3605	bool exec2 = false;
3606
3607	pthread_mutex_lock(&bufmgr_list_mutex);
3608
3609	bufmgr_gem = drm_intel_bufmgr_gem_find(fd);
3610	if (bufmgr_gem)
3611		goto exit;
3612
3613	bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
3614	if (bufmgr_gem == NULL)
3615		goto exit;
3616
3617	bufmgr_gem->fd = fd;
3618	atomic_set(&bufmgr_gem->refcount, 1);
3619
3620	if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
3621		free(bufmgr_gem);
3622		bufmgr_gem = NULL;
3623		goto exit;
3624	}
3625
3626	memclear(aperture);
3627	ret = drmIoctl(bufmgr_gem->fd,
3628		       DRM_IOCTL_I915_GEM_GET_APERTURE,
3629		       &aperture);
3630
3631	if (ret == 0)
3632		bufmgr_gem->gtt_size = aperture.aper_available_size;
3633	else {
3634		fprintf(stderr, "DRM_IOCTL_I915_GEM_APERTURE failed: %s\n",
3635			strerror(errno));
3636		bufmgr_gem->gtt_size = 128 * 1024 * 1024;
3637		fprintf(stderr, "Assuming %dkB available aperture size.\n"
3638			"May lead to reduced performance or incorrect "
3639			"rendering.\n",
3640			(int)bufmgr_gem->gtt_size / 1024);
3641	}
3642
3643	bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem);
3644
3645	if (IS_GEN2(bufmgr_gem->pci_device))
3646		bufmgr_gem->gen = 2;
3647	else if (IS_GEN3(bufmgr_gem->pci_device))
3648		bufmgr_gem->gen = 3;
3649	else if (IS_GEN4(bufmgr_gem->pci_device))
3650		bufmgr_gem->gen = 4;
3651	else if (IS_GEN5(bufmgr_gem->pci_device))
3652		bufmgr_gem->gen = 5;
3653	else if (IS_GEN6(bufmgr_gem->pci_device))
3654		bufmgr_gem->gen = 6;
3655	else if (IS_GEN7(bufmgr_gem->pci_device))
3656		bufmgr_gem->gen = 7;
3657	else if (IS_GEN8(bufmgr_gem->pci_device))
3658		bufmgr_gem->gen = 8;
3659	else if (!intel_get_genx(bufmgr_gem->pci_device, &bufmgr_gem->gen)) {
3660		free(bufmgr_gem);
3661		bufmgr_gem = NULL;
3662		goto exit;
3663	}
3664
3665	if (IS_GEN3(bufmgr_gem->pci_device) &&
3666	    bufmgr_gem->gtt_size > 256*1024*1024) {
3667		/* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't
3668		 * be used for tiled blits. To simplify the accounting, just
3669		 * subtract the unmappable part (fixed to 256MB on all known
3670		 * gen3 devices) if the kernel advertises it. */
3671		bufmgr_gem->gtt_size -= 256*1024*1024;
3672	}
3673
3674	memclear(gp);
3675	gp.value = &tmp;
3676
3677	gp.param = I915_PARAM_HAS_EXECBUF2;
3678	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3679	if (!ret)
3680		exec2 = true;
3681
3682	gp.param = I915_PARAM_HAS_BSD;
3683	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3684	bufmgr_gem->has_bsd = ret == 0;
3685
3686	gp.param = I915_PARAM_HAS_BLT;
3687	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3688	bufmgr_gem->has_blt = ret == 0;
3689
3690	gp.param = I915_PARAM_HAS_RELAXED_FENCING;
3691	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3692	bufmgr_gem->has_relaxed_fencing = ret == 0;
3693
3694	gp.param = I915_PARAM_HAS_EXEC_ASYNC;
3695	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3696	bufmgr_gem->has_exec_async = ret == 0;
3697
3698	bufmgr_gem->bufmgr.bo_alloc_userptr = check_bo_alloc_userptr;
3699
3700	gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
3701	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3702	bufmgr_gem->has_wait_timeout = ret == 0;
3703
3704	gp.param = I915_PARAM_HAS_LLC;
3705	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3706	if (ret != 0) {
3707		/* Kernel does not support the HAS_LLC query; fall back to GPU
3708		 * generation detection and assume that we have LLC on GEN6/7.
3709		 */
3710		bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) ||
3711				IS_GEN7(bufmgr_gem->pci_device));
3712	} else
3713		bufmgr_gem->has_llc = *gp.value;
3714
3715	gp.param = I915_PARAM_HAS_VEBOX;
3716	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3717	bufmgr_gem->has_vebox = (ret == 0) && (*gp.value > 0);
3718
3719	gp.param = I915_PARAM_HAS_EXEC_SOFTPIN;
3720	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3721	if (ret == 0 && *gp.value > 0)
3722		bufmgr_gem->bufmgr.bo_set_softpin_offset = drm_intel_gem_bo_set_softpin_offset;
3723
3724	if (bufmgr_gem->gen < 4) {
3725		gp.param = I915_PARAM_NUM_FENCES_AVAIL;
3726		gp.value = &bufmgr_gem->available_fences;
3727		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3728		if (ret) {
3729			fprintf(stderr, "get fences failed: %d [%d]\n", ret,
3730				errno);
3731			fprintf(stderr, "param: %d, val: %d\n", gp.param,
3732				*gp.value);
3733			bufmgr_gem->available_fences = 0;
3734		} else {
3735			/* XXX The kernel reports the total number of fences,
3736			 * including any that may be pinned.
3737			 *
3738			 * We presume that there will be at least one pinned
3739			 * fence for the scanout buffer, but there may be more
3740			 * than one scanout and the user may be manually
3741			 * pinning buffers. Let's move to execbuffer2 and
3742			 * thereby forget the insanity of using fences...
3743			 */
3744			bufmgr_gem->available_fences -= 2;
3745			if (bufmgr_gem->available_fences < 0)
3746				bufmgr_gem->available_fences = 0;
3747		}
3748	}
3749
3750	if (bufmgr_gem->gen >= 8) {
3751		gp.param = I915_PARAM_HAS_ALIASING_PPGTT;
3752		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
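		/* A value of 3 is how the kernel reports full 48-bit PPGTT;
		 * only in that case is the 48-bit address range hook below
		 * exposed.
		 */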
3753		if (ret == 0 && *gp.value == 3)
3754			bufmgr_gem->bufmgr.bo_use_48b_address_range = drm_intel_gem_bo_use_48b_address_range;
3755	}
3756
3757	/* Let's go with one relocation for every 2 dwords (but round down a bit
3758	 * since a power of two would mean an extra page allocation for the reloc
3759	 * buffer).
3760	 *
3761	 * One relocation per 4 dwords was too few for the blender benchmark.
3762	 */
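	/* Illustrative example: a 16 KiB batch gives
	 * 16384 / sizeof(uint32_t) / 2 - 2 = 2046 relocations.
	 */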
3763	bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
3764
3765	bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
3766	bufmgr_gem->bufmgr.bo_alloc_for_render =
3767	    drm_intel_gem_bo_alloc_for_render;
3768	bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
3769	bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
3770	bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
3771	bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
3772	bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
3773	bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
3774	bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
3775	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
3776	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
3777	bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
3778	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
3779	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
3780	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
3781	bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
3782	bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
3783	/* Use execbuffer2 if the kernel supports it */
3784	if (exec2) {
3785		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2;
3786		bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2;
3787	} else
3788		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
3789	bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy;
3790	bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise;
3791	bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_unref;
3792	bufmgr_gem->bufmgr.debug = 0;
3793	bufmgr_gem->bufmgr.check_aperture_space =
3794	    drm_intel_gem_check_aperture_space;
3795	bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
3796	bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable;
3797	bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
3798	    drm_intel_gem_get_pipe_from_crtc_id;
3799	bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;
3800
3801	init_cache_buckets(bufmgr_gem);
3802
3803	DRMINITLISTHEAD(&bufmgr_gem->vma_cache);
3804	bufmgr_gem->vma_max = -1; /* unlimited by default */
3805
3806	DRMLISTADD(&bufmgr_gem->managers, &bufmgr_list);
3807
3808exit:
3809	pthread_mutex_unlock(&bufmgr_list_mutex);
3810
3811	return bufmgr_gem != NULL ? &bufmgr_gem->bufmgr : NULL;
3812}
3813
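/* Usage sketch (illustrative only, compiled out): how a client might create
 * and tear down the buffer manager initialized above.  The device path, the
 * 16 KiB batch size and the helper name are assumptions for the example, and
 * error handling is kept minimal.
 */
#if 0
static void
example_bufmgr_usage(void)
{
	drm_intel_bufmgr *bufmgr;
	drm_intel_bo *bo;
	int fd;

	fd = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC);
	if (fd < 0)
		return;

	bufmgr = drm_intel_bufmgr_gem_init(fd, 16 * 1024);
	if (bufmgr != NULL) {
		/* Allocate a page-sized, page-aligned buffer object. */
		bo = drm_intel_bo_alloc(bufmgr, "example", 4096, 4096);
		if (bo != NULL) {
			/* Map it for writing, fill it, then unmap. */
			if (drm_intel_bo_map(bo, 1) == 0) {
				memset(bo->virtual, 0, 4096);
				drm_intel_bo_unmap(bo);
			}
			drm_intel_bo_unreference(bo);
		}
		drm_intel_bufmgr_destroy(bufmgr);
	}

	close(fd);
}
#endif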