intel_bufmgr_gem.c revision 6260e5d5
1/**************************************************************************
2 *
3 * Copyright © 2007 Red Hat Inc.
4 * Copyright © 2007-2012 Intel Corporation
5 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
6 * All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sub license, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * The above copyright notice and this permission notice (including the
25 * next paragraph) shall be included in all copies or substantial portions
26 * of the Software.
27 *
28 *
29 **************************************************************************/
30/*
31 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
32 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
33 *	    Eric Anholt <eric@anholt.net>
34 *	    Dave Airlie <airlied@linux.ie>
35 */
36
37#include <xf86drm.h>
38#include <xf86atomic.h>
39#include <fcntl.h>
40#include <stdio.h>
41#include <stdlib.h>
42#include <string.h>
43#include <unistd.h>
44#include <assert.h>
45#include <pthread.h>
46#include <stddef.h>
47#include <sys/ioctl.h>
48#include <sys/stat.h>
49#include <sys/types.h>
50#include <stdbool.h>
51
52#include "errno.h"
53#ifndef ETIME
54#define ETIME ETIMEDOUT
55#endif
56#include "libdrm_macros.h"
57#include "libdrm_lists.h"
58#include "intel_bufmgr.h"
59#include "intel_bufmgr_priv.h"
60#include "intel_chipset.h"
61#include "string.h"
62
63#include "i915_drm.h"
64#include "uthash.h"
65
66#if HAVE_VALGRIND
67#include <valgrind.h>
68#include <memcheck.h>
69#define VG(x) x
70#else
71#define VG(x)
72#endif
73
74#define memclear(s) memset(&s, 0, sizeof(s))
75
76#define DBG(...) do {					\
77	if (bufmgr_gem->bufmgr.debug)			\
78		fprintf(stderr, __VA_ARGS__);		\
79} while (0)
80
81#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
82#define MAX2(A, B) ((A) > (B) ? (A) : (B))
83
84/**
85 * upper_32_bits - return bits 32-63 of a number
86 * @n: the number we're accessing
87 *
88 * A basic shift-right of a 64- or 32-bit quantity.  Use this to suppress
89 * the "right shift count >= width of type" warning when that quantity is
90 * only 32 bits wide.
91 */
92#define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16))
93
94/**
95 * lower_32_bits - return bits 0-31 of a number
96 * @n: the number we're accessing
97 */
98#define lower_32_bits(n) ((__u32)(n))
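/*
 * Editorial sketch (not part of the original file): inside functions that
 * have a bufmgr_gem in scope, such as the validation-list dump below, these
 * helpers split a 64-bit GEM offset into two printable 32-bit halves:
 *
 *	uint64_t offset = bo->offset64;
 *	DBG("offset 0x%08x %08x\n",
 *	    upper_32_bits(offset), lower_32_bits(offset));
 *
 * The double 16-bit shift in upper_32_bits() is what avoids the
 * "shift count >= width of type" warning when the argument is 32 bits.
 */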
99
100typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
101
102struct drm_intel_gem_bo_bucket {
103	drmMMListHead head;
104	unsigned long size;
105};
106
107typedef struct _drm_intel_bufmgr_gem {
108	drm_intel_bufmgr bufmgr;
109
110	atomic_t refcount;
111
112	int fd;
113
114	int max_relocs;
115
116	pthread_mutex_t lock;
117
118	struct drm_i915_gem_exec_object *exec_objects;
119	struct drm_i915_gem_exec_object2 *exec2_objects;
120	drm_intel_bo **exec_bos;
121	int exec_size;
122	int exec_count;
123
124	/** Array of lists of cached gem objects of power-of-two sizes */
125	struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
126	int num_buckets;
127	time_t time;
128
129	drmMMListHead managers;
130
131	drm_intel_bo_gem *name_table;
132	drm_intel_bo_gem *handle_table;
133
134	drmMMListHead vma_cache;
135	int vma_count, vma_open, vma_max;
136
137	uint64_t gtt_size;
138	int available_fences;
139	int pci_device;
140	int gen;
141	unsigned int has_bsd : 1;
142	unsigned int has_blt : 1;
143	unsigned int has_relaxed_fencing : 1;
144	unsigned int has_llc : 1;
145	unsigned int has_wait_timeout : 1;
146	unsigned int bo_reuse : 1;
147	unsigned int no_exec : 1;
148	unsigned int has_vebox : 1;
149	unsigned int has_exec_async : 1;
150	bool fenced_relocs;
151
152	struct {
153		void *ptr;
154		uint32_t handle;
155	} userptr_active;
156
157} drm_intel_bufmgr_gem;
158
159#define DRM_INTEL_RELOC_FENCE (1<<0)
160
161typedef struct _drm_intel_reloc_target_info {
162	drm_intel_bo *bo;
163	int flags;
164} drm_intel_reloc_target;
165
166struct _drm_intel_bo_gem {
167	drm_intel_bo bo;
168
169	atomic_t refcount;
170	uint32_t gem_handle;
171	const char *name;
172
173	/**
174	 * Kernel-assigned global name for this object
175	 *
176	 * The list contains both flink-named and prime fd-imported objects.
177	 */
178	unsigned int global_name;
179
180	UT_hash_handle handle_hh;
181	UT_hash_handle name_hh;
182
183	/**
184	 * Index of the buffer within the validation list while preparing a
185	 * batchbuffer execution.
186	 */
187	int validate_index;
188
189	/**
190	 * Current tiling mode
191	 */
192	uint32_t tiling_mode;
193	uint32_t swizzle_mode;
194	unsigned long stride;
195
196	unsigned long kflags;
197
198	time_t free_time;
199
200	/** Array passed to the DRM containing relocation information. */
201	struct drm_i915_gem_relocation_entry *relocs;
202	/**
203	 * Array of info structs corresponding to relocs[i].target_handle etc
204	 */
205	drm_intel_reloc_target *reloc_target_info;
206	/** Number of entries in relocs */
207	int reloc_count;
208	/** Array of BOs that are referenced by this buffer and will be softpinned */
209	drm_intel_bo **softpin_target;
210	/** Number of softpinned BOs that are referenced by this buffer */
211	int softpin_target_count;
212	/** Maximum number of softpinned BOs that can be referenced by this buffer */
213	int softpin_target_size;
214
215	/** Mapped address for the buffer, saved across map/unmap cycles */
216	void *mem_virtual;
217	/** GTT virtual address for the buffer, saved across map/unmap cycles */
218	void *gtt_virtual;
219	/** WC CPU address for the buffer, saved across map/unmap cycles */
220	void *wc_virtual;
221	/**
222	 * Virtual address of the buffer allocated by user, used for userptr
223	 * objects only.
224	 */
225	void *user_virtual;
226	int map_count;
227	drmMMListHead vma_list;
228
229	/** BO cache list */
230	drmMMListHead head;
231
232	/**
233	 * Boolean of whether this BO and its children have been included in
234	 * the current drm_intel_bufmgr_check_aperture_space() total.
235	 */
236	bool included_in_check_aperture;
237
238	/**
239	 * Boolean of whether this buffer has been used as a relocation
240	 * target and had its size accounted for, and thus can't have any
241	 * further relocations added to it.
242	 */
243	bool used_as_reloc_target;
244
245	/**
246	 * Boolean of whether we have encountered an error whilst building the relocation tree.
247	 */
248	bool has_error;
249
250	/**
251	 * Boolean of whether this buffer can be re-used
252	 */
253	bool reusable;
254
255	/**
256	 * Boolean of whether the GPU is definitely not accessing the buffer.
257	 *
258	 * This is only valid when reusable, since non-reusable
259	 * buffers are those that have been shared with other
260	 * processes, so we don't know their state.
261	 */
262	bool idle;
263
264	/**
265	 * Boolean of whether this buffer was allocated with userptr
266	 */
267	bool is_userptr;
268
269	/**
270	 * Size in bytes of this buffer and its relocation descendents.
271	 *
272	 * Used to avoid costly tree walking in
273	 * drm_intel_bufmgr_check_aperture in the common case.
274	 */
275	int reloc_tree_size;
276
277	/**
278	 * Number of potential fence registers required by this buffer and its
279	 * relocations.
280	 */
281	int reloc_tree_fences;
282
283	/** Whether we may need to perform the SW_FINISH ioctl on unmap. */
284	bool mapped_cpu_write;
285};
286
287static unsigned int
288drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);
289
290static unsigned int
291drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count);
292
293static int
294drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
295			    uint32_t * swizzle_mode);
296
297static int
298drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
299				     uint32_t tiling_mode,
300				     uint32_t stride);
301
302static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
303						      time_t time);
304
305static void drm_intel_gem_bo_unreference(drm_intel_bo *bo);
306
307static void drm_intel_gem_bo_free(drm_intel_bo *bo);
308
309static inline drm_intel_bo_gem *to_bo_gem(drm_intel_bo *bo)
310{
311        return (drm_intel_bo_gem *)bo;
312}
313
314static unsigned long
315drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
316			   uint32_t *tiling_mode)
317{
318	unsigned long min_size, max_size;
319	unsigned long i;
320
321	if (*tiling_mode == I915_TILING_NONE)
322		return size;
323
324	/* 965+ just need multiples of page size for tiling */
325	if (bufmgr_gem->gen >= 4)
326		return ROUND_UP_TO(size, 4096);
327
328	/* Older chips need powers of two, of at least 512k or 1M */
329	if (bufmgr_gem->gen == 3) {
330		min_size = 1024*1024;
331		max_size = 128*1024*1024;
332	} else {
333		min_size = 512*1024;
334		max_size = 64*1024*1024;
335	}
336
337	if (size > max_size) {
338		*tiling_mode = I915_TILING_NONE;
339		return size;
340	}
341
342	/* Do we need to allocate every page for the fence? */
343	if (bufmgr_gem->has_relaxed_fencing)
344		return ROUND_UP_TO(size, 4096);
345
346	for (i = min_size; i < size; i <<= 1)
347		;
348
349	return i;
350}
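/*
 * Worked example (editorial sketch, assuming bufmgr_gem describes a gen3
 * part without relaxed fencing): a 1.5 MiB Y-tiled request starts at the
 * 1 MiB minimum and doubles until it covers the size, so 2 MiB is returned.
 * On gen4+ the same request is simply rounded up to a 4096-byte boundary.
 *
 *	uint32_t tiling = I915_TILING_Y;
 *	unsigned long sz = drm_intel_gem_bo_tile_size(bufmgr_gem,
 *						      1536 * 1024, &tiling);
 *	... gen3, no relaxed fencing: sz == 2 * 1024 * 1024
 */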
351
352/*
353 * Round a given pitch up to the minimum required for X tiling on a
354 * given chip.  We use 512 as the minimum to allow for a later tiling
355 * change.
356 */
357static unsigned long
358drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
359			    unsigned long pitch, uint32_t *tiling_mode)
360{
361	unsigned long tile_width;
362	unsigned long i;
363
364	/* If untiled, then just align it so that we can do rendering
365	 * to it with the 3D engine.
366	 */
367	if (*tiling_mode == I915_TILING_NONE)
368		return ALIGN(pitch, 64);
369
370	if (*tiling_mode == I915_TILING_X
371			|| (IS_915(bufmgr_gem->pci_device)
372			    && *tiling_mode == I915_TILING_Y))
373		tile_width = 512;
374	else
375		tile_width = 128;
376
377	/* 965 is flexible */
378	if (bufmgr_gem->gen >= 4)
379		return ROUND_UP_TO(pitch, tile_width);
380
381	/* The older hardware has a maximum pitch of 8192 with tiled
382	 * surfaces, so fallback to untiled if it's too large.
383	 */
384	if (pitch > 8192) {
385		*tiling_mode = I915_TILING_NONE;
386		return ALIGN(pitch, 64);
387	}
388
389	/* Pre-965 needs power of two tile width */
390	for (i = tile_width; i < pitch; i <<= 1)
391		;
392
393	return i;
394}
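/*
 * Worked example (editorial sketch): for an X-tiled surface 1366 pixels wide
 * at 4 bytes per pixel (a 5464-byte pitch), gen4+ rounds up to the 512-byte
 * tile width, giving 5632, while pre-965 parts round up to the next power of
 * two instead, giving 8192.
 *
 *	uint32_t tiling = I915_TILING_X;
 *	unsigned long pitch = drm_intel_gem_bo_tile_pitch(bufmgr_gem,
 *							  1366 * 4, &tiling);
 */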
395
396static struct drm_intel_gem_bo_bucket *
397drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
398				 unsigned long size)
399{
400	int i;
401
402	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
403		struct drm_intel_gem_bo_bucket *bucket =
404		    &bufmgr_gem->cache_bucket[i];
405		if (bucket->size >= size) {
406			return bucket;
407		}
408	}
409
410	return NULL;
411}
412
413static void
414drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
415{
416	int i, j;
417
418	for (i = 0; i < bufmgr_gem->exec_count; i++) {
419		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
420		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
421
422		if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) {
423			DBG("%2d: %d %s(%s)\n", i, bo_gem->gem_handle,
424			    bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
425			    bo_gem->name);
426			continue;
427		}
428
429		for (j = 0; j < bo_gem->reloc_count; j++) {
430			drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo;
431			drm_intel_bo_gem *target_gem =
432			    (drm_intel_bo_gem *) target_bo;
433
434			DBG("%2d: %d %s(%s)@0x%08x %08x -> "
435			    "%d (%s)@0x%08x %08x + 0x%08x\n",
436			    i,
437			    bo_gem->gem_handle,
438			    bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
439			    bo_gem->name,
440			    upper_32_bits(bo_gem->relocs[j].offset),
441			    lower_32_bits(bo_gem->relocs[j].offset),
442			    target_gem->gem_handle,
443			    target_gem->name,
444			    upper_32_bits(target_bo->offset64),
445			    lower_32_bits(target_bo->offset64),
446			    bo_gem->relocs[j].delta);
447		}
448
449		for (j = 0; j < bo_gem->softpin_target_count; j++) {
450			drm_intel_bo *target_bo = bo_gem->softpin_target[j];
451			drm_intel_bo_gem *target_gem =
452			    (drm_intel_bo_gem *) target_bo;
453			DBG("%2d: %d %s(%s) -> "
454			    "%d *(%s)@0x%08x %08x\n",
455			    i,
456			    bo_gem->gem_handle,
457			    bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
458			    bo_gem->name,
459			    target_gem->gem_handle,
460			    target_gem->name,
461			    upper_32_bits(target_bo->offset64),
462			    lower_32_bits(target_bo->offset64));
463		}
464	}
465}
466
467static inline void
468drm_intel_gem_bo_reference(drm_intel_bo *bo)
469{
470	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
471
472	atomic_inc(&bo_gem->refcount);
473}
474
475/**
476 * Adds the given buffer to the list of buffers to be validated (moved into the
477 * appropriate memory type) with the next batch submission.
478 *
479 * If a buffer is validated multiple times in a batch submission, it ends up
480 * with the intersection of the memory type flags and the union of the
481 * access flags.
482 */
483static void
484drm_intel_add_validate_buffer(drm_intel_bo *bo)
485{
486	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
487	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
488	int index;
489
490	if (bo_gem->validate_index != -1)
491		return;
492
493	/* Extend the array of validation entries as necessary. */
494	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
495		int new_size = bufmgr_gem->exec_size * 2;
496
497		if (new_size == 0)
498			new_size = 5;
499
500		bufmgr_gem->exec_objects =
501		    realloc(bufmgr_gem->exec_objects,
502			    sizeof(*bufmgr_gem->exec_objects) * new_size);
503		bufmgr_gem->exec_bos =
504		    realloc(bufmgr_gem->exec_bos,
505			    sizeof(*bufmgr_gem->exec_bos) * new_size);
506		bufmgr_gem->exec_size = new_size;
507	}
508
509	index = bufmgr_gem->exec_count;
510	bo_gem->validate_index = index;
511	/* Fill in array entry */
512	bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
513	bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
514	bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs;
515	bufmgr_gem->exec_objects[index].alignment = bo->align;
516	bufmgr_gem->exec_objects[index].offset = 0;
517	bufmgr_gem->exec_bos[index] = bo;
518	bufmgr_gem->exec_count++;
519}
520
521static void
522drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
523{
524	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
525	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
526	int index;
527	unsigned long flags;
528
529	flags = 0;
530	if (need_fence)
531		flags |= EXEC_OBJECT_NEEDS_FENCE;
532
533	if (bo_gem->validate_index != -1) {
534		bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= flags;
535		return;
536	}
537
538	/* Extend the array of validation entries as necessary. */
539	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
540		int new_size = bufmgr_gem->exec_size * 2;
541
542		if (new_size == 0)
543			new_size = 5;
544
545		bufmgr_gem->exec2_objects =
546			realloc(bufmgr_gem->exec2_objects,
547				sizeof(*bufmgr_gem->exec2_objects) * new_size);
548		bufmgr_gem->exec_bos =
549			realloc(bufmgr_gem->exec_bos,
550				sizeof(*bufmgr_gem->exec_bos) * new_size);
551		bufmgr_gem->exec_size = new_size;
552	}
553
554	index = bufmgr_gem->exec_count;
555	bo_gem->validate_index = index;
556	/* Fill in array entry */
557	bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
558	bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
559	bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
560	bufmgr_gem->exec2_objects[index].alignment = bo->align;
561	bufmgr_gem->exec2_objects[index].offset = bo->offset64;
562	bufmgr_gem->exec2_objects[index].flags = bo_gem->kflags | flags;
563	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
564	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
565	bufmgr_gem->exec_bos[index] = bo;
566	bufmgr_gem->exec_count++;
567}
568
569#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
570	sizeof(uint32_t))
571
572static void
573drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem,
574				      drm_intel_bo_gem *bo_gem,
575				      unsigned int alignment)
576{
577	unsigned int size;
578
579	assert(!bo_gem->used_as_reloc_target);
580
581	/* The older chipsets are far less flexible in terms of tiling,
582	 * and require tiled buffers to be size-aligned in the aperture.
583	 * This means that in the worst possible case we will need a hole
584	 * twice as large as the object in order for it to fit into the
585	 * aperture. Optimal packing is for wimps.
586	 */
587	size = bo_gem->bo.size;
588	if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) {
589		unsigned int min_size;
590
591		if (bufmgr_gem->has_relaxed_fencing) {
592			if (bufmgr_gem->gen == 3)
593				min_size = 1024*1024;
594			else
595				min_size = 512*1024;
596
597			while (min_size < size)
598				min_size *= 2;
599		} else
600			min_size = size;
601
602		/* Account for worst-case alignment. */
603		alignment = MAX2(alignment, min_size);
604	}
605
606	bo_gem->reloc_tree_size = size + alignment;
607}
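/*
 * Worked example (editorial sketch): a 1 MiB X-tiled buffer on gen3 with
 * relaxed fencing yields min_size == 1 MiB, so reloc_tree_size becomes
 * 1 MiB + 1 MiB = 2 MiB -- the "hole twice as large as the object" worst
 * case described above.  Untiled or gen4+ buffers only add the requested
 * alignment.
 */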
608
609static int
610drm_intel_setup_reloc_list(drm_intel_bo *bo)
611{
612	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
613	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
614	unsigned int max_relocs = bufmgr_gem->max_relocs;
615
616	if (bo->size / 4 < max_relocs)
617		max_relocs = bo->size / 4;
618
619	bo_gem->relocs = malloc(max_relocs *
620				sizeof(struct drm_i915_gem_relocation_entry));
621	bo_gem->reloc_target_info = malloc(max_relocs *
622					   sizeof(drm_intel_reloc_target));
623	if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) {
624		bo_gem->has_error = true;
625
626		free (bo_gem->relocs);
627		bo_gem->relocs = NULL;
628
629		free (bo_gem->reloc_target_info);
630		bo_gem->reloc_target_info = NULL;
631
632		return 1;
633	}
634
635	return 0;
636}
637
638static int
639drm_intel_gem_bo_busy(drm_intel_bo *bo)
640{
641	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
642	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
643	struct drm_i915_gem_busy busy;
644	int ret;
645
646	if (bo_gem->reusable && bo_gem->idle)
647		return false;
648
649	memclear(busy);
650	busy.handle = bo_gem->gem_handle;
651
652	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
653	if (ret == 0) {
654		bo_gem->idle = !busy.busy;
655		return busy.busy;
656	} else {
657		return false;
658	}
659}
660
661static int
662drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem,
663				  drm_intel_bo_gem *bo_gem, int state)
664{
665	struct drm_i915_gem_madvise madv;
666
667	memclear(madv);
668	madv.handle = bo_gem->gem_handle;
669	madv.madv = state;
670	madv.retained = 1;
671	drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
672
673	return madv.retained;
674}
675
676static int
677drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv)
678{
679	return drm_intel_gem_bo_madvise_internal
680		((drm_intel_bufmgr_gem *) bo->bufmgr,
681		 (drm_intel_bo_gem *) bo,
682		 madv);
683}
684
685/* drop the oldest entries that have been purged by the kernel */
686static void
687drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
688				    struct drm_intel_gem_bo_bucket *bucket)
689{
690	while (!DRMLISTEMPTY(&bucket->head)) {
691		drm_intel_bo_gem *bo_gem;
692
693		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
694				      bucket->head.next, head);
695		if (drm_intel_gem_bo_madvise_internal
696		    (bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
697			break;
698
699		DRMLISTDEL(&bo_gem->head);
700		drm_intel_gem_bo_free(&bo_gem->bo);
701	}
702}
703
704static drm_intel_bo *
705drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
706				const char *name,
707				unsigned long size,
708				unsigned long flags,
709				uint32_t tiling_mode,
710				unsigned long stride,
711				unsigned int alignment)
712{
713	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
714	drm_intel_bo_gem *bo_gem;
715	unsigned int page_size = getpagesize();
716	int ret;
717	struct drm_intel_gem_bo_bucket *bucket;
718	bool alloc_from_cache;
719	unsigned long bo_size;
720	bool for_render = false;
721
722	if (flags & BO_ALLOC_FOR_RENDER)
723		for_render = true;
724
725	/* Round the allocated size up to a power of two number of pages. */
726	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
727
728	/* If we don't have caching at this size, don't actually round the
729	 * allocation up.
730	 */
731	if (bucket == NULL) {
732		bo_size = size;
733		if (bo_size < page_size)
734			bo_size = page_size;
735	} else {
736		bo_size = bucket->size;
737	}
738
739	pthread_mutex_lock(&bufmgr_gem->lock);
740	/* Get a buffer out of the cache if available */
741retry:
742	alloc_from_cache = false;
743	if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
744		if (for_render) {
745			/* Allocate new render-target BOs from the tail (MRU)
746			 * of the list, as it will likely be hot in the GPU
747			 * cache and in the aperture for us.
748			 */
749			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
750					      bucket->head.prev, head);
751			DRMLISTDEL(&bo_gem->head);
752			alloc_from_cache = true;
753			bo_gem->bo.align = alignment;
754		} else {
755			assert(alignment == 0);
756			/* For non-render-target BOs (where we're probably
757			 * going to map it first thing in order to fill it
758			 * with data), check if the last BO in the cache is
759			 * unbusy, and only reuse in that case. Otherwise,
760			 * allocating a new buffer is probably faster than
761			 * waiting for the GPU to finish.
762			 */
763			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
764					      bucket->head.next, head);
765			if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
766				alloc_from_cache = true;
767				DRMLISTDEL(&bo_gem->head);
768			}
769		}
770
771		if (alloc_from_cache) {
772			if (!drm_intel_gem_bo_madvise_internal
773			    (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
774				drm_intel_gem_bo_free(&bo_gem->bo);
775				drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
776								    bucket);
777				goto retry;
778			}
779
780			if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
781								 tiling_mode,
782								 stride)) {
783				drm_intel_gem_bo_free(&bo_gem->bo);
784				goto retry;
785			}
786		}
787	}
788
789	if (!alloc_from_cache) {
790		struct drm_i915_gem_create create;
791
792		bo_gem = calloc(1, sizeof(*bo_gem));
793		if (!bo_gem)
794			goto err;
795
796		/* drm_intel_gem_bo_free() calls DRMLISTDEL() on an uninitialized
797		 * list (vma_list), so initialize the list head here. */
798		DRMINITLISTHEAD(&bo_gem->vma_list);
799
800		bo_gem->bo.size = bo_size;
801
802		memclear(create);
803		create.size = bo_size;
804
805		ret = drmIoctl(bufmgr_gem->fd,
806			       DRM_IOCTL_I915_GEM_CREATE,
807			       &create);
808		if (ret != 0) {
809			free(bo_gem);
810			goto err;
811		}
812
813		bo_gem->gem_handle = create.handle;
814		HASH_ADD(handle_hh, bufmgr_gem->handle_table,
815			 gem_handle, sizeof(bo_gem->gem_handle),
816			 bo_gem);
817
818		bo_gem->bo.handle = bo_gem->gem_handle;
819		bo_gem->bo.bufmgr = bufmgr;
820		bo_gem->bo.align = alignment;
821
822		bo_gem->tiling_mode = I915_TILING_NONE;
823		bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
824		bo_gem->stride = 0;
825
826		if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
827							 tiling_mode,
828							 stride))
829			goto err_free;
830	}
831
832	bo_gem->name = name;
833	atomic_set(&bo_gem->refcount, 1);
834	bo_gem->validate_index = -1;
835	bo_gem->reloc_tree_fences = 0;
836	bo_gem->used_as_reloc_target = false;
837	bo_gem->has_error = false;
838	bo_gem->reusable = true;
839
840	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, alignment);
841	pthread_mutex_unlock(&bufmgr_gem->lock);
842
843	DBG("bo_create: buf %d (%s) %ldb\n",
844	    bo_gem->gem_handle, bo_gem->name, size);
845
846	return &bo_gem->bo;
847
848err_free:
849	drm_intel_gem_bo_free(&bo_gem->bo);
850err:
851	pthread_mutex_unlock(&bufmgr_gem->lock);
852	return NULL;
853}
854
855static drm_intel_bo *
856drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
857				  const char *name,
858				  unsigned long size,
859				  unsigned int alignment)
860{
861	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
862					       BO_ALLOC_FOR_RENDER,
863					       I915_TILING_NONE, 0,
864					       alignment);
865}
866
867static drm_intel_bo *
868drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr,
869		       const char *name,
870		       unsigned long size,
871		       unsigned int alignment)
872{
873	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0,
874					       I915_TILING_NONE, 0, 0);
875}
876
877static drm_intel_bo *
878drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
879			     int x, int y, int cpp, uint32_t *tiling_mode,
880			     unsigned long *pitch, unsigned long flags)
881{
882	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
883	unsigned long size, stride;
884	uint32_t tiling;
885
886	do {
887		unsigned long aligned_y, height_alignment;
888
889		tiling = *tiling_mode;
890
891		/* If we're tiled, our allocations are in 8 or 32-row blocks,
892		 * so failure to align our height means that we won't allocate
893		 * enough pages.
894		 *
895		 * If we're untiled, we still have to align to 2 rows high
896		 * because the data port accesses 2x2 blocks even if the
897		 * bottom row isn't to be rendered, so failure to align means
898		 * we could walk off the end of the GTT and fault.  This is
899		 * documented on 965, and may be the case on older chipsets
900		 * too so we try to be careful.
901		 */
902		aligned_y = y;
903		height_alignment = 2;
904
905		if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE)
906			height_alignment = 16;
907		else if (tiling == I915_TILING_X
908			|| (IS_915(bufmgr_gem->pci_device)
909			    && tiling == I915_TILING_Y))
910			height_alignment = 8;
911		else if (tiling == I915_TILING_Y)
912			height_alignment = 32;
913		aligned_y = ALIGN(y, height_alignment);
914
915		stride = x * cpp;
916		stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode);
917		size = stride * aligned_y;
918		size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
919	} while (*tiling_mode != tiling);
920	*pitch = stride;
921
922	if (tiling == I915_TILING_NONE)
923		stride = 0;
924
925	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags,
926					       tiling, stride, 0);
927}
928
929static drm_intel_bo *
930drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
931				const char *name,
932				void *addr,
933				uint32_t tiling_mode,
934				uint32_t stride,
935				unsigned long size,
936				unsigned long flags)
937{
938	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
939	drm_intel_bo_gem *bo_gem;
940	int ret;
941	struct drm_i915_gem_userptr userptr;
942
943	/* Tiling with userptr surfaces is not supported
944	 * on all hardware, so refuse it for the time being.
945	 */
946	if (tiling_mode != I915_TILING_NONE)
947		return NULL;
948
949	bo_gem = calloc(1, sizeof(*bo_gem));
950	if (!bo_gem)
951		return NULL;
952
953	atomic_set(&bo_gem->refcount, 1);
954	DRMINITLISTHEAD(&bo_gem->vma_list);
955
956	bo_gem->bo.size = size;
957
958	memclear(userptr);
959	userptr.user_ptr = (__u64)((unsigned long)addr);
960	userptr.user_size = size;
961	userptr.flags = flags;
962
963	ret = drmIoctl(bufmgr_gem->fd,
964			DRM_IOCTL_I915_GEM_USERPTR,
965			&userptr);
966	if (ret != 0) {
967		DBG("bo_create_userptr: "
968		    "ioctl failed with user ptr %p size 0x%lx, "
969		    "user flags 0x%lx\n", addr, size, flags);
970		free(bo_gem);
971		return NULL;
972	}
973
974	pthread_mutex_lock(&bufmgr_gem->lock);
975
976	bo_gem->gem_handle = userptr.handle;
977	bo_gem->bo.handle = bo_gem->gem_handle;
978	bo_gem->bo.bufmgr    = bufmgr;
979	bo_gem->is_userptr   = true;
980	bo_gem->bo.virtual   = addr;
981	/* Save the address provided by user */
982	bo_gem->user_virtual = addr;
983	bo_gem->tiling_mode  = I915_TILING_NONE;
984	bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
985	bo_gem->stride       = 0;
986
987	HASH_ADD(handle_hh, bufmgr_gem->handle_table,
988		 gem_handle, sizeof(bo_gem->gem_handle),
989		 bo_gem);
990
991	bo_gem->name = name;
992	bo_gem->validate_index = -1;
993	bo_gem->reloc_tree_fences = 0;
994	bo_gem->used_as_reloc_target = false;
995	bo_gem->has_error = false;
996	bo_gem->reusable = false;
997
998	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
999	pthread_mutex_unlock(&bufmgr_gem->lock);
1000
1001	DBG("bo_create_userptr: "
1002	    "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n",
1003		addr, bo_gem->gem_handle, bo_gem->name,
1004		size, stride, tiling_mode);
1005
1006	return &bo_gem->bo;
1007}
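/*
 * Usage sketch (editorial, not part of the original file): wrapping an
 * existing page-aligned allocation in a userptr BO.  "ptr" and "pgsz" are
 * illustrative locals; the address must be page aligned and the size a
 * multiple of the page size.
 *
 *	long pgsz = sysconf(_SC_PAGESIZE);
 *	void *ptr;
 *	drm_intel_bo *bo;
 *
 *	if (posix_memalign(&ptr, pgsz, 4 * pgsz) == 0)
 *		bo = drm_intel_bo_alloc_userptr(bufmgr, "example userptr",
 *						ptr, I915_TILING_NONE, 0,
 *						4 * pgsz, 0);
 */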
1008
1009static bool
1010has_userptr(drm_intel_bufmgr_gem *bufmgr_gem)
1011{
1012	int ret;
1013	void *ptr;
1014	long pgsz;
1015	struct drm_i915_gem_userptr userptr;
1016
1017	pgsz = sysconf(_SC_PAGESIZE);
1018	assert(pgsz > 0);
1019
1020	ret = posix_memalign(&ptr, pgsz, pgsz);
1021	if (ret) {
1022		DBG("Failed to get a page (%ld) for userptr detection!\n",
1023			pgsz);
1024		return false;
1025	}
1026
1027	memclear(userptr);
1028	userptr.user_ptr = (__u64)(unsigned long)ptr;
1029	userptr.user_size = pgsz;
1030
1031retry:
1032	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
1033	if (ret) {
1034		if (errno == ENODEV && userptr.flags == 0) {
1035			userptr.flags = I915_USERPTR_UNSYNCHRONIZED;
1036			goto retry;
1037		}
1038		free(ptr);
1039		return false;
1040	}
1041
1042	/* We don't release the userptr bo here as we want to keep the
1043	 * kernel mm tracking alive for our lifetime. The first time we
1044	 * create a userptr object the kernel has to install an mmu_notifier
1045	 * which is a heavyweight operation (e.g. it requires taking all
1046	 * mm_locks and stop_machine()).
1047	 */
1048
1049	bufmgr_gem->userptr_active.ptr = ptr;
1050	bufmgr_gem->userptr_active.handle = userptr.handle;
1051
1052	return true;
1053}
1054
1055static drm_intel_bo *
1056check_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
1057		       const char *name,
1058		       void *addr,
1059		       uint32_t tiling_mode,
1060		       uint32_t stride,
1061		       unsigned long size,
1062		       unsigned long flags)
1063{
1064	if (has_userptr((drm_intel_bufmgr_gem *)bufmgr))
1065		bufmgr->bo_alloc_userptr = drm_intel_gem_bo_alloc_userptr;
1066	else
1067		bufmgr->bo_alloc_userptr = NULL;
1068
1069	return drm_intel_bo_alloc_userptr(bufmgr, name, addr,
1070					  tiling_mode, stride, size, flags);
1071}
1072
1073/**
1074 * Returns a drm_intel_bo wrapping the given buffer object handle.
1075 *
1076 * This can be used when one application needs to pass a buffer object
1077 * to another.
1078 */
1079drm_public drm_intel_bo *
1080drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr,
1081				  const char *name,
1082				  unsigned int handle)
1083{
1084	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1085	drm_intel_bo_gem *bo_gem;
1086	int ret;
1087	struct drm_gem_open open_arg;
1088	struct drm_i915_gem_get_tiling get_tiling;
1089
1090	/* At the moment most applications only have a few named bos.
1091	 * For instance, in a DRI client only the render buffers passed
1092	 * between X and the client are named. And since X returns the
1093	 * alternating names for the front/back buffer a linear search
1094	 * provides a sufficiently fast match.
1095	 */
1096	pthread_mutex_lock(&bufmgr_gem->lock);
1097	HASH_FIND(name_hh, bufmgr_gem->name_table,
1098		  &handle, sizeof(handle), bo_gem);
1099	if (bo_gem) {
1100		drm_intel_gem_bo_reference(&bo_gem->bo);
1101		goto out;
1102	}
1103
1104	memclear(open_arg);
1105	open_arg.name = handle;
1106	ret = drmIoctl(bufmgr_gem->fd,
1107		       DRM_IOCTL_GEM_OPEN,
1108		       &open_arg);
1109	if (ret != 0) {
1110		DBG("Couldn't reference %s handle 0x%08x: %s\n",
1111		    name, handle, strerror(errno));
1112		bo_gem = NULL;
1113		goto out;
1114	}
1115	/* Now see if someone has used a prime handle to get this
1116	 * object from the kernel before, by looking through the list
1117	 * again for a matching gem_handle.
1118	 */
1119	HASH_FIND(handle_hh, bufmgr_gem->handle_table,
1120		  &open_arg.handle, sizeof(open_arg.handle), bo_gem);
1121	if (bo_gem) {
1122		drm_intel_gem_bo_reference(&bo_gem->bo);
1123		goto out;
1124	}
1125
1126	bo_gem = calloc(1, sizeof(*bo_gem));
1127	if (!bo_gem)
1128		goto out;
1129
1130	atomic_set(&bo_gem->refcount, 1);
1131	DRMINITLISTHEAD(&bo_gem->vma_list);
1132
1133	bo_gem->bo.size = open_arg.size;
1134	bo_gem->bo.offset = 0;
1135	bo_gem->bo.offset64 = 0;
1136	bo_gem->bo.virtual = NULL;
1137	bo_gem->bo.bufmgr = bufmgr;
1138	bo_gem->name = name;
1139	bo_gem->validate_index = -1;
1140	bo_gem->gem_handle = open_arg.handle;
1141	bo_gem->bo.handle = open_arg.handle;
1142	bo_gem->global_name = handle;
1143	bo_gem->reusable = false;
1144
1145	HASH_ADD(handle_hh, bufmgr_gem->handle_table,
1146		 gem_handle, sizeof(bo_gem->gem_handle), bo_gem);
1147	HASH_ADD(name_hh, bufmgr_gem->name_table,
1148		 global_name, sizeof(bo_gem->global_name), bo_gem);
1149
1150	memclear(get_tiling);
1151	get_tiling.handle = bo_gem->gem_handle;
1152	ret = drmIoctl(bufmgr_gem->fd,
1153		       DRM_IOCTL_I915_GEM_GET_TILING,
1154		       &get_tiling);
1155	if (ret != 0)
1156		goto err_unref;
1157
1158	bo_gem->tiling_mode = get_tiling.tiling_mode;
1159	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
1160	/* XXX stride is unknown */
1161	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
1162	DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
1163
1164out:
1165	pthread_mutex_unlock(&bufmgr_gem->lock);
1166	return &bo_gem->bo;
1167
1168err_unref:
1169	drm_intel_gem_bo_free(&bo_gem->bo);
1170	pthread_mutex_unlock(&bufmgr_gem->lock);
1171	return NULL;
1172}
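/*
 * Usage sketch (editorial): passing a BO between two processes via flink.
 * The exporting side obtains a global name, the importing side wraps it:
 *
 *	uint32_t name;
 *	if (drm_intel_bo_flink(bo, &name) == 0)
 *		send_name_to_other_process(name);	... illustrative helper
 *
 *	drm_intel_bo *shared =
 *		drm_intel_bo_gem_create_from_name(bufmgr, "shared", name);
 */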
1173
1174static void
1175drm_intel_gem_bo_free(drm_intel_bo *bo)
1176{
1177	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1178	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1179	struct drm_gem_close close;
1180	int ret;
1181
1182	DRMLISTDEL(&bo_gem->vma_list);
1183	if (bo_gem->mem_virtual) {
1184		VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0));
1185		drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
1186		bufmgr_gem->vma_count--;
1187	}
1188	if (bo_gem->wc_virtual) {
1189		VG(VALGRIND_FREELIKE_BLOCK(bo_gem->wc_virtual, 0));
1190		drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
1191		bufmgr_gem->vma_count--;
1192	}
1193	if (bo_gem->gtt_virtual) {
1194		drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
1195		bufmgr_gem->vma_count--;
1196	}
1197
1198	if (bo_gem->global_name)
1199		HASH_DELETE(name_hh, bufmgr_gem->name_table, bo_gem);
1200	HASH_DELETE(handle_hh, bufmgr_gem->handle_table, bo_gem);
1201
1202	/* Close this object */
1203	memclear(close);
1204	close.handle = bo_gem->gem_handle;
1205	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close);
1206	if (ret != 0) {
1207		DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
1208		    bo_gem->gem_handle, bo_gem->name, strerror(errno));
1209	}
1210	free(bo);
1211}
1212
1213static void
1214drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo)
1215{
1216#if HAVE_VALGRIND
1217	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1218
1219	if (bo_gem->mem_virtual)
1220		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size);
1221
1222	if (bo_gem->wc_virtual)
1223		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->wc_virtual, bo->size);
1224
1225	if (bo_gem->gtt_virtual)
1226		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size);
1227#endif
1228}
1229
1230/** Frees all cached buffers significantly older than @time. */
1231static void
1232drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
1233{
1234	int i;
1235
1236	if (bufmgr_gem->time == time)
1237		return;
1238
1239	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
1240		struct drm_intel_gem_bo_bucket *bucket =
1241		    &bufmgr_gem->cache_bucket[i];
1242
1243		while (!DRMLISTEMPTY(&bucket->head)) {
1244			drm_intel_bo_gem *bo_gem;
1245
1246			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1247					      bucket->head.next, head);
1248			if (time - bo_gem->free_time <= 1)
1249				break;
1250
1251			DRMLISTDEL(&bo_gem->head);
1252
1253			drm_intel_gem_bo_free(&bo_gem->bo);
1254		}
1255	}
1256
1257	bufmgr_gem->time = time;
1258}
1259
1260static void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem)
1261{
1262	int limit;
1263
1264	DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__,
1265	    bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max);
1266
1267	if (bufmgr_gem->vma_max < 0)
1268		return;
1269
1270	/* We may need to evict a few entries in order to create new mmaps */
1271	limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open;
1272	if (limit < 0)
1273		limit = 0;
1274
1275	while (bufmgr_gem->vma_count > limit) {
1276		drm_intel_bo_gem *bo_gem;
1277
1278		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1279				      bufmgr_gem->vma_cache.next,
1280				      vma_list);
1281		assert(bo_gem->map_count == 0);
1282		DRMLISTDELINIT(&bo_gem->vma_list);
1283
1284		if (bo_gem->mem_virtual) {
1285			drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
1286			bo_gem->mem_virtual = NULL;
1287			bufmgr_gem->vma_count--;
1288		}
1289		if (bo_gem->wc_virtual) {
1290			drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
1291			bo_gem->wc_virtual = NULL;
1292			bufmgr_gem->vma_count--;
1293		}
1294		if (bo_gem->gtt_virtual) {
1295			drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
1296			bo_gem->gtt_virtual = NULL;
1297			bufmgr_gem->vma_count--;
1298		}
1299	}
1300}
1301
1302static void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem,
1303				       drm_intel_bo_gem *bo_gem)
1304{
1305	bufmgr_gem->vma_open--;
1306	DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache);
1307	if (bo_gem->mem_virtual)
1308		bufmgr_gem->vma_count++;
1309	if (bo_gem->wc_virtual)
1310		bufmgr_gem->vma_count++;
1311	if (bo_gem->gtt_virtual)
1312		bufmgr_gem->vma_count++;
1313	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
1314}
1315
1316static void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem,
1317				      drm_intel_bo_gem *bo_gem)
1318{
1319	bufmgr_gem->vma_open++;
1320	DRMLISTDEL(&bo_gem->vma_list);
1321	if (bo_gem->mem_virtual)
1322		bufmgr_gem->vma_count--;
1323	if (bo_gem->wc_virtual)
1324		bufmgr_gem->vma_count--;
1325	if (bo_gem->gtt_virtual)
1326		bufmgr_gem->vma_count--;
1327	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
1328}
1329
1330static void
1331drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
1332{
1333	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1334	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1335	struct drm_intel_gem_bo_bucket *bucket;
1336	int i;
1337
1338	/* Unreference all the target buffers */
1339	for (i = 0; i < bo_gem->reloc_count; i++) {
1340		if (bo_gem->reloc_target_info[i].bo != bo) {
1341			drm_intel_gem_bo_unreference_locked_timed(bo_gem->
1342								  reloc_target_info[i].bo,
1343								  time);
1344		}
1345	}
1346	for (i = 0; i < bo_gem->softpin_target_count; i++)
1347		drm_intel_gem_bo_unreference_locked_timed(bo_gem->softpin_target[i],
1348								  time);
1349	bo_gem->kflags = 0;
1350	bo_gem->reloc_count = 0;
1351	bo_gem->used_as_reloc_target = false;
1352	bo_gem->softpin_target_count = 0;
1353
1354	DBG("bo_unreference final: %d (%s)\n",
1355	    bo_gem->gem_handle, bo_gem->name);
1356
1357	/* release memory associated with this object */
1358	if (bo_gem->reloc_target_info) {
1359		free(bo_gem->reloc_target_info);
1360		bo_gem->reloc_target_info = NULL;
1361	}
1362	if (bo_gem->relocs) {
1363		free(bo_gem->relocs);
1364		bo_gem->relocs = NULL;
1365	}
1366	if (bo_gem->softpin_target) {
1367		free(bo_gem->softpin_target);
1368		bo_gem->softpin_target = NULL;
1369		bo_gem->softpin_target_size = 0;
1370	}
1371
1372	/* Clear any left-over mappings */
1373	if (bo_gem->map_count) {
1374		DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count);
1375		bo_gem->map_count = 0;
1376		drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1377		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1378	}
1379
1380	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
1381	/* Put the buffer into our internal cache for reuse if we can. */
1382	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
1383	    drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
1384					      I915_MADV_DONTNEED)) {
1385		bo_gem->free_time = time;
1386
1387		bo_gem->name = NULL;
1388		bo_gem->validate_index = -1;
1389
1390		DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
1391	} else {
1392		drm_intel_gem_bo_free(bo);
1393	}
1394}
1395
1396static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
1397						      time_t time)
1398{
1399	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1400
1401	assert(atomic_read(&bo_gem->refcount) > 0);
1402	if (atomic_dec_and_test(&bo_gem->refcount))
1403		drm_intel_gem_bo_unreference_final(bo, time);
1404}
1405
1406static void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
1407{
1408	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1409
1410	assert(atomic_read(&bo_gem->refcount) > 0);
1411
1412	if (atomic_add_unless(&bo_gem->refcount, -1, 1)) {
1413		drm_intel_bufmgr_gem *bufmgr_gem =
1414		    (drm_intel_bufmgr_gem *) bo->bufmgr;
1415		struct timespec time;
1416
1417		clock_gettime(CLOCK_MONOTONIC, &time);
1418
1419		pthread_mutex_lock(&bufmgr_gem->lock);
1420
1421		if (atomic_dec_and_test(&bo_gem->refcount)) {
1422			drm_intel_gem_bo_unreference_final(bo, time.tv_sec);
1423			drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec);
1424		}
1425
1426		pthread_mutex_unlock(&bufmgr_gem->lock);
1427	}
1428}
1429
1430static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
1431{
1432	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1433	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1434	struct drm_i915_gem_set_domain set_domain;
1435	int ret;
1436
1437	if (bo_gem->is_userptr) {
1438		/* Return the same user ptr */
1439		bo->virtual = bo_gem->user_virtual;
1440		return 0;
1441	}
1442
1443	pthread_mutex_lock(&bufmgr_gem->lock);
1444
1445	if (bo_gem->map_count++ == 0)
1446		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
1447
1448	if (!bo_gem->mem_virtual) {
1449		struct drm_i915_gem_mmap mmap_arg;
1450
1451		DBG("bo_map: %d (%s), map_count=%d\n",
1452		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1453
1454		memclear(mmap_arg);
1455		mmap_arg.handle = bo_gem->gem_handle;
1456		mmap_arg.size = bo->size;
1457		ret = drmIoctl(bufmgr_gem->fd,
1458			       DRM_IOCTL_I915_GEM_MMAP,
1459			       &mmap_arg);
1460		if (ret != 0) {
1461			ret = -errno;
1462			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1463			    __FILE__, __LINE__, bo_gem->gem_handle,
1464			    bo_gem->name, strerror(errno));
1465			if (--bo_gem->map_count == 0)
1466				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1467			pthread_mutex_unlock(&bufmgr_gem->lock);
1468			return ret;
1469		}
1470		VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
1471		bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
1472	}
1473	DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1474	    bo_gem->mem_virtual);
1475	bo->virtual = bo_gem->mem_virtual;
1476
1477	memclear(set_domain);
1478	set_domain.handle = bo_gem->gem_handle;
1479	set_domain.read_domains = I915_GEM_DOMAIN_CPU;
1480	if (write_enable)
1481		set_domain.write_domain = I915_GEM_DOMAIN_CPU;
1482	else
1483		set_domain.write_domain = 0;
1484	ret = drmIoctl(bufmgr_gem->fd,
1485		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1486		       &set_domain);
1487	if (ret != 0) {
1488		DBG("%s:%d: Error setting to CPU domain %d: %s\n",
1489		    __FILE__, __LINE__, bo_gem->gem_handle,
1490		    strerror(errno));
1491	}
1492
1493	if (write_enable)
1494		bo_gem->mapped_cpu_write = true;
1495
1496	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1497	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size));
1498	pthread_mutex_unlock(&bufmgr_gem->lock);
1499
1500	return 0;
1501}
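/*
 * Usage sketch (editorial): the public entry point for this path is
 * drm_intel_bo_map()/drm_intel_bo_unmap(); a typical CPU upload looks like
 *
 *	if (drm_intel_bo_map(bo, 1) == 0) {	... 1 = write enable
 *		memcpy(bo->virtual, data, len);
 *		drm_intel_bo_unmap(bo);
 *	}
 *
 * where "data" and "len" are caller-provided.
 */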
1502
1503static int
1504map_gtt(drm_intel_bo *bo)
1505{
1506	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1507	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1508	int ret;
1509
1510	if (bo_gem->is_userptr)
1511		return -EINVAL;
1512
1513	if (bo_gem->map_count++ == 0)
1514		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
1515
1516	/* Get a mapping of the buffer if we haven't before. */
1517	if (bo_gem->gtt_virtual == NULL) {
1518		struct drm_i915_gem_mmap_gtt mmap_arg;
1519
1520		DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
1521		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1522
1523		memclear(mmap_arg);
1524		mmap_arg.handle = bo_gem->gem_handle;
1525
1526		/* Get the fake offset back... */
1527		ret = drmIoctl(bufmgr_gem->fd,
1528			       DRM_IOCTL_I915_GEM_MMAP_GTT,
1529			       &mmap_arg);
1530		if (ret != 0) {
1531			ret = -errno;
1532			DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
1533			    __FILE__, __LINE__,
1534			    bo_gem->gem_handle, bo_gem->name,
1535			    strerror(errno));
1536			if (--bo_gem->map_count == 0)
1537				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1538			return ret;
1539		}
1540
1541		/* and mmap it */
1542		ret = drmMap(bufmgr_gem->fd, mmap_arg.offset, bo->size,
1543		    &bo_gem->gtt_virtual);
1544		if (ret) {
1545			bo_gem->gtt_virtual = NULL;
1546			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1547			    __FILE__, __LINE__,
1548			    bo_gem->gem_handle, bo_gem->name,
1549			    strerror(errno));
1550			if (--bo_gem->map_count == 0)
1551				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1552			return ret;
1553		}
1554	}
1555
1556	bo->virtual = bo_gem->gtt_virtual;
1557
1558	DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1559	    bo_gem->gtt_virtual);
1560
1561	return 0;
1562}
1563
1564drm_public int
1565drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
1566{
1567	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1568	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1569	struct drm_i915_gem_set_domain set_domain;
1570	int ret;
1571
1572	pthread_mutex_lock(&bufmgr_gem->lock);
1573
1574	ret = map_gtt(bo);
1575	if (ret) {
1576		pthread_mutex_unlock(&bufmgr_gem->lock);
1577		return ret;
1578	}
1579
1580	/* Now move it to the GTT domain so that the GPU and CPU
1581	 * caches are flushed and the GPU isn't actively using the
1582	 * buffer.
1583	 *
1584	 * The pagefault handler does this domain change for us when
1585	 * it has unbound the BO from the GTT, but it's up to us to
1586	 * tell it when we're about to use things if we had done
1587	 * rendering and it still happens to be bound to the GTT.
1588	 */
1589	memclear(set_domain);
1590	set_domain.handle = bo_gem->gem_handle;
1591	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1592	set_domain.write_domain = I915_GEM_DOMAIN_GTT;
1593	ret = drmIoctl(bufmgr_gem->fd,
1594		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1595		       &set_domain);
1596	if (ret != 0) {
1597		DBG("%s:%d: Error setting domain %d: %s\n",
1598		    __FILE__, __LINE__, bo_gem->gem_handle,
1599		    strerror(errno));
1600	}
1601
1602	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1603	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1604	pthread_mutex_unlock(&bufmgr_gem->lock);
1605
1606	return 0;
1607}
1608
1609/**
1610 * Performs a mapping of the buffer object like the normal GTT
1611 * mapping, but avoids waiting for the GPU to be done reading from or
1612 * rendering to the buffer.
1613 *
1614 * This is used in the implementation of GL_ARB_map_buffer_range: The
1615 * user asks to create a buffer, then does a mapping, fills some
1616 * space, runs a drawing command, then asks to map it again without
1617 * synchronizing because it guarantees that it won't write over the
1618 * data that the GPU is busy using (or, more specifically, that if it
1619 * does write over the data, it acknowledges that rendering is
1620 * undefined).
1621 */
1622
1623drm_public int
1624drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
1625{
1626	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1627#if HAVE_VALGRIND
1628	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1629#endif
1630	int ret;
1631
1632	/* If the CPU cache isn't coherent with the GTT, then use a
1633	 * regular synchronized mapping.  The problem is that we don't
1634	 * track where the buffer was last used on the CPU side in
1635	 * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so
1636	 * we would potentially corrupt the buffer even when the user
1637	 * does reasonable things.
1638	 */
1639	if (!bufmgr_gem->has_llc)
1640		return drm_intel_gem_bo_map_gtt(bo);
1641
1642	pthread_mutex_lock(&bufmgr_gem->lock);
1643
1644	ret = map_gtt(bo);
1645	if (ret == 0) {
1646		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1647		VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1648	}
1649
1650	pthread_mutex_unlock(&bufmgr_gem->lock);
1651
1652	return ret;
1653}
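/*
 * Usage sketch (editorial): a GL_ARB_map_buffer_range style caller that
 * knows it will not touch GPU-busy ranges can avoid the stall:
 *
 *	if (drm_intel_gem_bo_map_unsynchronized(bo) == 0) {
 *		memcpy((char *)bo->virtual + write_offset, data, len);
 *		drm_intel_gem_bo_unmap_gtt(bo);
 *	}
 *
 * "write_offset", "data" and "len" are caller-provided and assumed not to
 * overlap anything the GPU is still reading.
 */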
1654
1655static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
1656{
1657	drm_intel_bufmgr_gem *bufmgr_gem;
1658	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1659	int ret = 0;
1660
1661	if (bo == NULL)
1662		return 0;
1663
1664	if (bo_gem->is_userptr)
1665		return 0;
1666
1667	bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1668
1669	pthread_mutex_lock(&bufmgr_gem->lock);
1670
1671	if (bo_gem->map_count <= 0) {
1672		DBG("attempted to unmap an unmapped bo\n");
1673		pthread_mutex_unlock(&bufmgr_gem->lock);
1674		/* Preserve the old behaviour of just treating this as a
1675		 * no-op rather than reporting the error.
1676		 */
1677		return 0;
1678	}
1679
1680	if (bo_gem->mapped_cpu_write) {
1681		struct drm_i915_gem_sw_finish sw_finish;
1682
1683		/* Cause a flush to happen if the buffer's pinned for
1684		 * scanout, so the results show up in a timely manner.
1685		 * Unlike GTT set domains, this only does work if the
1686		 * buffer is actually used for scanout.
1687		 */
1688		memclear(sw_finish);
1689		sw_finish.handle = bo_gem->gem_handle;
1690		ret = drmIoctl(bufmgr_gem->fd,
1691			       DRM_IOCTL_I915_GEM_SW_FINISH,
1692			       &sw_finish);
1693		ret = ret == -1 ? -errno : 0;
1694
1695		bo_gem->mapped_cpu_write = false;
1696	}
1697
1698	/* We need to unmap after every invocation, as we cannot track
1699	 * an open vma for every bo; that would exhaust the system
1700	 * limits and cause later failures.
1701	 */
1702	if (--bo_gem->map_count == 0) {
1703		drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1704		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1705		bo->virtual = NULL;
1706	}
1707	pthread_mutex_unlock(&bufmgr_gem->lock);
1708
1709	return ret;
1710}
1711
1712drm_public int
1713drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
1714{
1715	return drm_intel_gem_bo_unmap(bo);
1716}
1717
1718static int
1719drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
1720			 unsigned long size, const void *data)
1721{
1722	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1723	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1724	struct drm_i915_gem_pwrite pwrite;
1725	int ret;
1726
1727	if (bo_gem->is_userptr)
1728		return -EINVAL;
1729
1730	memclear(pwrite);
1731	pwrite.handle = bo_gem->gem_handle;
1732	pwrite.offset = offset;
1733	pwrite.size = size;
1734	pwrite.data_ptr = (uint64_t) (uintptr_t) data;
1735	ret = drmIoctl(bufmgr_gem->fd,
1736		       DRM_IOCTL_I915_GEM_PWRITE,
1737		       &pwrite);
1738	if (ret != 0) {
1739		ret = -errno;
1740		DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
1741		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1742		    (int)size, strerror(errno));
1743	}
1744
1745	return ret;
1746}
1747
1748static int
1749drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id)
1750{
1751	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1752	struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id;
1753	int ret;
1754
1755	memclear(get_pipe_from_crtc_id);
1756	get_pipe_from_crtc_id.crtc_id = crtc_id;
1757	ret = drmIoctl(bufmgr_gem->fd,
1758		       DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
1759		       &get_pipe_from_crtc_id);
1760	if (ret != 0) {
1761		/* We return -1 here to signal that we don't
1762		 * know which pipe is associated with this crtc.
1763		 * This lets the caller know that this information
1764		 * isn't available; using the wrong pipe for
1765		 * vblank waiting can cause the chipset to lock up.
1766		 */
1767		return -1;
1768	}
1769
1770	return get_pipe_from_crtc_id.pipe;
1771}
1772
1773static int
1774drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
1775			     unsigned long size, void *data)
1776{
1777	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1778	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1779	struct drm_i915_gem_pread pread;
1780	int ret;
1781
1782	if (bo_gem->is_userptr)
1783		return -EINVAL;
1784
1785	memclear(pread);
1786	pread.handle = bo_gem->gem_handle;
1787	pread.offset = offset;
1788	pread.size = size;
1789	pread.data_ptr = (uint64_t) (uintptr_t) data;
1790	ret = drmIoctl(bufmgr_gem->fd,
1791		       DRM_IOCTL_I915_GEM_PREAD,
1792		       &pread);
1793	if (ret != 0) {
1794		ret = -errno;
1795		DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
1796		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1797		    (int)size, strerror(errno));
1798	}
1799
1800	return ret;
1801}
1802
1803/** Waits for all GPU rendering with the object to have completed. */
1804static void
1805drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
1806{
1807	drm_intel_gem_bo_start_gtt_access(bo, 1);
1808}
1809
1810/**
1811 * Waits on a BO for the given amount of time.
1812 *
1813 * @bo: buffer object to wait for
1814 * @timeout_ns: amount of time to wait in nanoseconds.
1815 *   If value is less than 0, an infinite wait will occur.
1816 *
1817 * Returns 0 if the wait was successful, i.e. the last batch referencing the
1818 * object has completed within the allotted time. Otherwise a negative return
1819 * value describes the error; of particular interest is -ETIME, which means
1820 * the wait timed out before the object became idle.
1821 *
1822 * Similar to drm_intel_gem_bo_wait_rendering except that a timeout parameter
1823 * allows the operation to give up after a certain amount of time. Another
1824 * subtle difference is that the internal locking semantics differ (this
1825 * variant does not hold the lock for the duration of the wait), which makes
1826 * the wait subject to a larger userspace race window.
1827 *
1828 * The implementation shall wait until the object is no longer actively
1829 * referenced within a batch buffer at the time of the call. The wait does
1830 * not guard against the buffer being re-issued by another thread or through a
1831 * flinked handle; userspace must make sure this race does not occur if such
1832 * precision is important.
1833 *
1834 * Note that some kernels have broken the promise of an infinite wait for
1835 * negative values; upgrade to the latest stable kernel if this is the case.
1836 */
1837drm_public int
1838drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns)
1839{
1840	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1841	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1842	struct drm_i915_gem_wait wait;
1843	int ret;
1844
1845	if (!bufmgr_gem->has_wait_timeout) {
1846		DBG("%s:%d: Timed wait is not supported. Falling back to "
1847		    "infinite wait\n", __FILE__, __LINE__);
1848		if (timeout_ns) {
1849			drm_intel_gem_bo_wait_rendering(bo);
1850			return 0;
1851		} else {
1852			return drm_intel_gem_bo_busy(bo) ? -ETIME : 0;
1853		}
1854	}
1855
1856	memclear(wait);
1857	wait.bo_handle = bo_gem->gem_handle;
1858	wait.timeout_ns = timeout_ns;
1859	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
1860	if (ret == -1)
1861		return -errno;
1862
1863	return ret;
1864}
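
/*
 * A minimal usage sketch for the timed wait above (illustrative only and
 * not compiled into the library): wait for a caller-chosen time budget and
 * treat -ETIME as "still busy".
 */
#if 0
static bool example_bo_idle_within(drm_intel_bo *bo, int64_t budget_ns)
{
	int ret = drm_intel_gem_bo_wait(bo, budget_ns);

	if (ret == -ETIME)
		return false;	/* still busy after the allotted time */

	return ret == 0;	/* other negative values are errors */
}
#endif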
1865
1866/**
1867 * Sets the object to the GTT read and possibly write domain, used by the X
1868 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
1869 *
1870 * In combination with drm_intel_gem_bo_pin() and manual fence management, we
1871 * can do tiled pixmaps this way.
1872 */
1873drm_public void
1874drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
1875{
1876	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1877	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1878	struct drm_i915_gem_set_domain set_domain;
1879	int ret;
1880
1881	memclear(set_domain);
1882	set_domain.handle = bo_gem->gem_handle;
1883	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1884	set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
1885	ret = drmIoctl(bufmgr_gem->fd,
1886		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1887		       &set_domain);
1888	if (ret != 0) {
1889		DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
1890		    __FILE__, __LINE__, bo_gem->gem_handle,
1891		    set_domain.read_domains, set_domain.write_domain,
1892		    strerror(errno));
1893	}
1894}
1895
1896static void
1897drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
1898{
1899	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1900	struct drm_gem_close close_bo;
1901	int i, ret;
1902
1903	free(bufmgr_gem->exec2_objects);
1904	free(bufmgr_gem->exec_objects);
1905	free(bufmgr_gem->exec_bos);
1906
1907	pthread_mutex_destroy(&bufmgr_gem->lock);
1908
1909	/* Free any cached buffer objects we were going to reuse */
1910	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
1911		struct drm_intel_gem_bo_bucket *bucket =
1912		    &bufmgr_gem->cache_bucket[i];
1913		drm_intel_bo_gem *bo_gem;
1914
1915		while (!DRMLISTEMPTY(&bucket->head)) {
1916			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1917					      bucket->head.next, head);
1918			DRMLISTDEL(&bo_gem->head);
1919
1920			drm_intel_gem_bo_free(&bo_gem->bo);
1921		}
1922	}
1923
1924	/* Release userptr bo kept hanging around for optimisation. */
1925	if (bufmgr_gem->userptr_active.ptr) {
1926		memclear(close_bo);
1927		close_bo.handle = bufmgr_gem->userptr_active.handle;
1928		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close_bo);
1929		free(bufmgr_gem->userptr_active.ptr);
1930		if (ret)
1931			fprintf(stderr,
1932				"Failed to release test userptr object! (%d) "
1933				"i915 kernel driver may not be sane!\n", errno);
1934	}
1935
1936	free(bufmgr);
1937}
1938
1939/**
1940 * Adds the target buffer to the validation list and adds the relocation
1941 * to the reloc_buffer's relocation list.
1942 *
1943 * The relocation entry at the given offset must already contain the
1944 * precomputed relocation value, because the kernel will optimize out
1945 * the relocation entry write when the buffer hasn't moved from the
1946 * last known offset in target_bo.
1947 */
1948static int
1949do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
1950		 drm_intel_bo *target_bo, uint32_t target_offset,
1951		 uint32_t read_domains, uint32_t write_domain,
1952		 bool need_fence)
1953{
1954	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1955	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1956	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
1957	bool fenced_command;
1958
1959	if (bo_gem->has_error)
1960		return -ENOMEM;
1961
1962	if (target_bo_gem->has_error) {
1963		bo_gem->has_error = true;
1964		return -ENOMEM;
1965	}
1966
1967	/* We never use HW fences for rendering on 965+ */
1968	if (bufmgr_gem->gen >= 4)
1969		need_fence = false;
1970
1971	fenced_command = need_fence;
1972	if (target_bo_gem->tiling_mode == I915_TILING_NONE)
1973		need_fence = false;
1974
1975	/* Create a new relocation list if needed */
1976	if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo))
1977		return -ENOMEM;
1978
1979	/* Check overflow */
1980	assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);
1981
1982	/* Check args */
1983	assert(offset <= bo->size - 4);
1984	assert((write_domain & (write_domain - 1)) == 0);
1985
1986	/* An object needing a fence is a tiled buffer, so it won't have
1987	 * relocs to other buffers.
1988	 */
1989	if (need_fence) {
1990		assert(target_bo_gem->reloc_count == 0);
1991		target_bo_gem->reloc_tree_fences = 1;
1992	}
1993
1994	/* Make sure that we're not adding a reloc to something whose size has
1995	 * already been accounted for.
1996	 */
1997	assert(!bo_gem->used_as_reloc_target);
1998	if (target_bo_gem != bo_gem) {
1999		target_bo_gem->used_as_reloc_target = true;
2000		bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
2001		bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
2002	}
2003
2004	bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
2005	if (target_bo != bo)
2006		drm_intel_gem_bo_reference(target_bo);
2007	if (fenced_command)
2008		bo_gem->reloc_target_info[bo_gem->reloc_count].flags =
2009			DRM_INTEL_RELOC_FENCE;
2010	else
2011		bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
2012
2013	bo_gem->relocs[bo_gem->reloc_count].offset = offset;
2014	bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
2015	bo_gem->relocs[bo_gem->reloc_count].target_handle =
2016	    target_bo_gem->gem_handle;
2017	bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
2018	bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
2019	bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64;
2020	bo_gem->reloc_count++;
2021
2022	return 0;
2023}
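
/*
 * Illustrative sketch (not compiled into the library) of how a caller pairs
 * the precomputed value with drm_intel_bo_emit_reloc(), the public wrapper
 * that ends up here for non-softpinned targets: the batch dword at "offset"
 * is written with the target's presumed address before the relocation is
 * recorded, so the kernel can skip the rewrite when the target has not moved.
 */
#if 0
static int example_emit_reloc(drm_intel_bo *batch, uint32_t *batch_map,
			      uint32_t offset, drm_intel_bo *target,
			      uint32_t target_offset)
{
	/* Precompute the relocation value from the last known offset. */
	batch_map[offset / 4] = (uint32_t)(target->offset64 + target_offset);

	return drm_intel_bo_emit_reloc(batch, offset,
				       target, target_offset,
				       I915_GEM_DOMAIN_RENDER,
				       I915_GEM_DOMAIN_RENDER);
}
#endif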
2024
2025static void
2026drm_intel_gem_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable)
2027{
2028	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2029
2030	if (enable)
2031		bo_gem->kflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
2032	else
2033		bo_gem->kflags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
2034}
2035
2036static int
2037drm_intel_gem_bo_add_softpin_target(drm_intel_bo *bo, drm_intel_bo *target_bo)
2038{
2039	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2040	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2041	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
2042	if (bo_gem->has_error)
2043		return -ENOMEM;
2044
2045	if (target_bo_gem->has_error) {
2046		bo_gem->has_error = true;
2047		return -ENOMEM;
2048	}
2049
2050	if (!(target_bo_gem->kflags & EXEC_OBJECT_PINNED))
2051		return -EINVAL;
2052	if (target_bo_gem == bo_gem)
2053		return -EINVAL;
2054
2055	if (bo_gem->softpin_target_count == bo_gem->softpin_target_size) {
2056		int new_size = bo_gem->softpin_target_size * 2;
2057		if (new_size == 0)
2058			new_size = bufmgr_gem->max_relocs;
2059
2060		bo_gem->softpin_target = realloc(bo_gem->softpin_target, new_size *
2061				sizeof(drm_intel_bo *));
2062		if (!bo_gem->softpin_target)
2063			return -ENOMEM;
2064
2065		bo_gem->softpin_target_size = new_size;
2066	}
2067	bo_gem->softpin_target[bo_gem->softpin_target_count] = target_bo;
2068	drm_intel_gem_bo_reference(target_bo);
2069	bo_gem->softpin_target_count++;
2070
2071	return 0;
2072}
2073
2074static int
2075drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
2076			    drm_intel_bo *target_bo, uint32_t target_offset,
2077			    uint32_t read_domains, uint32_t write_domain)
2078{
2079	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
2080	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *)target_bo;
2081
2082	if (target_bo_gem->kflags & EXEC_OBJECT_PINNED)
2083		return drm_intel_gem_bo_add_softpin_target(bo, target_bo);
2084	else
2085		return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
2086					read_domains, write_domain,
2087					!bufmgr_gem->fenced_relocs);
2088}
2089
2090static int
2091drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
2092				  drm_intel_bo *target_bo,
2093				  uint32_t target_offset,
2094				  uint32_t read_domains, uint32_t write_domain)
2095{
2096	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
2097				read_domains, write_domain, true);
2098}
2099
2100drm_public int
2101drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo)
2102{
2103	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2104
2105	return bo_gem->reloc_count;
2106}
2107
2108/**
2109 * Removes existing relocation entries in the BO after "start".
2110 *
2111 * This allows a user to avoid a two-step process for state setup with
2112 * counting up all the buffer objects and doing a
2113 * drm_intel_bufmgr_check_aperture_space() before emitting any of the
2114 * relocations for the state setup.  Instead, save the state of the
2115 * batchbuffer including drm_intel_gem_bo_get_reloc_count(), emit all the
2116 * state, and then check if it still fits in the aperture.
2117 *
2118 * Any further drm_intel_bufmgr_check_aperture_space() queries
2119 * involving this buffer in the tree are undefined after this call.
2120 *
2121 * This also removes all softpinned targets being referenced by the BO.
2122 */
2123drm_public void
2124drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start)
2125{
2126	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2127	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2128	int i;
2129	struct timespec time;
2130
2131	clock_gettime(CLOCK_MONOTONIC, &time);
2132
2133	assert(bo_gem->reloc_count >= start);
2134
2135	/* Unreference the cleared target buffers */
2136	pthread_mutex_lock(&bufmgr_gem->lock);
2137
2138	for (i = start; i < bo_gem->reloc_count; i++) {
2139		drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo;
2140		if (&target_bo_gem->bo != bo) {
2141			bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences;
2142			drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo,
2143								  time.tv_sec);
2144		}
2145	}
2146	bo_gem->reloc_count = start;
2147
2148	for (i = 0; i < bo_gem->softpin_target_count; i++) {
2149		drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->softpin_target[i];
2150		drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, time.tv_sec);
2151	}
2152	bo_gem->softpin_target_count = 0;
2153
2154	pthread_mutex_unlock(&bufmgr_gem->lock);
2155
2156}
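
/*
 * Illustrative sketch (not compiled into the library) of the
 * save/emit/check/roll-back pattern described above; emit_state_fn is a
 * hypothetical caller-supplied callback that emits relocations into the
 * batch.
 */
#if 0
static int example_emit_state_checked(drm_intel_bo *batch,
				      int (*emit_state_fn)(drm_intel_bo *))
{
	int saved = drm_intel_gem_bo_get_reloc_count(batch);
	int ret = emit_state_fn(batch);

	if (ret == 0 &&
	    drm_intel_bufmgr_check_aperture_space(&batch, 1) != 0) {
		/* Doesn't fit: drop the newly added relocations so the
		 * caller can flush and retry in a fresh batch.
		 */
		drm_intel_gem_bo_clear_relocs(batch, saved);
		ret = -ENOSPC;
	}

	return ret;
}
#endif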
2157
2158/**
2159 * Walk the tree of relocations rooted at BO and accumulate the list of
2160 * validations to be performed and update the relocation buffers with
2161 * index values into the validation list.
2162 */
2163static void
2164drm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
2165{
2166	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2167	int i;
2168
2169	if (bo_gem->relocs == NULL)
2170		return;
2171
2172	for (i = 0; i < bo_gem->reloc_count; i++) {
2173		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
2174
2175		if (target_bo == bo)
2176			continue;
2177
2178		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
2179
2180		/* Continue walking the tree depth-first. */
2181		drm_intel_gem_bo_process_reloc(target_bo);
2182
2183		/* Add the target to the validate list */
2184		drm_intel_add_validate_buffer(target_bo);
2185	}
2186}
2187
2188static void
2189drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
2190{
2191	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
2192	int i;
2193
2194	if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL)
2195		return;
2196
2197	for (i = 0; i < bo_gem->reloc_count; i++) {
2198		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
2199		int need_fence;
2200
2201		if (target_bo == bo)
2202			continue;
2203
2204		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
2205
2206		/* Continue walking the tree depth-first. */
2207		drm_intel_gem_bo_process_reloc2(target_bo);
2208
2209		need_fence = (bo_gem->reloc_target_info[i].flags &
2210			      DRM_INTEL_RELOC_FENCE);
2211
2212		/* Add the target to the validate list */
2213		drm_intel_add_validate_buffer2(target_bo, need_fence);
2214	}
2215
2216	for (i = 0; i < bo_gem->softpin_target_count; i++) {
2217		drm_intel_bo *target_bo = bo_gem->softpin_target[i];
2218
2219		if (target_bo == bo)
2220			continue;
2221
2222		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
2223		drm_intel_gem_bo_process_reloc2(target_bo);
2224		drm_intel_add_validate_buffer2(target_bo, false);
2225	}
2226}
2227
2228
2229static void
2230drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
2231{
2232	int i;
2233
2234	for (i = 0; i < bufmgr_gem->exec_count; i++) {
2235		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
2236		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2237
2238		/* Update the buffer offset */
2239		if (bufmgr_gem->exec_objects[i].offset != bo->offset64) {
2240			DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
2241			    bo_gem->gem_handle, bo_gem->name,
2242			    upper_32_bits(bo->offset64),
2243			    lower_32_bits(bo->offset64),
2244			    upper_32_bits(bufmgr_gem->exec_objects[i].offset),
2245			    lower_32_bits(bufmgr_gem->exec_objects[i].offset));
2246			bo->offset64 = bufmgr_gem->exec_objects[i].offset;
2247			bo->offset = bufmgr_gem->exec_objects[i].offset;
2248		}
2249	}
2250}
2251
2252static void
2253drm_intel_update_buffer_offsets2(drm_intel_bufmgr_gem *bufmgr_gem)
2254{
2255	int i;
2256
2257	for (i = 0; i < bufmgr_gem->exec_count; i++) {
2258		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
2259		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
2260
2261		/* Update the buffer offset */
2262		if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) {
2263			/* If we're seeing a softpinned object here it means that the kernel
2264			 * has relocated our object, which indicates a programming error.
2265			 */
2266			assert(!(bo_gem->kflags & EXEC_OBJECT_PINNED));
2267			DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
2268			    bo_gem->gem_handle, bo_gem->name,
2269			    upper_32_bits(bo->offset64),
2270			    lower_32_bits(bo->offset64),
2271			    upper_32_bits(bufmgr_gem->exec2_objects[i].offset),
2272			    lower_32_bits(bufmgr_gem->exec2_objects[i].offset));
2273			bo->offset64 = bufmgr_gem->exec2_objects[i].offset;
2274			bo->offset = bufmgr_gem->exec2_objects[i].offset;
2275		}
2276	}
2277}
2278
2279drm_public void
2280drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo,
2281			      int x1, int y1, int width, int height,
2282			      enum aub_dump_bmp_format format,
2283			      int pitch, int offset)
2284{
2285}
2286
2287static int
2288drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
2289		      drm_clip_rect_t * cliprects, int num_cliprects, int DR4)
2290{
2291	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2292	struct drm_i915_gem_execbuffer execbuf;
2293	int ret, i;
2294
2295	if (to_bo_gem(bo)->has_error)
2296		return -ENOMEM;
2297
2298	pthread_mutex_lock(&bufmgr_gem->lock);
2299	/* Update indices and set up the validate list. */
2300	drm_intel_gem_bo_process_reloc(bo);
2301
2302	/* Add the batch buffer to the validation list.  There are no
2303	 * relocations pointing to it.
2304	 */
2305	drm_intel_add_validate_buffer(bo);
2306
2307	memclear(execbuf);
2308	execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects;
2309	execbuf.buffer_count = bufmgr_gem->exec_count;
2310	execbuf.batch_start_offset = 0;
2311	execbuf.batch_len = used;
2312	execbuf.cliprects_ptr = (uintptr_t) cliprects;
2313	execbuf.num_cliprects = num_cliprects;
2314	execbuf.DR1 = 0;
2315	execbuf.DR4 = DR4;
2316
2317	ret = drmIoctl(bufmgr_gem->fd,
2318		       DRM_IOCTL_I915_GEM_EXECBUFFER,
2319		       &execbuf);
2320	if (ret != 0) {
2321		ret = -errno;
2322		if (errno == ENOSPC) {
2323			DBG("Execbuffer fails to pin. "
2324			    "Estimate: %u. Actual: %u. Available: %u\n",
2325			    drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
2326							       bufmgr_gem->
2327							       exec_count),
2328			    drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
2329							      bufmgr_gem->
2330							      exec_count),
2331			    (unsigned int)bufmgr_gem->gtt_size);
2332		}
2333	}
2334	drm_intel_update_buffer_offsets(bufmgr_gem);
2335
2336	if (bufmgr_gem->bufmgr.debug)
2337		drm_intel_gem_dump_validation_list(bufmgr_gem);
2338
2339	for (i = 0; i < bufmgr_gem->exec_count; i++) {
2340		drm_intel_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]);
2341
2342		bo_gem->idle = false;
2343
2344		/* Disconnect the buffer from the validate list */
2345		bo_gem->validate_index = -1;
2346		bufmgr_gem->exec_bos[i] = NULL;
2347	}
2348	bufmgr_gem->exec_count = 0;
2349	pthread_mutex_unlock(&bufmgr_gem->lock);
2350
2351	return ret;
2352}
2353
2354static int
2355do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx,
2356	 drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
2357	 int in_fence, int *out_fence,
2358	 unsigned int flags)
2359{
2360	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
2361	struct drm_i915_gem_execbuffer2 execbuf;
2362	int ret = 0;
2363	int i;
2364
2365	if (to_bo_gem(bo)->has_error)
2366		return -ENOMEM;
2367
2368	switch (flags & 0x7) {
2369	default:
2370		return -EINVAL;
2371	case I915_EXEC_BLT:
2372		if (!bufmgr_gem->has_blt)
2373			return -EINVAL;
2374		break;
2375	case I915_EXEC_BSD:
2376		if (!bufmgr_gem->has_bsd)
2377			return -EINVAL;
2378		break;
2379	case I915_EXEC_VEBOX:
2380		if (!bufmgr_gem->has_vebox)
2381			return -EINVAL;
2382		break;
2383	case I915_EXEC_RENDER:
2384	case I915_EXEC_DEFAULT:
2385		break;
2386	}
2387
2388	pthread_mutex_lock(&bufmgr_gem->lock);
2389	/* Update indices and set up the validate list. */
2390	drm_intel_gem_bo_process_reloc2(bo);
2391
2392	/* Add the batch buffer to the validation list.  There are no relocations
2393	 * pointing to it.
2394	 */
2395	drm_intel_add_validate_buffer2(bo, 0);
2396
2397	memclear(execbuf);
2398	execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
2399	execbuf.buffer_count = bufmgr_gem->exec_count;
2400	execbuf.batch_start_offset = 0;
2401	execbuf.batch_len = used;
2402	execbuf.cliprects_ptr = (uintptr_t)cliprects;
2403	execbuf.num_cliprects = num_cliprects;
2404	execbuf.DR1 = 0;
2405	execbuf.DR4 = DR4;
2406	execbuf.flags = flags;
2407	if (ctx == NULL)
2408		i915_execbuffer2_set_context_id(execbuf, 0);
2409	else
2410		i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id);
2411	execbuf.rsvd2 = 0;
2412	if (in_fence != -1) {
2413		execbuf.rsvd2 = in_fence;
2414		execbuf.flags |= I915_EXEC_FENCE_IN;
2415	}
2416	if (out_fence != NULL) {
2417		*out_fence = -1;
2418		execbuf.flags |= I915_EXEC_FENCE_OUT;
2419	}
2420
2421	if (bufmgr_gem->no_exec)
2422		goto skip_execution;
2423
2424	ret = drmIoctl(bufmgr_gem->fd,
2425		       DRM_IOCTL_I915_GEM_EXECBUFFER2_WR,
2426		       &execbuf);
2427	if (ret != 0) {
2428		ret = -errno;
2429		if (ret == -ENOSPC) {
2430			DBG("Execbuffer fails to pin. "
2431			    "Estimate: %u. Actual: %u. Available: %u\n",
2432			    drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
2433							       bufmgr_gem->exec_count),
2434			    drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
2435							      bufmgr_gem->exec_count),
2436			    (unsigned int) bufmgr_gem->gtt_size);
2437		}
2438	}
2439	drm_intel_update_buffer_offsets2(bufmgr_gem);
2440
2441	if (ret == 0 && out_fence != NULL)
2442		*out_fence = execbuf.rsvd2 >> 32;
2443
2444skip_execution:
2445	if (bufmgr_gem->bufmgr.debug)
2446		drm_intel_gem_dump_validation_list(bufmgr_gem);
2447
2448	for (i = 0; i < bufmgr_gem->exec_count; i++) {
2449		drm_intel_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]);
2450
2451		bo_gem->idle = false;
2452
2453		/* Disconnect the buffer from the validate list */
2454		bo_gem->validate_index = -1;
2455		bufmgr_gem->exec_bos[i] = NULL;
2456	}
2457	bufmgr_gem->exec_count = 0;
2458	pthread_mutex_unlock(&bufmgr_gem->lock);
2459
2460	return ret;
2461}
2462
2463static int
2464drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used,
2465		       drm_clip_rect_t *cliprects, int num_cliprects,
2466		       int DR4)
2467{
2468	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
2469			-1, NULL, I915_EXEC_RENDER);
2470}
2471
2472static int
2473drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
2474			drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
2475			unsigned int flags)
2476{
2477	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
2478			-1, NULL, flags);
2479}
2480
2481drm_public int
2482drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx,
2483			      int used, unsigned int flags)
2484{
2485	return do_exec2(bo, used, ctx, NULL, 0, 0, -1, NULL, flags);
2486}
2487
2488drm_public int
2489drm_intel_gem_bo_fence_exec(drm_intel_bo *bo,
2490			    drm_intel_context *ctx,
2491			    int used,
2492			    int in_fence,
2493			    int *out_fence,
2494			    unsigned int flags)
2495{
2496	return do_exec2(bo, used, ctx, NULL, 0, 0, in_fence, out_fence, flags);
2497}
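
/*
 * Illustrative sketch (not compiled into the library) of a submission with
 * explicit fences: "used" is the number of batch bytes the caller wrote,
 * in_fence is an optional sync_file fd to wait on (-1 for none), and the
 * returned out_fence fd signals when this batch completes. The names are
 * hypothetical caller-owned values.
 */
#if 0
static int example_submit(drm_intel_bo *batch, drm_intel_context *ctx,
			  int used, int in_fence)
{
	int out_fence = -1;
	int ret;

	ret = drm_intel_gem_bo_fence_exec(batch, ctx, used,
					  in_fence, &out_fence,
					  I915_EXEC_RENDER);
	if (ret == 0 && out_fence >= 0)
		close(out_fence);	/* a real caller would keep this */

	return ret;
}
#endif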
2498
2499static int
2500drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
2501{
2502	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2503	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2504	struct drm_i915_gem_pin pin;
2505	int ret;
2506
2507	memclear(pin);
2508	pin.handle = bo_gem->gem_handle;
2509	pin.alignment = alignment;
2510
2511	ret = drmIoctl(bufmgr_gem->fd,
2512		       DRM_IOCTL_I915_GEM_PIN,
2513		       &pin);
2514	if (ret != 0)
2515		return -errno;
2516
2517	bo->offset64 = pin.offset;
2518	bo->offset = pin.offset;
2519	return 0;
2520}
2521
2522static int
2523drm_intel_gem_bo_unpin(drm_intel_bo *bo)
2524{
2525	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2526	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2527	struct drm_i915_gem_unpin unpin;
2528	int ret;
2529
2530	memclear(unpin);
2531	unpin.handle = bo_gem->gem_handle;
2532
2533	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
2534	if (ret != 0)
2535		return -errno;
2536
2537	return 0;
2538}
2539
2540static int
2541drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
2542				     uint32_t tiling_mode,
2543				     uint32_t stride)
2544{
2545	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2546	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2547	struct drm_i915_gem_set_tiling set_tiling;
2548	int ret;
2549
2550	if (bo_gem->global_name == 0 &&
2551	    tiling_mode == bo_gem->tiling_mode &&
2552	    stride == bo_gem->stride)
2553		return 0;
2554
2555	memset(&set_tiling, 0, sizeof(set_tiling));
2556	do {
2557		/* set_tiling is slightly broken and overwrites the
2558		 * input on the error path, so we have to open code
2559		 * drmIoctl.
2560		 */
2561		set_tiling.handle = bo_gem->gem_handle;
2562		set_tiling.tiling_mode = tiling_mode;
2563		set_tiling.stride = stride;
2564
2565		ret = ioctl(bufmgr_gem->fd,
2566			    DRM_IOCTL_I915_GEM_SET_TILING,
2567			    &set_tiling);
2568	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
2569	if (ret == -1)
2570		return -errno;
2571
2572	bo_gem->tiling_mode = set_tiling.tiling_mode;
2573	bo_gem->swizzle_mode = set_tiling.swizzle_mode;
2574	bo_gem->stride = set_tiling.stride;
2575	return 0;
2576}
2577
2578static int
2579drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
2580			    uint32_t stride)
2581{
2582	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2583	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2584	int ret;
2585
2586	/* Tiling with userptr surfaces is not supported
2587	 * on all hardware, so refuse it for the time being.
2588	 */
2589	if (bo_gem->is_userptr)
2590		return -EINVAL;
2591
2592	/* Linear buffers have no stride. By ensuring that we only ever use
2593	 * stride 0 with linear buffers, we simplify our code.
2594	 */
2595	if (*tiling_mode == I915_TILING_NONE)
2596		stride = 0;
2597
2598	ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride);
2599	if (ret == 0)
2600		drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
2601
2602	*tiling_mode = bo_gem->tiling_mode;
2603	return ret;
2604}
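
/*
 * Illustrative sketch (not compiled into the library): request X tiling for
 * a buffer and report an error if the kernel did not actually apply it;
 * *tiling_mode is always updated with the mode in effect.
 */
#if 0
static int example_make_x_tiled(drm_intel_bo *bo, uint32_t stride)
{
	uint32_t tiling = I915_TILING_X;
	int ret = drm_intel_bo_set_tiling(bo, &tiling, stride);

	if (ret == 0 && tiling != I915_TILING_X)
		ret = -EINVAL;	/* the buffer stayed linear */

	return ret;
}
#endif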
2605
2606static int
2607drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
2608			    uint32_t * swizzle_mode)
2609{
2610	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2611
2612	*tiling_mode = bo_gem->tiling_mode;
2613	*swizzle_mode = bo_gem->swizzle_mode;
2614	return 0;
2615}
2616
2617static int
2618drm_intel_gem_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset)
2619{
2620	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2621
2622	bo->offset64 = offset;
2623	bo->offset = offset;
2624	bo_gem->kflags |= EXEC_OBJECT_PINNED;
2625
2626	return 0;
2627}
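
/*
 * Illustrative softpin sketch (not compiled into the library): the caller
 * manages its own GPU virtual address space and pins the buffer at a chosen
 * address (gpu_addr is a hypothetical caller-allocated offset). This only
 * works on kernels reporting I915_PARAM_HAS_EXEC_SOFTPIN; otherwise the
 * public wrapper returns an error.
 */
#if 0
static int example_softpin(drm_intel_bo *bo, uint64_t gpu_addr)
{
	int ret = drm_intel_bo_set_softpin_offset(bo, gpu_addr);

	if (ret == 0)
		/* Allow the full 48-bit PPGTT range on gen8+ kernels. */
		drm_intel_bo_use_48b_address_range(bo, 1);

	return ret;
}
#endif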
2628
2629drm_public drm_intel_bo *
2630drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size)
2631{
2632	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
2633	int ret;
2634	uint32_t handle;
2635	drm_intel_bo_gem *bo_gem;
2636	struct drm_i915_gem_get_tiling get_tiling;
2637
2638	pthread_mutex_lock(&bufmgr_gem->lock);
2639	ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle);
2640	if (ret) {
2641		DBG("create_from_prime: failed to obtain handle from fd: %s\n", strerror(errno));
2642		pthread_mutex_unlock(&bufmgr_gem->lock);
2643		return NULL;
2644	}
2645
2646	/*
2647	 * See if the kernel has already returned this buffer to us. Just as
2648	 * for named buffers, we must not create two bo's pointing at the same
2649	 * kernel object.
2650	 */
2651	HASH_FIND(handle_hh, bufmgr_gem->handle_table,
2652		  &handle, sizeof(handle), bo_gem);
2653	if (bo_gem) {
2654		drm_intel_gem_bo_reference(&bo_gem->bo);
2655		goto out;
2656	}
2657
2658	bo_gem = calloc(1, sizeof(*bo_gem));
2659	if (!bo_gem)
2660		goto out;
2661
2662	atomic_set(&bo_gem->refcount, 1);
2663	DRMINITLISTHEAD(&bo_gem->vma_list);
2664
2665	/* Determine size of bo.  The fd-to-handle ioctl really should
2666	 * return the size, but it doesn't.  If we have kernel 3.12 or
2667	 * later, we can lseek on the prime fd to get the size.  Older
2668	 * kernels will just fail, in which case we fall back to the
2669	 * provided (estimated or guessed) size. */
2670	ret = lseek(prime_fd, 0, SEEK_END);
2671	if (ret != -1)
2672		bo_gem->bo.size = ret;
2673	else
2674		bo_gem->bo.size = size;
2675
2676	bo_gem->bo.handle = handle;
2677	bo_gem->bo.bufmgr = bufmgr;
2678
2679	bo_gem->gem_handle = handle;
2680	HASH_ADD(handle_hh, bufmgr_gem->handle_table,
2681		 gem_handle, sizeof(bo_gem->gem_handle), bo_gem);
2682
2683	bo_gem->name = "prime";
2684	bo_gem->validate_index = -1;
2685	bo_gem->reloc_tree_fences = 0;
2686	bo_gem->used_as_reloc_target = false;
2687	bo_gem->has_error = false;
2688	bo_gem->reusable = false;
2689
2690	memclear(get_tiling);
2691	get_tiling.handle = bo_gem->gem_handle;
2692	if (drmIoctl(bufmgr_gem->fd,
2693		     DRM_IOCTL_I915_GEM_GET_TILING,
2694		     &get_tiling))
2695		goto err;
2696
2697	bo_gem->tiling_mode = get_tiling.tiling_mode;
2698	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
2699	/* XXX stride is unknown */
2700	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
2701
2702out:
2703	pthread_mutex_unlock(&bufmgr_gem->lock);
2704	return &bo_gem->bo;
2705
2706err:
2707	drm_intel_gem_bo_free(&bo_gem->bo);
2708	pthread_mutex_unlock(&bufmgr_gem->lock);
2709	return NULL;
2710}
2711
2712drm_public int
2713drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd)
2714{
2715	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2716	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2717
2718	if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle,
2719			       DRM_CLOEXEC, prime_fd) != 0)
2720		return -errno;
2721
2722	bo_gem->reusable = false;
2723
2724	return 0;
2725}
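
/*
 * Illustrative dma-buf round trip (not compiled into the library): export a
 * bo as a prime fd and re-import it, e.g. in another process or through
 * another bufmgr. The size argument is only a fallback for kernels too old
 * to report the size via lseek on the fd.
 */
#if 0
static drm_intel_bo *example_share_bo(drm_intel_bufmgr *importer,
				      drm_intel_bo *bo)
{
	drm_intel_bo *imported = NULL;
	int prime_fd;

	if (drm_intel_bo_gem_export_to_prime(bo, &prime_fd) == 0) {
		imported = drm_intel_bo_gem_create_from_prime(importer,
							      prime_fd,
							      (int) bo->size);
		close(prime_fd);	/* the import holds its own reference */
	}

	return imported;
}
#endif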
2726
2727static int
2728drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name)
2729{
2730	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2731	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2732
2733	if (!bo_gem->global_name) {
2734		struct drm_gem_flink flink;
2735
2736		memclear(flink);
2737		flink.handle = bo_gem->gem_handle;
2738		if (drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink))
2739			return -errno;
2740
2741		pthread_mutex_lock(&bufmgr_gem->lock);
2742		if (!bo_gem->global_name) {
2743			bo_gem->global_name = flink.name;
2744			bo_gem->reusable = false;
2745
2746			HASH_ADD(name_hh, bufmgr_gem->name_table,
2747				 global_name, sizeof(bo_gem->global_name),
2748				 bo_gem);
2749		}
2750		pthread_mutex_unlock(&bufmgr_gem->lock);
2751	}
2752
2753	*name = bo_gem->global_name;
2754	return 0;
2755}
2756
2757/**
2758 * Enables unlimited caching of buffer objects for reuse.
2759 *
2760 * This is potentially very memory expensive, as the cache at each bucket
2761 * size is only bounded by how many buffers of that size we've managed to have
2762 * in flight at once.
2763 */
2764drm_public void
2765drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr)
2766{
2767	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
2768
2769	bufmgr_gem->bo_reuse = true;
2770}
2771
2772/**
2773 * Disables implicit synchronisation before executing the bo
2774 *
2775 * This will cause rendering corruption unless you correctly manage explicit
2776 * fences for all rendering involving this buffer - including use by others.
2777 * Disabling the implicit serialisation is only required if that serialisation
2778 * is too coarse (for example, you have split the buffer into many
2779 * non-overlapping regions and are sharing the whole buffer between concurrent
2780 * independent command streams).
2781 *
2782 * Note the kernel must advertise support via I915_PARAM_HAS_EXEC_ASYNC,
2783 * which can be checked using drm_intel_bufmgr_gem_can_disable_implicit_sync,
2784 * or subsequent execbufs involving the bo will generate EINVAL.
2785 */
2786drm_public void
2787drm_intel_gem_bo_disable_implicit_sync(drm_intel_bo *bo)
2788{
2789	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2790
2791	bo_gem->kflags |= EXEC_OBJECT_ASYNC;
2792}
2793
2794/**
2795 * Enables implicit synchronisation before executing the bo
2796 *
2797 * This is the default behaviour of the kernel, to wait upon prior writes
2798 * completing on the object before rendering with it, or to wait for prior
2799 * reads to complete before writing into the object.
2800 * drm_intel_gem_bo_disable_implicit_sync() can stop this behaviour, telling
2801 * the kernel never to insert a stall before using the object. Then this
2802 * function can be used to restore the implicit sync before subsequent
2803 * rendering.
2804 */
2805drm_public void
2806drm_intel_gem_bo_enable_implicit_sync(drm_intel_bo *bo)
2807{
2808	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2809
2810	bo_gem->kflags &= ~EXEC_OBJECT_ASYNC;
2811}
2812
2813/**
2814 * Query whether the kernel supports disabling of its implicit synchronisation
2815 * before execbuf. See drm_intel_gem_bo_disable_implicit_sync()
2816 */
2817drm_public int
2818drm_intel_bufmgr_gem_can_disable_implicit_sync(drm_intel_bufmgr *bufmgr)
2819{
2820	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
2821
2822	return bufmgr_gem->has_exec_async;
2823}
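
/*
 * Illustrative sketch (not compiled into the library) of opting a buffer out
 * of implicit sync, guarded by the capability query above. This is only safe
 * when the caller serialises every access to the buffer with explicit fences
 * (e.g. via drm_intel_gem_bo_fence_exec()).
 */
#if 0
static void example_use_explicit_fencing(drm_intel_bufmgr *bufmgr,
					 drm_intel_bo *bo)
{
	if (!drm_intel_bufmgr_gem_can_disable_implicit_sync(bufmgr))
		return;

	drm_intel_gem_bo_disable_implicit_sync(bo);
	/* ... render with explicit fences ... */
	drm_intel_gem_bo_enable_implicit_sync(bo);
}
#endif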
2824
2825/**
2826 * Enable use of fenced reloc type.
2827 *
2828 * New code should enable this to avoid unnecessary fence register
2829 * allocation.  If this option is not enabled, all relocs will have a fence
2830 * register allocated.
2831 */
2832drm_public void
2833drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr)
2834{
2835	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
2836
2837	if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2)
2838		bufmgr_gem->fenced_relocs = true;
2839}
2840
2841/**
2842 * Return the additional aperture space required by the tree of buffer objects
2843 * rooted at bo.
2844 */
2845static int
2846drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
2847{
2848	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2849	int i;
2850	int total = 0;
2851
2852	if (bo == NULL || bo_gem->included_in_check_aperture)
2853		return 0;
2854
2855	total += bo->size;
2856	bo_gem->included_in_check_aperture = true;
2857
2858	for (i = 0; i < bo_gem->reloc_count; i++)
2859		total +=
2860		    drm_intel_gem_bo_get_aperture_space(bo_gem->
2861							reloc_target_info[i].bo);
2862
2863	return total;
2864}
2865
2866/**
2867 * Count the number of buffers in this list that need a fence reg
2868 *
2869 * If the count is greater than the number of available regs, we'll have
2870 * to ask the caller to resubmit a batch with fewer tiled buffers.
2871 *
2872 * This function over-counts if the same buffer is used multiple times.
2873 */
2874static unsigned int
2875drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count)
2876{
2877	int i;
2878	unsigned int total = 0;
2879
2880	for (i = 0; i < count; i++) {
2881		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
2882
2883		if (bo_gem == NULL)
2884			continue;
2885
2886		total += bo_gem->reloc_tree_fences;
2887	}
2888	return total;
2889}
2890
2891/**
2892 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready
2893 * for the next drm_intel_bufmgr_check_aperture_space() call.
2894 */
2895static void
2896drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
2897{
2898	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2899	int i;
2900
2901	if (bo == NULL || !bo_gem->included_in_check_aperture)
2902		return;
2903
2904	bo_gem->included_in_check_aperture = false;
2905
2906	for (i = 0; i < bo_gem->reloc_count; i++)
2907		drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->
2908							   reloc_target_info[i].bo);
2909}
2910
2911/**
2912 * Return a conservative estimate for the amount of aperture required
2913 * for a collection of buffers. This may double-count some buffers.
2914 */
2915static unsigned int
2916drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count)
2917{
2918	int i;
2919	unsigned int total = 0;
2920
2921	for (i = 0; i < count; i++) {
2922		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
2923		if (bo_gem != NULL)
2924			total += bo_gem->reloc_tree_size;
2925	}
2926	return total;
2927}
2928
2929/**
2930 * Return the amount of aperture needed for a collection of buffers.
2931 * This avoids double counting any buffers, at the cost of looking
2932 * at every buffer in the set.
2933 */
2934static unsigned int
2935drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count)
2936{
2937	int i;
2938	unsigned int total = 0;
2939
2940	for (i = 0; i < count; i++) {
2941		total += drm_intel_gem_bo_get_aperture_space(bo_array[i]);
2942		/* For the first buffer object in the array, we get an
2943		 * accurate count back for its reloc_tree size (since nothing
2944		 * had been flagged as being counted yet).  We can save that
2945		 * value out as a more conservative reloc_tree_size that
2946		 * avoids double-counting target buffers.  Since the first
2947		 * buffer happens to usually be the batch buffer in our
2948		 * callers, this can pull us back from doing the tree
2949		 * walk on every new batch emit.
2950		 */
2951		if (i == 0) {
2952			drm_intel_bo_gem *bo_gem =
2953			    (drm_intel_bo_gem *) bo_array[i];
2954			bo_gem->reloc_tree_size = total;
2955		}
2956	}
2957
2958	for (i = 0; i < count; i++)
2959		drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]);
2960	return total;
2961}
2962
2963/**
2964 * Return -1 if the batchbuffer should be flushed before attempting to
2965 * emit rendering referencing the buffers pointed to by bo_array.
2966 *
2967 * This is required because if we try to emit a batchbuffer with relocations
2968 * to a tree of buffers that won't simultaneously fit in the aperture,
2969 * the rendering will return an error at a point where the software is not
2970 * prepared to recover from it.
2971 *
2972 * However, we also want to emit the batchbuffer significantly before we reach
2973 * the limit, as a series of batchbuffers each of which references buffers
2974 * covering almost all of the aperture means that at each emit we end up
2975 * waiting to evict a buffer from the last rendering, which makes rendering
2976 * effectively synchronous.  By emitting smaller batchbuffers, we eat some
2977 * CPU overhead to get better parallelism.
2978 */
2979static int
2980drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
2981{
2982	drm_intel_bufmgr_gem *bufmgr_gem =
2983	    (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr;
2984	unsigned int total = 0;
2985	unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
2986	int total_fences;
2987
2988	/* Check for fence reg constraints if necessary */
2989	if (bufmgr_gem->available_fences) {
2990		total_fences = drm_intel_gem_total_fences(bo_array, count);
2991		if (total_fences > bufmgr_gem->available_fences)
2992			return -ENOSPC;
2993	}
2994
2995	total = drm_intel_gem_estimate_batch_space(bo_array, count);
2996
2997	if (total > threshold)
2998		total = drm_intel_gem_compute_batch_space(bo_array, count);
2999
3000	if (total > threshold) {
3001		DBG("check_space: overflowed available aperture, "
3002		    "%dkb vs %dkb\n",
3003		    total / 1024, (int)bufmgr_gem->gtt_size / 1024);
3004		return -ENOSPC;
3005	} else {
3006		DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024,
3007		    (int)bufmgr_gem->gtt_size / 1024);
3008		return 0;
3009	}
3010}
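
/*
 * Illustrative sketch (not compiled into the library) of the
 * flush-before-overflow policy described above: if the buffers referenced by
 * the next draw no longer fit alongside the current batch, submit what has
 * accumulated first. flush_batch_fn is a hypothetical caller-provided
 * function.
 */
#if 0
static void example_require_space(drm_intel_bo **bo_array, int count,
				  void (*flush_batch_fn)(void))
{
	if (drm_intel_bufmgr_check_aperture_space(bo_array, count) != 0)
		flush_batch_fn();
}
#endif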
3011
3012/*
3013 * Disable buffer reuse for objects which are shared with the kernel
3014 * as scanout buffers
3015 */
3016static int
3017drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo)
3018{
3019	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3020
3021	bo_gem->reusable = false;
3022	return 0;
3023}
3024
3025static int
3026drm_intel_gem_bo_is_reusable(drm_intel_bo *bo)
3027{
3028	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3029
3030	return bo_gem->reusable;
3031}
3032
3033static int
3034_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
3035{
3036	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3037	int i;
3038
3039	for (i = 0; i < bo_gem->reloc_count; i++) {
3040		if (bo_gem->reloc_target_info[i].bo == target_bo)
3041			return 1;
3042		if (bo == bo_gem->reloc_target_info[i].bo)
3043			continue;
3044		if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo,
3045						target_bo))
3046			return 1;
3047	}
3048
3049	for (i = 0; i < bo_gem->softpin_target_count; i++) {
3050		if (bo_gem->softpin_target[i] == target_bo)
3051			return 1;
3052		if (_drm_intel_gem_bo_references(bo_gem->softpin_target[i], target_bo))
3053			return 1;
3054	}
3055
3056	return 0;
3057}
3058
3059/** Return true if target_bo is referenced by bo's relocation tree. */
3060static int
3061drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
3062{
3063	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
3064
3065	if (bo == NULL || target_bo == NULL)
3066		return 0;
3067	if (target_bo_gem->used_as_reloc_target)
3068		return _drm_intel_gem_bo_references(bo, target_bo);
3069	return 0;
3070}
3071
3072static void
3073add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
3074{
3075	unsigned int i = bufmgr_gem->num_buckets;
3076
3077	assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));
3078
3079	DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
3080	bufmgr_gem->cache_bucket[i].size = size;
3081	bufmgr_gem->num_buckets++;
3082}
3083
3084static void
3085init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
3086{
3087	unsigned long size, cache_max_size = 64 * 1024 * 1024;
3088
3089	/* OK, so power of two buckets was too wasteful of memory.
3090	 * Give 3 other sizes between each power of two, to hopefully
3091	 * cover things accurately enough.  (The alternative is
3092	 * probably to just go for exact matching of sizes, and assume
3093	 * that for things like composited window resize the tiled
3094	 * width/height alignment and rounding of sizes to pages will
3095	 * get us useful cache hit rates anyway)
3096	 */
3097	add_bucket(bufmgr_gem, 4096);
3098	add_bucket(bufmgr_gem, 4096 * 2);
3099	add_bucket(bufmgr_gem, 4096 * 3);
3100
3101	/* Initialize the linked lists for BO reuse cache. */
3102	for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
3103		add_bucket(bufmgr_gem, size);
3104
3105		add_bucket(bufmgr_gem, size + size * 1 / 4);
3106		add_bucket(bufmgr_gem, size + size * 2 / 4);
3107		add_bucket(bufmgr_gem, size + size * 3 / 4);
3108	}
3109}
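
/*
 * With the scheme above the bucket sizes work out to (in 4096-byte pages):
 * 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, ... i.e. each
 * power of two plus three evenly spaced intermediate sizes, up to (and, for
 * the final iteration's intermediates, slightly beyond) the 64MB
 * cache_max_size.
 */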
3110
3111drm_public void
3112drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit)
3113{
3114	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3115
3116	bufmgr_gem->vma_max = limit;
3117
3118	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
3119}
3120
3121static int
3122parse_devid_override(const char *devid_override)
3123{
3124	static const struct {
3125		const char *name;
3126		int pci_id;
3127	} name_map[] = {
3128		{ "brw", PCI_CHIP_I965_GM },
3129		{ "g4x", PCI_CHIP_GM45_GM },
3130		{ "ilk", PCI_CHIP_ILD_G },
3131		{ "snb", PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS },
3132		{ "ivb", PCI_CHIP_IVYBRIDGE_S_GT2 },
3133		{ "hsw", PCI_CHIP_HASWELL_CRW_E_GT3 },
3134		{ "byt", PCI_CHIP_VALLEYVIEW_3 },
3135		{ "bdw", 0x1620 | BDW_ULX },
3136		{ "skl", PCI_CHIP_SKYLAKE_DT_GT2 },
3137		{ "kbl", PCI_CHIP_KABYLAKE_DT_GT2 },
3138	};
3139	unsigned int i;
3140
3141	for (i = 0; i < ARRAY_SIZE(name_map); i++) {
3142		if (!strcmp(name_map[i].name, devid_override))
3143			return name_map[i].pci_id;
3144	}
3145
3146	return strtod(devid_override, NULL);
3147}
3148
3149/**
3150 * Get the PCI ID for the device.  This can be overridden by setting the
3151 * INTEL_DEVID_OVERRIDE environment variable to the desired ID.
3152 */
3153static int
3154get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem)
3155{
3156	char *devid_override;
3157	int devid = 0;
3158	int ret;
3159	drm_i915_getparam_t gp;
3160
3161	if (geteuid() == getuid()) {
3162		devid_override = getenv("INTEL_DEVID_OVERRIDE");
3163		if (devid_override) {
3164			bufmgr_gem->no_exec = true;
3165			return parse_devid_override(devid_override);
3166		}
3167	}
3168
3169	memclear(gp);
3170	gp.param = I915_PARAM_CHIPSET_ID;
3171	gp.value = &devid;
3172	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3173	if (ret) {
3174		fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
3175		fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
3176	}
3177	return devid;
3178}
3179
3180drm_public int
3181drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr)
3182{
3183	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3184
3185	return bufmgr_gem->pci_device;
3186}
3187
3188/**
3189 * Sets the AUB filename.
3190 *
3191 * This function has to be called before drm_intel_bufmgr_gem_set_aub_dump()
3192 * for it to have any effect.
3193 */
3194drm_public void
3195drm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr,
3196				      const char *filename)
3197{
3198}
3199
3200/**
3201 * Sets up AUB dumping.
3202 *
3203 * This is a trace file format that can be used with the simulator.
3204 * Packets are emitted in a format somewhat like GPU command packets.
3205 * You can set up a GTT and upload your objects into the referenced
3206 * space, then send off batchbuffers and get BMPs out the other end.
3207 */
3208drm_public void
3209drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable)
3210{
3211	fprintf(stderr, "libdrm aub dumping is deprecated.\n\n"
3212		"Use intel_aubdump from intel-gpu-tools instead.  Install intel-gpu-tools,\n"
3213		"then run (for example)\n\n"
3214		"\t$ intel_aubdump --output=trace.aub glxgears -geometry 500x500\n\n"
3215		"See the intel_aubdump man page for more details.\n");
3216}
3217
3218drm_public drm_intel_context *
3219drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr)
3220{
3221	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3222	struct drm_i915_gem_context_create create;
3223	drm_intel_context *context = NULL;
3224	int ret;
3225
3226	context = calloc(1, sizeof(*context));
3227	if (!context)
3228		return NULL;
3229
3230	memclear(create);
3231	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
3232	if (ret != 0) {
3233		DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n",
3234		    strerror(errno));
3235		free(context);
3236		return NULL;
3237	}
3238
3239	context->ctx_id = create.ctx_id;
3240	context->bufmgr = bufmgr;
3241
3242	return context;
3243}
3244
3245drm_public int
3246drm_intel_gem_context_get_id(drm_intel_context *ctx, uint32_t *ctx_id)
3247{
3248	if (ctx == NULL)
3249		return -EINVAL;
3250
3251	*ctx_id = ctx->ctx_id;
3252
3253	return 0;
3254}
3255
3256drm_public void
3257drm_intel_gem_context_destroy(drm_intel_context *ctx)
3258{
3259	drm_intel_bufmgr_gem *bufmgr_gem;
3260	struct drm_i915_gem_context_destroy destroy;
3261	int ret;
3262
3263	if (ctx == NULL)
3264		return;
3265
3266	memclear(destroy);
3267
3268	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
3269	destroy.ctx_id = ctx->ctx_id;
3270	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY,
3271		       &destroy);
3272	if (ret != 0)
3273		fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
3274			strerror(errno));
3275
3276	free(ctx);
3277}
3278
3279drm_public int
3280drm_intel_get_reset_stats(drm_intel_context *ctx,
3281			  uint32_t *reset_count,
3282			  uint32_t *active,
3283			  uint32_t *pending)
3284{
3285	drm_intel_bufmgr_gem *bufmgr_gem;
3286	struct drm_i915_reset_stats stats;
3287	int ret;
3288
3289	if (ctx == NULL)
3290		return -EINVAL;
3291
3292	memclear(stats);
3293
3294	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
3295	stats.ctx_id = ctx->ctx_id;
3296	ret = drmIoctl(bufmgr_gem->fd,
3297		       DRM_IOCTL_I915_GET_RESET_STATS,
3298		       &stats);
3299	if (ret == 0) {
3300		if (reset_count != NULL)
3301			*reset_count = stats.reset_count;
3302
3303		if (active != NULL)
3304			*active = stats.batch_active;
3305
3306		if (pending != NULL)
3307			*pending = stats.batch_pending;
3308	}
3309
3310	return ret;
3311}
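
/*
 * Illustrative robustness sketch (not compiled into the library): contexts
 * come from drm_intel_gem_context_create(), and the reset stats above can be
 * polled to detect whether the context's batches were lost to a GPU hang.
 * last_reset_count is hypothetical caller-owned state.
 */
#if 0
static bool example_context_was_reset(drm_intel_context *ctx,
				      uint32_t *last_reset_count)
{
	uint32_t reset_count, active, pending;

	if (drm_intel_get_reset_stats(ctx, &reset_count, &active, &pending))
		return false;	/* query failed or not supported */

	if (reset_count != *last_reset_count || active || pending) {
		*last_reset_count = reset_count;
		return true;
	}

	return false;
}
#endif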
3312
3313drm_public int
3314drm_intel_reg_read(drm_intel_bufmgr *bufmgr,
3315		   uint32_t offset,
3316		   uint64_t *result)
3317{
3318	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3319	struct drm_i915_reg_read reg_read;
3320	int ret;
3321
3322	memclear(reg_read);
3323	reg_read.offset = offset;
3324
3325	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
3326
3327	*result = reg_read.val;
3328	return ret;
3329}
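
/*
 * Illustrative sketch (not compiled into the library) of reading the render
 * ring timestamp through the interface above. The 0x2358 offset (RCS
 * TIMESTAMP) is an assumption based on common usage; the kernel rejects
 * offsets that are not on its whitelist.
 */
#if 0
static uint64_t example_read_gpu_timestamp(drm_intel_bufmgr *bufmgr)
{
	uint64_t timestamp = 0;

	if (drm_intel_reg_read(bufmgr, 0x2358, &timestamp) != 0)
		return 0;

	return timestamp;
}
#endif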
3330
3331drm_public int
3332drm_intel_get_subslice_total(int fd, unsigned int *subslice_total)
3333{
3334	drm_i915_getparam_t gp;
3335	int ret;
3336
3337	memclear(gp);
3338	gp.value = (int*)subslice_total;
3339	gp.param = I915_PARAM_SUBSLICE_TOTAL;
3340	ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
3341	if (ret)
3342		return -errno;
3343
3344	return 0;
3345}
3346
3347drm_public int
3348drm_intel_get_eu_total(int fd, unsigned int *eu_total)
3349{
3350	drm_i915_getparam_t gp;
3351	int ret;
3352
3353	memclear(gp);
3354	gp.value = (int*)eu_total;
3355	gp.param = I915_PARAM_EU_TOTAL;
3356	ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
3357	if (ret)
3358		return -errno;
3359
3360	return 0;
3361}
3362
3363drm_public int
3364drm_intel_get_pooled_eu(int fd)
3365{
3366	drm_i915_getparam_t gp;
3367	int ret = -1;
3368
3369	memclear(gp);
3370	gp.param = I915_PARAM_HAS_POOLED_EU;
3371	gp.value = &ret;
3372	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
3373		return -errno;
3374
3375	return ret;
3376}
3377
3378drm_public int
3379drm_intel_get_min_eu_in_pool(int fd)
3380{
3381	drm_i915_getparam_t gp;
3382	int ret = -1;
3383
3384	memclear(gp);
3385	gp.param = I915_PARAM_MIN_EU_IN_POOL;
3386	gp.value = &ret;
3387	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
3388		return -errno;
3389
3390	return ret;
3391}
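
/*
 * Illustrative sketch (not compiled into the library) combining the
 * parameter queries above to report basic GPU topology; any of them may fail
 * on kernels or hardware that do not expose the information.
 */
#if 0
static void example_print_topology(int fd)
{
	unsigned int subslices = 0, eus = 0;

	if (drm_intel_get_subslice_total(fd, &subslices) == 0 &&
	    drm_intel_get_eu_total(fd, &eus) == 0)
		fprintf(stderr, "GPU: %u subslices, %u EUs\n",
			subslices, eus);

	if (drm_intel_get_pooled_eu(fd) > 0)
		fprintf(stderr, "pooled EU enabled, min %d EUs in pool\n",
			drm_intel_get_min_eu_in_pool(fd));
}
#endif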
3392
3393/**
3394 * Annotate the given bo for use in aub dumping.
3395 *
3396 * \param annotations is an array of drm_intel_aub_annotation objects
3397 * describing the type of data in various sections of the bo.  Each
3398 * element of the array specifies the type and subtype of a section of
3399 * the bo, and the past-the-end offset of that section.  The elements
3400 * of \c annotations must be sorted so that ending_offset is
3401 * increasing.
3402 *
3403 * \param count is the number of elements in the \c annotations array.
3404 * If \c count is zero, then \c annotations will not be dereferenced.
3405 *
3406 * Annotations are copied into a private data structure, so caller may
3407 * re-use the memory pointed to by \c annotations after the call
3408 * returns.
3409 *
3410 * Annotations are stored for the lifetime of the bo; to reset to the
3411 * default state (no annotations), call this function with a \c count
3412 * of zero.
3413 */
3414drm_public void drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo,
3415					 drm_intel_aub_annotation *annotations,
3416					 unsigned count)
3417{
3418}
3419
3420static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER;
3421static drmMMListHead bufmgr_list = { &bufmgr_list, &bufmgr_list };
3422
3423static drm_intel_bufmgr_gem *
3424drm_intel_bufmgr_gem_find(int fd)
3425{
3426	drm_intel_bufmgr_gem *bufmgr_gem;
3427
3428	DRMLISTFOREACHENTRY(bufmgr_gem, &bufmgr_list, managers) {
3429		if (bufmgr_gem->fd == fd) {
3430			atomic_inc(&bufmgr_gem->refcount);
3431			return bufmgr_gem;
3432		}
3433	}
3434
3435	return NULL;
3436}
3437
3438static void
3439drm_intel_bufmgr_gem_unref(drm_intel_bufmgr *bufmgr)
3440{
3441	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3442
3443	if (atomic_add_unless(&bufmgr_gem->refcount, -1, 1)) {
3444		pthread_mutex_lock(&bufmgr_list_mutex);
3445
3446		if (atomic_dec_and_test(&bufmgr_gem->refcount)) {
3447			DRMLISTDEL(&bufmgr_gem->managers);
3448			drm_intel_bufmgr_gem_destroy(bufmgr);
3449		}
3450
3451		pthread_mutex_unlock(&bufmgr_list_mutex);
3452	}
3453}
3454
3455drm_public void *drm_intel_gem_bo_map__gtt(drm_intel_bo *bo)
3456{
3457	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
3458	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3459
3460	if (bo_gem->gtt_virtual)
3461		return bo_gem->gtt_virtual;
3462
3463	if (bo_gem->is_userptr)
3464		return NULL;
3465
3466	pthread_mutex_lock(&bufmgr_gem->lock);
3467	if (bo_gem->gtt_virtual == NULL) {
3468		struct drm_i915_gem_mmap_gtt mmap_arg;
3469		void *ptr;
3470
3471		DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
3472		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
3473
3474		if (bo_gem->map_count++ == 0)
3475			drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
3476
3477		memclear(mmap_arg);
3478		mmap_arg.handle = bo_gem->gem_handle;
3479
3480		/* Get the fake offset back... */
3481		ptr = MAP_FAILED;
3482		if (drmIoctl(bufmgr_gem->fd,
3483			     DRM_IOCTL_I915_GEM_MMAP_GTT,
3484			     &mmap_arg) == 0) {
3485			/* and mmap it */
3486			ptr = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
3487				       MAP_SHARED, bufmgr_gem->fd,
3488				       mmap_arg.offset);
3489		}
3490		if (ptr == MAP_FAILED) {
3491			if (--bo_gem->map_count == 0)
3492				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
3493			ptr = NULL;
3494		}
3495
3496		bo_gem->gtt_virtual = ptr;
3497	}
3498	pthread_mutex_unlock(&bufmgr_gem->lock);
3499
3500	return bo_gem->gtt_virtual;
3501}
3502
3503drm_public void *drm_intel_gem_bo_map__cpu(drm_intel_bo *bo)
3504{
3505	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
3506	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3507
3508	if (bo_gem->mem_virtual)
3509		return bo_gem->mem_virtual;
3510
3511	if (bo_gem->is_userptr) {
3512		/* Return the same user ptr */
3513		return bo_gem->user_virtual;
3514	}
3515
3516	pthread_mutex_lock(&bufmgr_gem->lock);
3517	if (!bo_gem->mem_virtual) {
3518		struct drm_i915_gem_mmap mmap_arg;
3519
3520		if (bo_gem->map_count++ == 0)
3521			drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
3522
3523		DBG("bo_map: %d (%s), map_count=%d\n",
3524		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
3525
3526		memclear(mmap_arg);
3527		mmap_arg.handle = bo_gem->gem_handle;
3528		mmap_arg.size = bo->size;
3529		if (drmIoctl(bufmgr_gem->fd,
3530			     DRM_IOCTL_I915_GEM_MMAP,
3531			     &mmap_arg)) {
3532			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
3533			    __FILE__, __LINE__, bo_gem->gem_handle,
3534			    bo_gem->name, strerror(errno));
3535			if (--bo_gem->map_count == 0)
3536				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
3537		} else {
3538			VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
3539			bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
3540		}
3541	}
3542	pthread_mutex_unlock(&bufmgr_gem->lock);
3543
3544	return bo_gem->mem_virtual;
3545}
3546
3547drm_public void *drm_intel_gem_bo_map__wc(drm_intel_bo *bo)
3548{
3549	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
3550	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3551
3552	if (bo_gem->wc_virtual)
3553		return bo_gem->wc_virtual;
3554
3555	if (bo_gem->is_userptr)
3556		return NULL;
3557
3558	pthread_mutex_lock(&bufmgr_gem->lock);
3559	if (!bo_gem->wc_virtual) {
3560		struct drm_i915_gem_mmap mmap_arg;
3561
3562		if (bo_gem->map_count++ == 0)
3563			drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
3564
3565		DBG("bo_map: %d (%s), map_count=%d\n",
3566		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
3567
3568		memclear(mmap_arg);
3569		mmap_arg.handle = bo_gem->gem_handle;
3570		mmap_arg.size = bo->size;
3571		mmap_arg.flags = I915_MMAP_WC;
3572		if (drmIoctl(bufmgr_gem->fd,
3573			     DRM_IOCTL_I915_GEM_MMAP,
3574			     &mmap_arg)) {
3575			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
3576			    __FILE__, __LINE__, bo_gem->gem_handle,
3577			    bo_gem->name, strerror(errno));
3578			if (--bo_gem->map_count == 0)
3579				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
3580		} else {
3581			VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
3582			bo_gem->wc_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
3583		}
3584	}
3585	pthread_mutex_unlock(&bufmgr_gem->lock);
3586
3587	return bo_gem->wc_virtual;
3588}
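
/*
 * Illustrative sketch (not compiled into the library) of picking between the
 * cached map helpers above: prefer the coherent CPU mapping on LLC platforms
 * and the write-combining mapping for streaming writes elsewhere, falling
 * back to the GTT mapping. The has_llc flag is assumed to come from the
 * caller's own capability tracking.
 */
#if 0
static void *example_map_for_upload(drm_intel_bo *bo, bool has_llc)
{
	void *ptr = has_llc ? drm_intel_gem_bo_map__cpu(bo)
			    : drm_intel_gem_bo_map__wc(bo);

	if (ptr == NULL)
		ptr = drm_intel_gem_bo_map__gtt(bo);	/* last resort */

	return ptr;
}
#endif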
3589
3590/**
3591 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
3592 * and manage map buffer objections.
3593 *
3594 * \param fd File descriptor of the opened DRM device.
3595 */
3596drm_public drm_intel_bufmgr *
3597drm_intel_bufmgr_gem_init(int fd, int batch_size)
3598{
3599	drm_intel_bufmgr_gem *bufmgr_gem;
3600	struct drm_i915_gem_get_aperture aperture;
3601	drm_i915_getparam_t gp;
3602	int ret, tmp;
3603	bool exec2 = false;
3604
3605	pthread_mutex_lock(&bufmgr_list_mutex);
3606
3607	bufmgr_gem = drm_intel_bufmgr_gem_find(fd);
3608	if (bufmgr_gem)
3609		goto exit;
3610
3611	bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
3612	if (bufmgr_gem == NULL)
3613		goto exit;
3614
3615	bufmgr_gem->fd = fd;
3616	atomic_set(&bufmgr_gem->refcount, 1);
3617
3618	if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
3619		free(bufmgr_gem);
3620		bufmgr_gem = NULL;
3621		goto exit;
3622	}
3623
3624	memclear(aperture);
3625	ret = drmIoctl(bufmgr_gem->fd,
3626		       DRM_IOCTL_I915_GEM_GET_APERTURE,
3627		       &aperture);
3628
3629	if (ret == 0)
3630		bufmgr_gem->gtt_size = aperture.aper_available_size;
3631	else {
3632		fprintf(stderr, "DRM_IOCTL_I915_GEM_GET_APERTURE failed: %s\n",
3633			strerror(errno));
3634		bufmgr_gem->gtt_size = 128 * 1024 * 1024;
3635		fprintf(stderr, "Assuming %dkB available aperture size.\n"
3636			"May lead to reduced performance or incorrect "
3637			"rendering.\n",
3638			(int)bufmgr_gem->gtt_size / 1024);
3639	}
3640
3641	bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem);
3642
3643	if (IS_GEN2(bufmgr_gem->pci_device))
3644		bufmgr_gem->gen = 2;
3645	else if (IS_GEN3(bufmgr_gem->pci_device))
3646		bufmgr_gem->gen = 3;
3647	else if (IS_GEN4(bufmgr_gem->pci_device))
3648		bufmgr_gem->gen = 4;
3649	else if (IS_GEN5(bufmgr_gem->pci_device))
3650		bufmgr_gem->gen = 5;
3651	else if (IS_GEN6(bufmgr_gem->pci_device))
3652		bufmgr_gem->gen = 6;
3653	else if (IS_GEN7(bufmgr_gem->pci_device))
3654		bufmgr_gem->gen = 7;
3655	else if (IS_GEN8(bufmgr_gem->pci_device))
3656		bufmgr_gem->gen = 8;
3657	else if (!intel_get_genx(bufmgr_gem->pci_device, &bufmgr_gem->gen)) {
3658		free(bufmgr_gem);
3659		bufmgr_gem = NULL;
3660		goto exit;
3661	}
3662
3663	if (IS_GEN3(bufmgr_gem->pci_device) &&
3664	    bufmgr_gem->gtt_size > 256*1024*1024) {
3665		/* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't
3666		 * be used for tiled blits. To simplify the accounting, just
3667		 * subtract the unmappable part (fixed to 256MB on all known
3668		 * gen3 devices) if the kernel advertises it. */
3669		bufmgr_gem->gtt_size -= 256*1024*1024;
3670	}
3671
3672	memclear(gp);
3673	gp.value = &tmp;
3674
3675	gp.param = I915_PARAM_HAS_EXECBUF2;
3676	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3677	if (!ret)
3678		exec2 = true;
3679
3680	gp.param = I915_PARAM_HAS_BSD;
3681	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3682	bufmgr_gem->has_bsd = ret == 0;
3683
3684	gp.param = I915_PARAM_HAS_BLT;
3685	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3686	bufmgr_gem->has_blt = ret == 0;
3687
3688	gp.param = I915_PARAM_HAS_RELAXED_FENCING;
3689	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3690	bufmgr_gem->has_relaxed_fencing = ret == 0;
3691
3692	gp.param = I915_PARAM_HAS_EXEC_ASYNC;
3693	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3694	bufmgr_gem->has_exec_async = ret == 0;
3695
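	/* Userptr support is not probed here; check_bo_alloc_userptr defers
	 * the capability check until the first bo_alloc_userptr call.
	 */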
3696	bufmgr_gem->bufmgr.bo_alloc_userptr = check_bo_alloc_userptr;
3697
3698	gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
3699	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3700	bufmgr_gem->has_wait_timeout = ret == 0;
3701
3702	gp.param = I915_PARAM_HAS_LLC;
3703	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3704	if (ret != 0) {
3705		/* The kernel does not support the HAS_LLC query; fall back to
3706		 * GPU generation detection and assume that GEN6/7 have LLC.
3707		 */
3708		bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) ||
3709				       IS_GEN7(bufmgr_gem->pci_device));
3710	} else
3711		bufmgr_gem->has_llc = *gp.value;
3712
3713	gp.param = I915_PARAM_HAS_VEBOX;
3714	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3715	bufmgr_gem->has_vebox = (ret == 0) && (*gp.value > 0);
3716
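	/* Softpin lets callers place objects at fixed GPU virtual addresses;
	 * only expose it when the kernel reports EXEC_SOFTPIN support.
	 */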
3717	gp.param = I915_PARAM_HAS_EXEC_SOFTPIN;
3718	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3719	if (ret == 0 && *gp.value > 0)
3720		bufmgr_gem->bufmgr.bo_set_softpin_offset = drm_intel_gem_bo_set_softpin_offset;
3721
3722	if (bufmgr_gem->gen < 4) {
3723		gp.param = I915_PARAM_NUM_FENCES_AVAIL;
3724		gp.value = &bufmgr_gem->available_fences;
3725		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3726		if (ret) {
3727			fprintf(stderr, "get fences failed: %d [%d]\n", ret,
3728				errno);
3729			fprintf(stderr, "param: %d, val: %d\n", gp.param,
3730				*gp.value);
3731			bufmgr_gem->available_fences = 0;
3732		} else {
3733			/* XXX The kernel reports the total number of fences,
3734			 * including any that may be pinned.
3735			 *
3736			 * We presume that there will be at least one pinned
3737			 * fence for the scanout buffer, but there may be more
3738			 * than one scanout and the user may be manually
3739			 * pinning buffers. Let's move to execbuffer2 and
3740			 * thereby forget the insanity of using fences...
3741			 */
3742			bufmgr_gem->available_fences -= 2;
3743			if (bufmgr_gem->available_fences < 0)
3744				bufmgr_gem->available_fences = 0;
3745		}
3746	}
3747
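	/* On Gen8+, a PPGTT getparam value of 3 indicates full 48-bit
	 * (4-level) PPGTT, so objects may opt in to the full address range.
	 */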
3748	if (bufmgr_gem->gen >= 8) {
3749		gp.param = I915_PARAM_HAS_ALIASING_PPGTT;
3750		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3751		if (ret == 0 && *gp.value == 3)
3752			bufmgr_gem->bufmgr.bo_use_48b_address_range = drm_intel_gem_bo_use_48b_address_range;
3753	}
3754
3755	/* Use roughly one relocation for every two dwords in the batch (rounded
3756	 * down a bit, since an exact power of two would mean an extra page
3757	 * allocation for the reloc buffer).
3758	 *
3759	 * One relocation per four dwords was too few for the blender benchmark.
3760	 */
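	/* For example, with an assumed 16 KiB batch (4096 dwords) this works
	 * out to 4096 / 2 - 2 = 2046 relocations per batch.
	 */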
3761	bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
3762
3763	bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
3764	bufmgr_gem->bufmgr.bo_alloc_for_render =
3765	    drm_intel_gem_bo_alloc_for_render;
3766	bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
3767	bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
3768	bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
3769	bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
3770	bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
3771	bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
3772	bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
3773	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
3774	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
3775	bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
3776	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
3777	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
3778	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
3779	bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
3780	bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
3781	/* Prefer the execbuf2 interface when the kernel supports it */
3782	if (exec2) {
3783		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2;
3784		bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2;
3785	} else
3786		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
3787	bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy;
3788	bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise;
3789	bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_unref;
3790	bufmgr_gem->bufmgr.debug = 0;
3791	bufmgr_gem->bufmgr.check_aperture_space =
3792	    drm_intel_gem_check_aperture_space;
3793	bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
3794	bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable;
3795	bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
3796	    drm_intel_gem_get_pipe_from_crtc_id;
3797	bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;
3798
3799	init_cache_buckets(bufmgr_gem);
3800
3801	DRMINITLISTHEAD(&bufmgr_gem->vma_cache);
3802	bufmgr_gem->vma_max = -1; /* unlimited by default */
3803
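	/* Register the new manager on the global list so that later init
	 * calls on the same fd find and reuse it.
	 */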
3804	DRMLISTADD(&bufmgr_gem->managers, &bufmgr_list);
3805
3806exit:
3807	pthread_mutex_unlock(&bufmgr_list_mutex);
3808
3809	return bufmgr_gem != NULL ? &bufmgr_gem->bufmgr : NULL;
3810}
3811
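/*
 * Usage sketch for drm_intel_bufmgr_gem_init() (illustrative only; the device
 * node path and batch size below are assumptions, not requirements):
 *
 *	int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 16 * 1024);
 *	if (bufmgr == NULL)
 *		return -1;
 *	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "scratch", 4096, 4096);
 *	drm_intel_bo_unreference(bo);
 *	drm_intel_bufmgr_destroy(bufmgr);
 */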