intel_bufmgr_gem.c revision 0655efef
1/**************************************************************************
2 *
3 * Copyright � 2007 Red Hat Inc.
4 * Copyright � 2007-2012 Intel Corporation
5 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
6 * All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sub license, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * The above copyright notice and this permission notice (including the
25 * next paragraph) shall be included in all copies or substantial portions
26 * of the Software.
27 *
28 *
29 **************************************************************************/
30/*
31 * Authors: Thomas Hellstr�m <thomas-at-tungstengraphics-dot-com>
32 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
33 *	    Eric Anholt <eric@anholt.net>
34 *	    Dave Airlie <airlied@linux.ie>
35 */
36
37#ifdef HAVE_CONFIG_H
38#include "config.h"
39#endif
40
41#include <xf86drm.h>
42#include <xf86atomic.h>
43#include <fcntl.h>
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <unistd.h>
48#include <assert.h>
49#include <pthread.h>
50#include <stddef.h>
51#include <sys/ioctl.h>
52#include <sys/stat.h>
53#include <sys/types.h>
54#include <stdbool.h>
55
56#include "errno.h"
57#ifndef ETIME
58#define ETIME ETIMEDOUT
59#endif
60#include "libdrm_macros.h"
61#include "libdrm_lists.h"
62#include "intel_bufmgr.h"
63#include "intel_bufmgr_priv.h"
64#include "intel_chipset.h"
65#include "string.h"
66
67#include "i915_drm.h"
68#include "uthash.h"
69
70#ifdef HAVE_VALGRIND
71#include <valgrind.h>
72#include <memcheck.h>
73#define VG(x) x
74#else
75#define VG(x)
76#endif
77
78#define memclear(s) memset(&s, 0, sizeof(s))
79
80#define DBG(...) do {					\
81	if (bufmgr_gem->bufmgr.debug)			\
82		fprintf(stderr, __VA_ARGS__);		\
83} while (0)
84
85#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
86#define MAX2(A, B) ((A) > (B) ? (A) : (B))
87
88/**
89 * upper_32_bits - return bits 32-63 of a number
90 * @n: the number we're accessing
91 *
92 * A basic shift-right of a 64- or 32-bit quantity.  Use this to suppress
93 * the "right shift count >= width of type" warning when that quantity is
94 * 32-bits.
95 */
96#define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16))
97
98/**
99 * lower_32_bits - return bits 0-31 of a number
100 * @n: the number we're accessing
101 */
102#define lower_32_bits(n) ((__u32)(n))
103
104typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
105
106struct drm_intel_gem_bo_bucket {
107	drmMMListHead head;
108	unsigned long size;
109};
110
111typedef struct _drm_intel_bufmgr_gem {
112	drm_intel_bufmgr bufmgr;
113
114	atomic_t refcount;
115
116	int fd;
117
118	int max_relocs;
119
120	pthread_mutex_t lock;
121
122	struct drm_i915_gem_exec_object *exec_objects;
123	struct drm_i915_gem_exec_object2 *exec2_objects;
124	drm_intel_bo **exec_bos;
125	int exec_size;
126	int exec_count;
127
128	/** Array of lists of cached gem objects of power-of-two sizes */
129	struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
130	int num_buckets;
131	time_t time;
132
133	drmMMListHead managers;
134
135	drm_intel_bo_gem *name_table;
136	drm_intel_bo_gem *handle_table;
137
138	drmMMListHead vma_cache;
139	int vma_count, vma_open, vma_max;
140
141	uint64_t gtt_size;
142	int available_fences;
143	int pci_device;
144	int gen;
145	unsigned int has_bsd : 1;
146	unsigned int has_blt : 1;
147	unsigned int has_relaxed_fencing : 1;
148	unsigned int has_llc : 1;
149	unsigned int has_wait_timeout : 1;
150	unsigned int bo_reuse : 1;
151	unsigned int no_exec : 1;
152	unsigned int has_vebox : 1;
153	unsigned int has_exec_async : 1;
154	bool fenced_relocs;
155
156	struct {
157		void *ptr;
158		uint32_t handle;
159	} userptr_active;
160
161} drm_intel_bufmgr_gem;
162
163#define DRM_INTEL_RELOC_FENCE (1<<0)
164
165typedef struct _drm_intel_reloc_target_info {
166	drm_intel_bo *bo;
167	int flags;
168} drm_intel_reloc_target;
169
170struct _drm_intel_bo_gem {
171	drm_intel_bo bo;
172
173	atomic_t refcount;
174	uint32_t gem_handle;
175	const char *name;
176
177	/**
178	 * Kenel-assigned global name for this object
179         *
180         * List contains both flink named and prime fd'd objects
181	 */
182	unsigned int global_name;
183
184	UT_hash_handle handle_hh;
185	UT_hash_handle name_hh;
186
187	/**
188	 * Index of the buffer within the validation list while preparing a
189	 * batchbuffer execution.
190	 */
191	int validate_index;
192
193	/**
194	 * Current tiling mode
195	 */
196	uint32_t tiling_mode;
197	uint32_t swizzle_mode;
198	unsigned long stride;
199
200	unsigned long kflags;
201
202	time_t free_time;
203
204	/** Array passed to the DRM containing relocation information. */
205	struct drm_i915_gem_relocation_entry *relocs;
206	/**
207	 * Array of info structs corresponding to relocs[i].target_handle etc
208	 */
209	drm_intel_reloc_target *reloc_target_info;
210	/** Number of entries in relocs */
211	int reloc_count;
212	/** Array of BOs that are referenced by this buffer and will be softpinned */
213	drm_intel_bo **softpin_target;
214	/** Number softpinned BOs that are referenced by this buffer */
215	int softpin_target_count;
216	/** Maximum amount of softpinned BOs that are referenced by this buffer */
217	int softpin_target_size;
218
219	/** Mapped address for the buffer, saved across map/unmap cycles */
220	void *mem_virtual;
221	/** GTT virtual address for the buffer, saved across map/unmap cycles */
222	void *gtt_virtual;
223	/** WC CPU address for the buffer, saved across map/unmap cycles */
224	void *wc_virtual;
225	/**
226	 * Virtual address of the buffer allocated by user, used for userptr
227	 * objects only.
228	 */
229	void *user_virtual;
230	int map_count;
231	drmMMListHead vma_list;
232
233	/** BO cache list */
234	drmMMListHead head;
235
236	/**
237	 * Boolean of whether this BO and its children have been included in
238	 * the current drm_intel_bufmgr_check_aperture_space() total.
239	 */
240	bool included_in_check_aperture;
241
242	/**
243	 * Boolean of whether this buffer has been used as a relocation
244	 * target and had its size accounted for, and thus can't have any
245	 * further relocations added to it.
246	 */
247	bool used_as_reloc_target;
248
249	/**
250	 * Boolean of whether we have encountered an error whilst building the relocation tree.
251	 */
252	bool has_error;
253
254	/**
255	 * Boolean of whether this buffer can be re-used
256	 */
257	bool reusable;
258
259	/**
260	 * Boolean of whether the GPU is definitely not accessing the buffer.
261	 *
262	 * This is only valid when reusable, since non-reusable
263	 * buffers are those that have been shared with other
264	 * processes, so we don't know their state.
265	 */
266	bool idle;
267
268	/**
269	 * Boolean of whether this buffer was allocated with userptr
270	 */
271	bool is_userptr;
272
273	/**
274	 * Size in bytes of this buffer and its relocation descendents.
275	 *
276	 * Used to avoid costly tree walking in
277	 * drm_intel_bufmgr_check_aperture in the common case.
278	 */
279	int reloc_tree_size;
280
281	/**
282	 * Number of potential fence registers required by this buffer and its
283	 * relocations.
284	 */
285	int reloc_tree_fences;
286
287	/** Flags that we may need to do the SW_FINISH ioctl on unmap. */
288	bool mapped_cpu_write;
289};
290
291static unsigned int
292drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);
293
294static unsigned int
295drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count);
296
297static int
298drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
299			    uint32_t * swizzle_mode);
300
301static int
302drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
303				     uint32_t tiling_mode,
304				     uint32_t stride);
305
306static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
307						      time_t time);
308
309static void drm_intel_gem_bo_unreference(drm_intel_bo *bo);
310
311static void drm_intel_gem_bo_free(drm_intel_bo *bo);
312
313static inline drm_intel_bo_gem *to_bo_gem(drm_intel_bo *bo)
314{
315        return (drm_intel_bo_gem *)bo;
316}
317
318static unsigned long
319drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
320			   uint32_t *tiling_mode)
321{
322	unsigned long min_size, max_size;
323	unsigned long i;
324
325	if (*tiling_mode == I915_TILING_NONE)
326		return size;
327
328	/* 965+ just need multiples of page size for tiling */
329	if (bufmgr_gem->gen >= 4)
330		return ROUND_UP_TO(size, 4096);
331
332	/* Older chips need powers of two, of at least 512k or 1M */
333	if (bufmgr_gem->gen == 3) {
334		min_size = 1024*1024;
335		max_size = 128*1024*1024;
336	} else {
337		min_size = 512*1024;
338		max_size = 64*1024*1024;
339	}
340
341	if (size > max_size) {
342		*tiling_mode = I915_TILING_NONE;
343		return size;
344	}
345
346	/* Do we need to allocate every page for the fence? */
347	if (bufmgr_gem->has_relaxed_fencing)
348		return ROUND_UP_TO(size, 4096);
349
350	for (i = min_size; i < size; i <<= 1)
351		;
352
353	return i;
354}
355
356/*
357 * Round a given pitch up to the minimum required for X tiling on a
358 * given chip.  We use 512 as the minimum to allow for a later tiling
359 * change.
360 */
361static unsigned long
362drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
363			    unsigned long pitch, uint32_t *tiling_mode)
364{
365	unsigned long tile_width;
366	unsigned long i;
367
368	/* If untiled, then just align it so that we can do rendering
369	 * to it with the 3D engine.
370	 */
371	if (*tiling_mode == I915_TILING_NONE)
372		return ALIGN(pitch, 64);
373
374	if (*tiling_mode == I915_TILING_X
375			|| (IS_915(bufmgr_gem->pci_device)
376			    && *tiling_mode == I915_TILING_Y))
377		tile_width = 512;
378	else
379		tile_width = 128;
380
381	/* 965 is flexible */
382	if (bufmgr_gem->gen >= 4)
383		return ROUND_UP_TO(pitch, tile_width);
384
385	/* The older hardware has a maximum pitch of 8192 with tiled
386	 * surfaces, so fallback to untiled if it's too large.
387	 */
388	if (pitch > 8192) {
389		*tiling_mode = I915_TILING_NONE;
390		return ALIGN(pitch, 64);
391	}
392
393	/* Pre-965 needs power of two tile width */
394	for (i = tile_width; i < pitch; i <<= 1)
395		;
396
397	return i;
398}
399
400static struct drm_intel_gem_bo_bucket *
401drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
402				 unsigned long size)
403{
404	int i;
405
406	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
407		struct drm_intel_gem_bo_bucket *bucket =
408		    &bufmgr_gem->cache_bucket[i];
409		if (bucket->size >= size) {
410			return bucket;
411		}
412	}
413
414	return NULL;
415}
416
417static void
418drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
419{
420	int i, j;
421
422	for (i = 0; i < bufmgr_gem->exec_count; i++) {
423		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
424		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
425
426		if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) {
427			DBG("%2d: %d %s(%s)\n", i, bo_gem->gem_handle,
428			    bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
429			    bo_gem->name);
430			continue;
431		}
432
433		for (j = 0; j < bo_gem->reloc_count; j++) {
434			drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo;
435			drm_intel_bo_gem *target_gem =
436			    (drm_intel_bo_gem *) target_bo;
437
438			DBG("%2d: %d %s(%s)@0x%08x %08x -> "
439			    "%d (%s)@0x%08x %08x + 0x%08x\n",
440			    i,
441			    bo_gem->gem_handle,
442			    bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
443			    bo_gem->name,
444			    upper_32_bits(bo_gem->relocs[j].offset),
445			    lower_32_bits(bo_gem->relocs[j].offset),
446			    target_gem->gem_handle,
447			    target_gem->name,
448			    upper_32_bits(target_bo->offset64),
449			    lower_32_bits(target_bo->offset64),
450			    bo_gem->relocs[j].delta);
451		}
452
453		for (j = 0; j < bo_gem->softpin_target_count; j++) {
454			drm_intel_bo *target_bo = bo_gem->softpin_target[j];
455			drm_intel_bo_gem *target_gem =
456			    (drm_intel_bo_gem *) target_bo;
457			DBG("%2d: %d %s(%s) -> "
458			    "%d *(%s)@0x%08x %08x\n",
459			    i,
460			    bo_gem->gem_handle,
461			    bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
462			    bo_gem->name,
463			    target_gem->gem_handle,
464			    target_gem->name,
465			    upper_32_bits(target_bo->offset64),
466			    lower_32_bits(target_bo->offset64));
467		}
468	}
469}
470
471static inline void
472drm_intel_gem_bo_reference(drm_intel_bo *bo)
473{
474	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
475
476	atomic_inc(&bo_gem->refcount);
477}
478
479/**
480 * Adds the given buffer to the list of buffers to be validated (moved into the
481 * appropriate memory type) with the next batch submission.
482 *
483 * If a buffer is validated multiple times in a batch submission, it ends up
484 * with the intersection of the memory type flags and the union of the
485 * access flags.
486 */
487static void
488drm_intel_add_validate_buffer(drm_intel_bo *bo)
489{
490	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
491	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
492	int index;
493
494	if (bo_gem->validate_index != -1)
495		return;
496
497	/* Extend the array of validation entries as necessary. */
498	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
499		int new_size = bufmgr_gem->exec_size * 2;
500
501		if (new_size == 0)
502			new_size = 5;
503
504		bufmgr_gem->exec_objects =
505		    realloc(bufmgr_gem->exec_objects,
506			    sizeof(*bufmgr_gem->exec_objects) * new_size);
507		bufmgr_gem->exec_bos =
508		    realloc(bufmgr_gem->exec_bos,
509			    sizeof(*bufmgr_gem->exec_bos) * new_size);
510		bufmgr_gem->exec_size = new_size;
511	}
512
513	index = bufmgr_gem->exec_count;
514	bo_gem->validate_index = index;
515	/* Fill in array entry */
516	bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
517	bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
518	bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs;
519	bufmgr_gem->exec_objects[index].alignment = bo->align;
520	bufmgr_gem->exec_objects[index].offset = 0;
521	bufmgr_gem->exec_bos[index] = bo;
522	bufmgr_gem->exec_count++;
523}
524
525static void
526drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
527{
528	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
529	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
530	int index;
531	unsigned long flags;
532
533	flags = 0;
534	if (need_fence)
535		flags |= EXEC_OBJECT_NEEDS_FENCE;
536
537	if (bo_gem->validate_index != -1) {
538		bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= flags;
539		return;
540	}
541
542	/* Extend the array of validation entries as necessary. */
543	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
544		int new_size = bufmgr_gem->exec_size * 2;
545
546		if (new_size == 0)
547			new_size = 5;
548
549		bufmgr_gem->exec2_objects =
550			realloc(bufmgr_gem->exec2_objects,
551				sizeof(*bufmgr_gem->exec2_objects) * new_size);
552		bufmgr_gem->exec_bos =
553			realloc(bufmgr_gem->exec_bos,
554				sizeof(*bufmgr_gem->exec_bos) * new_size);
555		bufmgr_gem->exec_size = new_size;
556	}
557
558	index = bufmgr_gem->exec_count;
559	bo_gem->validate_index = index;
560	/* Fill in array entry */
561	bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
562	bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
563	bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
564	bufmgr_gem->exec2_objects[index].alignment = bo->align;
565	bufmgr_gem->exec2_objects[index].offset = bo->offset64;
566	bufmgr_gem->exec2_objects[index].flags = bo_gem->kflags | flags;
567	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
568	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
569	bufmgr_gem->exec_bos[index] = bo;
570	bufmgr_gem->exec_count++;
571}
572
573#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
574	sizeof(uint32_t))
575
576static void
577drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem,
578				      drm_intel_bo_gem *bo_gem,
579				      unsigned int alignment)
580{
581	unsigned int size;
582
583	assert(!bo_gem->used_as_reloc_target);
584
585	/* The older chipsets are far-less flexible in terms of tiling,
586	 * and require tiled buffer to be size aligned in the aperture.
587	 * This means that in the worst possible case we will need a hole
588	 * twice as large as the object in order for it to fit into the
589	 * aperture. Optimal packing is for wimps.
590	 */
591	size = bo_gem->bo.size;
592	if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) {
593		unsigned int min_size;
594
595		if (bufmgr_gem->has_relaxed_fencing) {
596			if (bufmgr_gem->gen == 3)
597				min_size = 1024*1024;
598			else
599				min_size = 512*1024;
600
601			while (min_size < size)
602				min_size *= 2;
603		} else
604			min_size = size;
605
606		/* Account for worst-case alignment. */
607		alignment = MAX2(alignment, min_size);
608	}
609
610	bo_gem->reloc_tree_size = size + alignment;
611}
612
613static int
614drm_intel_setup_reloc_list(drm_intel_bo *bo)
615{
616	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
617	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
618	unsigned int max_relocs = bufmgr_gem->max_relocs;
619
620	if (bo->size / 4 < max_relocs)
621		max_relocs = bo->size / 4;
622
623	bo_gem->relocs = malloc(max_relocs *
624				sizeof(struct drm_i915_gem_relocation_entry));
625	bo_gem->reloc_target_info = malloc(max_relocs *
626					   sizeof(drm_intel_reloc_target));
627	if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) {
628		bo_gem->has_error = true;
629
630		free (bo_gem->relocs);
631		bo_gem->relocs = NULL;
632
633		free (bo_gem->reloc_target_info);
634		bo_gem->reloc_target_info = NULL;
635
636		return 1;
637	}
638
639	return 0;
640}
641
642static int
643drm_intel_gem_bo_busy(drm_intel_bo *bo)
644{
645	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
646	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
647	struct drm_i915_gem_busy busy;
648	int ret;
649
650	if (bo_gem->reusable && bo_gem->idle)
651		return false;
652
653	memclear(busy);
654	busy.handle = bo_gem->gem_handle;
655
656	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
657	if (ret == 0) {
658		bo_gem->idle = !busy.busy;
659		return busy.busy;
660	} else {
661		return false;
662	}
663}
664
665static int
666drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem,
667				  drm_intel_bo_gem *bo_gem, int state)
668{
669	struct drm_i915_gem_madvise madv;
670
671	memclear(madv);
672	madv.handle = bo_gem->gem_handle;
673	madv.madv = state;
674	madv.retained = 1;
675	drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
676
677	return madv.retained;
678}
679
680static int
681drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv)
682{
683	return drm_intel_gem_bo_madvise_internal
684		((drm_intel_bufmgr_gem *) bo->bufmgr,
685		 (drm_intel_bo_gem *) bo,
686		 madv);
687}
688
689/* drop the oldest entries that have been purged by the kernel */
690static void
691drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
692				    struct drm_intel_gem_bo_bucket *bucket)
693{
694	while (!DRMLISTEMPTY(&bucket->head)) {
695		drm_intel_bo_gem *bo_gem;
696
697		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
698				      bucket->head.next, head);
699		if (drm_intel_gem_bo_madvise_internal
700		    (bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
701			break;
702
703		DRMLISTDEL(&bo_gem->head);
704		drm_intel_gem_bo_free(&bo_gem->bo);
705	}
706}
707
708static drm_intel_bo *
709drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
710				const char *name,
711				unsigned long size,
712				unsigned long flags,
713				uint32_t tiling_mode,
714				unsigned long stride,
715				unsigned int alignment)
716{
717	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
718	drm_intel_bo_gem *bo_gem;
719	unsigned int page_size = getpagesize();
720	int ret;
721	struct drm_intel_gem_bo_bucket *bucket;
722	bool alloc_from_cache;
723	unsigned long bo_size;
724	bool for_render = false;
725
726	if (flags & BO_ALLOC_FOR_RENDER)
727		for_render = true;
728
729	/* Round the allocated size up to a power of two number of pages. */
730	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
731
732	/* If we don't have caching at this size, don't actually round the
733	 * allocation up.
734	 */
735	if (bucket == NULL) {
736		bo_size = size;
737		if (bo_size < page_size)
738			bo_size = page_size;
739	} else {
740		bo_size = bucket->size;
741	}
742
743	pthread_mutex_lock(&bufmgr_gem->lock);
744	/* Get a buffer out of the cache if available */
745retry:
746	alloc_from_cache = false;
747	if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
748		if (for_render) {
749			/* Allocate new render-target BOs from the tail (MRU)
750			 * of the list, as it will likely be hot in the GPU
751			 * cache and in the aperture for us.
752			 */
753			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
754					      bucket->head.prev, head);
755			DRMLISTDEL(&bo_gem->head);
756			alloc_from_cache = true;
757			bo_gem->bo.align = alignment;
758		} else {
759			assert(alignment == 0);
760			/* For non-render-target BOs (where we're probably
761			 * going to map it first thing in order to fill it
762			 * with data), check if the last BO in the cache is
763			 * unbusy, and only reuse in that case. Otherwise,
764			 * allocating a new buffer is probably faster than
765			 * waiting for the GPU to finish.
766			 */
767			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
768					      bucket->head.next, head);
769			if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
770				alloc_from_cache = true;
771				DRMLISTDEL(&bo_gem->head);
772			}
773		}
774
775		if (alloc_from_cache) {
776			if (!drm_intel_gem_bo_madvise_internal
777			    (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
778				drm_intel_gem_bo_free(&bo_gem->bo);
779				drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
780								    bucket);
781				goto retry;
782			}
783
784			if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
785								 tiling_mode,
786								 stride)) {
787				drm_intel_gem_bo_free(&bo_gem->bo);
788				goto retry;
789			}
790		}
791	}
792
793	if (!alloc_from_cache) {
794		struct drm_i915_gem_create create;
795
796		bo_gem = calloc(1, sizeof(*bo_gem));
797		if (!bo_gem)
798			goto err;
799
800		/* drm_intel_gem_bo_free calls DRMLISTDEL() for an uninitialized
801		   list (vma_list), so better set the list head here */
802		DRMINITLISTHEAD(&bo_gem->vma_list);
803
804		bo_gem->bo.size = bo_size;
805
806		memclear(create);
807		create.size = bo_size;
808
809		ret = drmIoctl(bufmgr_gem->fd,
810			       DRM_IOCTL_I915_GEM_CREATE,
811			       &create);
812		if (ret != 0) {
813			free(bo_gem);
814			goto err;
815		}
816
817		bo_gem->gem_handle = create.handle;
818		HASH_ADD(handle_hh, bufmgr_gem->handle_table,
819			 gem_handle, sizeof(bo_gem->gem_handle),
820			 bo_gem);
821
822		bo_gem->bo.handle = bo_gem->gem_handle;
823		bo_gem->bo.bufmgr = bufmgr;
824		bo_gem->bo.align = alignment;
825
826		bo_gem->tiling_mode = I915_TILING_NONE;
827		bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
828		bo_gem->stride = 0;
829
830		if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
831							 tiling_mode,
832							 stride))
833			goto err_free;
834	}
835
836	bo_gem->name = name;
837	atomic_set(&bo_gem->refcount, 1);
838	bo_gem->validate_index = -1;
839	bo_gem->reloc_tree_fences = 0;
840	bo_gem->used_as_reloc_target = false;
841	bo_gem->has_error = false;
842	bo_gem->reusable = true;
843
844	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, alignment);
845	pthread_mutex_unlock(&bufmgr_gem->lock);
846
847	DBG("bo_create: buf %d (%s) %ldb\n",
848	    bo_gem->gem_handle, bo_gem->name, size);
849
850	return &bo_gem->bo;
851
852err_free:
853	drm_intel_gem_bo_free(&bo_gem->bo);
854err:
855	pthread_mutex_unlock(&bufmgr_gem->lock);
856	return NULL;
857}
858
859static drm_intel_bo *
860drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
861				  const char *name,
862				  unsigned long size,
863				  unsigned int alignment)
864{
865	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
866					       BO_ALLOC_FOR_RENDER,
867					       I915_TILING_NONE, 0,
868					       alignment);
869}
870
871static drm_intel_bo *
872drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr,
873		       const char *name,
874		       unsigned long size,
875		       unsigned int alignment)
876{
877	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0,
878					       I915_TILING_NONE, 0, 0);
879}
880
881static drm_intel_bo *
882drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
883			     int x, int y, int cpp, uint32_t *tiling_mode,
884			     unsigned long *pitch, unsigned long flags)
885{
886	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
887	unsigned long size, stride;
888	uint32_t tiling;
889
890	do {
891		unsigned long aligned_y, height_alignment;
892
893		tiling = *tiling_mode;
894
895		/* If we're tiled, our allocations are in 8 or 32-row blocks,
896		 * so failure to align our height means that we won't allocate
897		 * enough pages.
898		 *
899		 * If we're untiled, we still have to align to 2 rows high
900		 * because the data port accesses 2x2 blocks even if the
901		 * bottom row isn't to be rendered, so failure to align means
902		 * we could walk off the end of the GTT and fault.  This is
903		 * documented on 965, and may be the case on older chipsets
904		 * too so we try to be careful.
905		 */
906		aligned_y = y;
907		height_alignment = 2;
908
909		if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE)
910			height_alignment = 16;
911		else if (tiling == I915_TILING_X
912			|| (IS_915(bufmgr_gem->pci_device)
913			    && tiling == I915_TILING_Y))
914			height_alignment = 8;
915		else if (tiling == I915_TILING_Y)
916			height_alignment = 32;
917		aligned_y = ALIGN(y, height_alignment);
918
919		stride = x * cpp;
920		stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode);
921		size = stride * aligned_y;
922		size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
923	} while (*tiling_mode != tiling);
924	*pitch = stride;
925
926	if (tiling == I915_TILING_NONE)
927		stride = 0;
928
929	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags,
930					       tiling, stride, 0);
931}
932
933static drm_intel_bo *
934drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
935				const char *name,
936				void *addr,
937				uint32_t tiling_mode,
938				uint32_t stride,
939				unsigned long size,
940				unsigned long flags)
941{
942	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
943	drm_intel_bo_gem *bo_gem;
944	int ret;
945	struct drm_i915_gem_userptr userptr;
946
947	/* Tiling with userptr surfaces is not supported
948	 * on all hardware so refuse it for time being.
949	 */
950	if (tiling_mode != I915_TILING_NONE)
951		return NULL;
952
953	bo_gem = calloc(1, sizeof(*bo_gem));
954	if (!bo_gem)
955		return NULL;
956
957	atomic_set(&bo_gem->refcount, 1);
958	DRMINITLISTHEAD(&bo_gem->vma_list);
959
960	bo_gem->bo.size = size;
961
962	memclear(userptr);
963	userptr.user_ptr = (__u64)((unsigned long)addr);
964	userptr.user_size = size;
965	userptr.flags = flags;
966
967	ret = drmIoctl(bufmgr_gem->fd,
968			DRM_IOCTL_I915_GEM_USERPTR,
969			&userptr);
970	if (ret != 0) {
971		DBG("bo_create_userptr: "
972		    "ioctl failed with user ptr %p size 0x%lx, "
973		    "user flags 0x%lx\n", addr, size, flags);
974		free(bo_gem);
975		return NULL;
976	}
977
978	pthread_mutex_lock(&bufmgr_gem->lock);
979
980	bo_gem->gem_handle = userptr.handle;
981	bo_gem->bo.handle = bo_gem->gem_handle;
982	bo_gem->bo.bufmgr    = bufmgr;
983	bo_gem->is_userptr   = true;
984	bo_gem->bo.virtual   = addr;
985	/* Save the address provided by user */
986	bo_gem->user_virtual = addr;
987	bo_gem->tiling_mode  = I915_TILING_NONE;
988	bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
989	bo_gem->stride       = 0;
990
991	HASH_ADD(handle_hh, bufmgr_gem->handle_table,
992		 gem_handle, sizeof(bo_gem->gem_handle),
993		 bo_gem);
994
995	bo_gem->name = name;
996	bo_gem->validate_index = -1;
997	bo_gem->reloc_tree_fences = 0;
998	bo_gem->used_as_reloc_target = false;
999	bo_gem->has_error = false;
1000	bo_gem->reusable = false;
1001
1002	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
1003	pthread_mutex_unlock(&bufmgr_gem->lock);
1004
1005	DBG("bo_create_userptr: "
1006	    "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n",
1007		addr, bo_gem->gem_handle, bo_gem->name,
1008		size, stride, tiling_mode);
1009
1010	return &bo_gem->bo;
1011}
1012
1013static bool
1014has_userptr(drm_intel_bufmgr_gem *bufmgr_gem)
1015{
1016	int ret;
1017	void *ptr;
1018	long pgsz;
1019	struct drm_i915_gem_userptr userptr;
1020
1021	pgsz = sysconf(_SC_PAGESIZE);
1022	assert(pgsz > 0);
1023
1024	ret = posix_memalign(&ptr, pgsz, pgsz);
1025	if (ret) {
1026		DBG("Failed to get a page (%ld) for userptr detection!\n",
1027			pgsz);
1028		return false;
1029	}
1030
1031	memclear(userptr);
1032	userptr.user_ptr = (__u64)(unsigned long)ptr;
1033	userptr.user_size = pgsz;
1034
1035retry:
1036	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
1037	if (ret) {
1038		if (errno == ENODEV && userptr.flags == 0) {
1039			userptr.flags = I915_USERPTR_UNSYNCHRONIZED;
1040			goto retry;
1041		}
1042		free(ptr);
1043		return false;
1044	}
1045
1046	/* We don't release the userptr bo here as we want to keep the
1047	 * kernel mm tracking alive for our lifetime. The first time we
1048	 * create a userptr object the kernel has to install a mmu_notifer
1049	 * which is a heavyweight operation (e.g. it requires taking all
1050	 * mm_locks and stop_machine()).
1051	 */
1052
1053	bufmgr_gem->userptr_active.ptr = ptr;
1054	bufmgr_gem->userptr_active.handle = userptr.handle;
1055
1056	return true;
1057}
1058
1059static drm_intel_bo *
1060check_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
1061		       const char *name,
1062		       void *addr,
1063		       uint32_t tiling_mode,
1064		       uint32_t stride,
1065		       unsigned long size,
1066		       unsigned long flags)
1067{
1068	if (has_userptr((drm_intel_bufmgr_gem *)bufmgr))
1069		bufmgr->bo_alloc_userptr = drm_intel_gem_bo_alloc_userptr;
1070	else
1071		bufmgr->bo_alloc_userptr = NULL;
1072
1073	return drm_intel_bo_alloc_userptr(bufmgr, name, addr,
1074					  tiling_mode, stride, size, flags);
1075}
1076
1077/**
1078 * Returns a drm_intel_bo wrapping the given buffer object handle.
1079 *
1080 * This can be used when one application needs to pass a buffer object
1081 * to another.
1082 */
1083drm_intel_bo *
1084drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr,
1085				  const char *name,
1086				  unsigned int handle)
1087{
1088	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1089	drm_intel_bo_gem *bo_gem;
1090	int ret;
1091	struct drm_gem_open open_arg;
1092	struct drm_i915_gem_get_tiling get_tiling;
1093
1094	/* At the moment most applications only have a few named bo.
1095	 * For instance, in a DRI client only the render buffers passed
1096	 * between X and the client are named. And since X returns the
1097	 * alternating names for the front/back buffer a linear search
1098	 * provides a sufficiently fast match.
1099	 */
1100	pthread_mutex_lock(&bufmgr_gem->lock);
1101	HASH_FIND(name_hh, bufmgr_gem->name_table,
1102		  &handle, sizeof(handle), bo_gem);
1103	if (bo_gem) {
1104		drm_intel_gem_bo_reference(&bo_gem->bo);
1105		goto out;
1106	}
1107
1108	memclear(open_arg);
1109	open_arg.name = handle;
1110	ret = drmIoctl(bufmgr_gem->fd,
1111		       DRM_IOCTL_GEM_OPEN,
1112		       &open_arg);
1113	if (ret != 0) {
1114		DBG("Couldn't reference %s handle 0x%08x: %s\n",
1115		    name, handle, strerror(errno));
1116		bo_gem = NULL;
1117		goto out;
1118	}
1119        /* Now see if someone has used a prime handle to get this
1120         * object from the kernel before by looking through the list
1121         * again for a matching gem_handle
1122         */
1123	HASH_FIND(handle_hh, bufmgr_gem->handle_table,
1124		  &open_arg.handle, sizeof(open_arg.handle), bo_gem);
1125	if (bo_gem) {
1126		drm_intel_gem_bo_reference(&bo_gem->bo);
1127		goto out;
1128	}
1129
1130	bo_gem = calloc(1, sizeof(*bo_gem));
1131	if (!bo_gem)
1132		goto out;
1133
1134	atomic_set(&bo_gem->refcount, 1);
1135	DRMINITLISTHEAD(&bo_gem->vma_list);
1136
1137	bo_gem->bo.size = open_arg.size;
1138	bo_gem->bo.offset = 0;
1139	bo_gem->bo.offset64 = 0;
1140	bo_gem->bo.virtual = NULL;
1141	bo_gem->bo.bufmgr = bufmgr;
1142	bo_gem->name = name;
1143	bo_gem->validate_index = -1;
1144	bo_gem->gem_handle = open_arg.handle;
1145	bo_gem->bo.handle = open_arg.handle;
1146	bo_gem->global_name = handle;
1147	bo_gem->reusable = false;
1148
1149	HASH_ADD(handle_hh, bufmgr_gem->handle_table,
1150		 gem_handle, sizeof(bo_gem->gem_handle), bo_gem);
1151	HASH_ADD(name_hh, bufmgr_gem->name_table,
1152		 global_name, sizeof(bo_gem->global_name), bo_gem);
1153
1154	memclear(get_tiling);
1155	get_tiling.handle = bo_gem->gem_handle;
1156	ret = drmIoctl(bufmgr_gem->fd,
1157		       DRM_IOCTL_I915_GEM_GET_TILING,
1158		       &get_tiling);
1159	if (ret != 0)
1160		goto err_unref;
1161
1162	bo_gem->tiling_mode = get_tiling.tiling_mode;
1163	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
1164	/* XXX stride is unknown */
1165	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
1166	DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
1167
1168out:
1169	pthread_mutex_unlock(&bufmgr_gem->lock);
1170	return &bo_gem->bo;
1171
1172err_unref:
1173	drm_intel_gem_bo_free(&bo_gem->bo);
1174	pthread_mutex_unlock(&bufmgr_gem->lock);
1175	return NULL;
1176}
1177
1178static void
1179drm_intel_gem_bo_free(drm_intel_bo *bo)
1180{
1181	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1182	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1183	struct drm_gem_close close;
1184	int ret;
1185
1186	DRMLISTDEL(&bo_gem->vma_list);
1187	if (bo_gem->mem_virtual) {
1188		VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0));
1189		drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
1190		bufmgr_gem->vma_count--;
1191	}
1192	if (bo_gem->wc_virtual) {
1193		VG(VALGRIND_FREELIKE_BLOCK(bo_gem->wc_virtual, 0));
1194		drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
1195		bufmgr_gem->vma_count--;
1196	}
1197	if (bo_gem->gtt_virtual) {
1198		drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
1199		bufmgr_gem->vma_count--;
1200	}
1201
1202	if (bo_gem->global_name)
1203		HASH_DELETE(name_hh, bufmgr_gem->name_table, bo_gem);
1204	HASH_DELETE(handle_hh, bufmgr_gem->handle_table, bo_gem);
1205
1206	/* Close this object */
1207	memclear(close);
1208	close.handle = bo_gem->gem_handle;
1209	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close);
1210	if (ret != 0) {
1211		DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
1212		    bo_gem->gem_handle, bo_gem->name, strerror(errno));
1213	}
1214	free(bo);
1215}
1216
1217static void
1218drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo)
1219{
1220#if HAVE_VALGRIND
1221	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1222
1223	if (bo_gem->mem_virtual)
1224		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size);
1225
1226	if (bo_gem->wc_virtual)
1227		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->wc_virtual, bo->size);
1228
1229	if (bo_gem->gtt_virtual)
1230		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size);
1231#endif
1232}
1233
1234/** Frees all cached buffers significantly older than @time. */
1235static void
1236drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
1237{
1238	int i;
1239
1240	if (bufmgr_gem->time == time)
1241		return;
1242
1243	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
1244		struct drm_intel_gem_bo_bucket *bucket =
1245		    &bufmgr_gem->cache_bucket[i];
1246
1247		while (!DRMLISTEMPTY(&bucket->head)) {
1248			drm_intel_bo_gem *bo_gem;
1249
1250			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1251					      bucket->head.next, head);
1252			if (time - bo_gem->free_time <= 1)
1253				break;
1254
1255			DRMLISTDEL(&bo_gem->head);
1256
1257			drm_intel_gem_bo_free(&bo_gem->bo);
1258		}
1259	}
1260
1261	bufmgr_gem->time = time;
1262}
1263
1264static void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem)
1265{
1266	int limit;
1267
1268	DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__,
1269	    bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max);
1270
1271	if (bufmgr_gem->vma_max < 0)
1272		return;
1273
1274	/* We may need to evict a few entries in order to create new mmaps */
1275	limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open;
1276	if (limit < 0)
1277		limit = 0;
1278
1279	while (bufmgr_gem->vma_count > limit) {
1280		drm_intel_bo_gem *bo_gem;
1281
1282		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1283				      bufmgr_gem->vma_cache.next,
1284				      vma_list);
1285		assert(bo_gem->map_count == 0);
1286		DRMLISTDELINIT(&bo_gem->vma_list);
1287
1288		if (bo_gem->mem_virtual) {
1289			drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
1290			bo_gem->mem_virtual = NULL;
1291			bufmgr_gem->vma_count--;
1292		}
1293		if (bo_gem->wc_virtual) {
1294			drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
1295			bo_gem->wc_virtual = NULL;
1296			bufmgr_gem->vma_count--;
1297		}
1298		if (bo_gem->gtt_virtual) {
1299			drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
1300			bo_gem->gtt_virtual = NULL;
1301			bufmgr_gem->vma_count--;
1302		}
1303	}
1304}
1305
1306static void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem,
1307				       drm_intel_bo_gem *bo_gem)
1308{
1309	bufmgr_gem->vma_open--;
1310	DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache);
1311	if (bo_gem->mem_virtual)
1312		bufmgr_gem->vma_count++;
1313	if (bo_gem->wc_virtual)
1314		bufmgr_gem->vma_count++;
1315	if (bo_gem->gtt_virtual)
1316		bufmgr_gem->vma_count++;
1317	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
1318}
1319
1320static void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem,
1321				      drm_intel_bo_gem *bo_gem)
1322{
1323	bufmgr_gem->vma_open++;
1324	DRMLISTDEL(&bo_gem->vma_list);
1325	if (bo_gem->mem_virtual)
1326		bufmgr_gem->vma_count--;
1327	if (bo_gem->wc_virtual)
1328		bufmgr_gem->vma_count--;
1329	if (bo_gem->gtt_virtual)
1330		bufmgr_gem->vma_count--;
1331	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
1332}
1333
1334static void
1335drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
1336{
1337	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1338	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1339	struct drm_intel_gem_bo_bucket *bucket;
1340	int i;
1341
1342	/* Unreference all the target buffers */
1343	for (i = 0; i < bo_gem->reloc_count; i++) {
1344		if (bo_gem->reloc_target_info[i].bo != bo) {
1345			drm_intel_gem_bo_unreference_locked_timed(bo_gem->
1346								  reloc_target_info[i].bo,
1347								  time);
1348		}
1349	}
1350	for (i = 0; i < bo_gem->softpin_target_count; i++)
1351		drm_intel_gem_bo_unreference_locked_timed(bo_gem->softpin_target[i],
1352								  time);
1353	bo_gem->kflags = 0;
1354	bo_gem->reloc_count = 0;
1355	bo_gem->used_as_reloc_target = false;
1356	bo_gem->softpin_target_count = 0;
1357
1358	DBG("bo_unreference final: %d (%s)\n",
1359	    bo_gem->gem_handle, bo_gem->name);
1360
1361	/* release memory associated with this object */
1362	if (bo_gem->reloc_target_info) {
1363		free(bo_gem->reloc_target_info);
1364		bo_gem->reloc_target_info = NULL;
1365	}
1366	if (bo_gem->relocs) {
1367		free(bo_gem->relocs);
1368		bo_gem->relocs = NULL;
1369	}
1370	if (bo_gem->softpin_target) {
1371		free(bo_gem->softpin_target);
1372		bo_gem->softpin_target = NULL;
1373		bo_gem->softpin_target_size = 0;
1374	}
1375
1376	/* Clear any left-over mappings */
1377	if (bo_gem->map_count) {
1378		DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count);
1379		bo_gem->map_count = 0;
1380		drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1381		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1382	}
1383
1384	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
1385	/* Put the buffer into our internal cache for reuse if we can. */
1386	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
1387	    drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
1388					      I915_MADV_DONTNEED)) {
1389		bo_gem->free_time = time;
1390
1391		bo_gem->name = NULL;
1392		bo_gem->validate_index = -1;
1393
1394		DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
1395	} else {
1396		drm_intel_gem_bo_free(bo);
1397	}
1398}
1399
1400static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
1401						      time_t time)
1402{
1403	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1404
1405	assert(atomic_read(&bo_gem->refcount) > 0);
1406	if (atomic_dec_and_test(&bo_gem->refcount))
1407		drm_intel_gem_bo_unreference_final(bo, time);
1408}
1409
1410static void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
1411{
1412	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1413
1414	assert(atomic_read(&bo_gem->refcount) > 0);
1415
1416	if (atomic_add_unless(&bo_gem->refcount, -1, 1)) {
1417		drm_intel_bufmgr_gem *bufmgr_gem =
1418		    (drm_intel_bufmgr_gem *) bo->bufmgr;
1419		struct timespec time;
1420
1421		clock_gettime(CLOCK_MONOTONIC, &time);
1422
1423		pthread_mutex_lock(&bufmgr_gem->lock);
1424
1425		if (atomic_dec_and_test(&bo_gem->refcount)) {
1426			drm_intel_gem_bo_unreference_final(bo, time.tv_sec);
1427			drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec);
1428		}
1429
1430		pthread_mutex_unlock(&bufmgr_gem->lock);
1431	}
1432}
1433
1434static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
1435{
1436	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1437	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1438	struct drm_i915_gem_set_domain set_domain;
1439	int ret;
1440
1441	if (bo_gem->is_userptr) {
1442		/* Return the same user ptr */
1443		bo->virtual = bo_gem->user_virtual;
1444		return 0;
1445	}
1446
1447	pthread_mutex_lock(&bufmgr_gem->lock);
1448
1449	if (bo_gem->map_count++ == 0)
1450		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
1451
1452	if (!bo_gem->mem_virtual) {
1453		struct drm_i915_gem_mmap mmap_arg;
1454
1455		DBG("bo_map: %d (%s), map_count=%d\n",
1456		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1457
1458		memclear(mmap_arg);
1459		mmap_arg.handle = bo_gem->gem_handle;
1460		mmap_arg.size = bo->size;
1461		ret = drmIoctl(bufmgr_gem->fd,
1462			       DRM_IOCTL_I915_GEM_MMAP,
1463			       &mmap_arg);
1464		if (ret != 0) {
1465			ret = -errno;
1466			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1467			    __FILE__, __LINE__, bo_gem->gem_handle,
1468			    bo_gem->name, strerror(errno));
1469			if (--bo_gem->map_count == 0)
1470				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1471			pthread_mutex_unlock(&bufmgr_gem->lock);
1472			return ret;
1473		}
1474		VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
1475		bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
1476	}
1477	DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1478	    bo_gem->mem_virtual);
1479	bo->virtual = bo_gem->mem_virtual;
1480
1481	memclear(set_domain);
1482	set_domain.handle = bo_gem->gem_handle;
1483	set_domain.read_domains = I915_GEM_DOMAIN_CPU;
1484	if (write_enable)
1485		set_domain.write_domain = I915_GEM_DOMAIN_CPU;
1486	else
1487		set_domain.write_domain = 0;
1488	ret = drmIoctl(bufmgr_gem->fd,
1489		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1490		       &set_domain);
1491	if (ret != 0) {
1492		DBG("%s:%d: Error setting to CPU domain %d: %s\n",
1493		    __FILE__, __LINE__, bo_gem->gem_handle,
1494		    strerror(errno));
1495	}
1496
1497	if (write_enable)
1498		bo_gem->mapped_cpu_write = true;
1499
1500	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1501	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size));
1502	pthread_mutex_unlock(&bufmgr_gem->lock);
1503
1504	return 0;
1505}
1506
1507static int
1508map_gtt(drm_intel_bo *bo)
1509{
1510	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1511	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1512	int ret;
1513
1514	if (bo_gem->is_userptr)
1515		return -EINVAL;
1516
1517	if (bo_gem->map_count++ == 0)
1518		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
1519
1520	/* Get a mapping of the buffer if we haven't before. */
1521	if (bo_gem->gtt_virtual == NULL) {
1522		struct drm_i915_gem_mmap_gtt mmap_arg;
1523
1524		DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
1525		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1526
1527		memclear(mmap_arg);
1528		mmap_arg.handle = bo_gem->gem_handle;
1529
1530		/* Get the fake offset back... */
1531		ret = drmIoctl(bufmgr_gem->fd,
1532			       DRM_IOCTL_I915_GEM_MMAP_GTT,
1533			       &mmap_arg);
1534		if (ret != 0) {
1535			ret = -errno;
1536			DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
1537			    __FILE__, __LINE__,
1538			    bo_gem->gem_handle, bo_gem->name,
1539			    strerror(errno));
1540			if (--bo_gem->map_count == 0)
1541				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1542			return ret;
1543		}
1544
1545		/* and mmap it */
1546		ret = drmMap(bufmgr_gem->fd, mmap_arg.offset, bo->size,
1547		    &bo_gem->gtt_virtual);
1548		if (ret) {
1549			bo_gem->gtt_virtual = NULL;
1550			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1551			    __FILE__, __LINE__,
1552			    bo_gem->gem_handle, bo_gem->name,
1553			    strerror(errno));
1554			if (--bo_gem->map_count == 0)
1555				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1556			return ret;
1557		}
1558	}
1559
1560	bo->virtual = bo_gem->gtt_virtual;
1561
1562	DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1563	    bo_gem->gtt_virtual);
1564
1565	return 0;
1566}
1567
1568int
1569drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
1570{
1571	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1572	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1573	struct drm_i915_gem_set_domain set_domain;
1574	int ret;
1575
1576	pthread_mutex_lock(&bufmgr_gem->lock);
1577
1578	ret = map_gtt(bo);
1579	if (ret) {
1580		pthread_mutex_unlock(&bufmgr_gem->lock);
1581		return ret;
1582	}
1583
1584	/* Now move it to the GTT domain so that the GPU and CPU
1585	 * caches are flushed and the GPU isn't actively using the
1586	 * buffer.
1587	 *
1588	 * The pagefault handler does this domain change for us when
1589	 * it has unbound the BO from the GTT, but it's up to us to
1590	 * tell it when we're about to use things if we had done
1591	 * rendering and it still happens to be bound to the GTT.
1592	 */
1593	memclear(set_domain);
1594	set_domain.handle = bo_gem->gem_handle;
1595	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1596	set_domain.write_domain = I915_GEM_DOMAIN_GTT;
1597	ret = drmIoctl(bufmgr_gem->fd,
1598		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1599		       &set_domain);
1600	if (ret != 0) {
1601		DBG("%s:%d: Error setting domain %d: %s\n",
1602		    __FILE__, __LINE__, bo_gem->gem_handle,
1603		    strerror(errno));
1604	}
1605
1606	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1607	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1608	pthread_mutex_unlock(&bufmgr_gem->lock);
1609
1610	return 0;
1611}
1612
1613/**
1614 * Performs a mapping of the buffer object like the normal GTT
1615 * mapping, but avoids waiting for the GPU to be done reading from or
1616 * rendering to the buffer.
1617 *
1618 * This is used in the implementation of GL_ARB_map_buffer_range: The
1619 * user asks to create a buffer, then does a mapping, fills some
1620 * space, runs a drawing command, then asks to map it again without
1621 * synchronizing because it guarantees that it won't write over the
1622 * data that the GPU is busy using (or, more specifically, that if it
1623 * does write over the data, it acknowledges that rendering is
1624 * undefined).
1625 */
1626
1627int
1628drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
1629{
1630	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1631#ifdef HAVE_VALGRIND
1632	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1633#endif
1634	int ret;
1635
1636	/* If the CPU cache isn't coherent with the GTT, then use a
1637	 * regular synchronized mapping.  The problem is that we don't
1638	 * track where the buffer was last used on the CPU side in
1639	 * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so
1640	 * we would potentially corrupt the buffer even when the user
1641	 * does reasonable things.
1642	 */
1643	if (!bufmgr_gem->has_llc)
1644		return drm_intel_gem_bo_map_gtt(bo);
1645
1646	pthread_mutex_lock(&bufmgr_gem->lock);
1647
1648	ret = map_gtt(bo);
1649	if (ret == 0) {
1650		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1651		VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1652	}
1653
1654	pthread_mutex_unlock(&bufmgr_gem->lock);
1655
1656	return ret;
1657}
1658
1659static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
1660{
1661	drm_intel_bufmgr_gem *bufmgr_gem;
1662	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1663	int ret = 0;
1664
1665	if (bo == NULL)
1666		return 0;
1667
1668	if (bo_gem->is_userptr)
1669		return 0;
1670
1671	bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1672
1673	pthread_mutex_lock(&bufmgr_gem->lock);
1674
1675	if (bo_gem->map_count <= 0) {
1676		DBG("attempted to unmap an unmapped bo\n");
1677		pthread_mutex_unlock(&bufmgr_gem->lock);
1678		/* Preserve the old behaviour of just treating this as a
1679		 * no-op rather than reporting the error.
1680		 */
1681		return 0;
1682	}
1683
1684	if (bo_gem->mapped_cpu_write) {
1685		struct drm_i915_gem_sw_finish sw_finish;
1686
1687		/* Cause a flush to happen if the buffer's pinned for
1688		 * scanout, so the results show up in a timely manner.
1689		 * Unlike GTT set domains, this only does work if the
1690		 * buffer should be scanout-related.
1691		 */
1692		memclear(sw_finish);
1693		sw_finish.handle = bo_gem->gem_handle;
1694		ret = drmIoctl(bufmgr_gem->fd,
1695			       DRM_IOCTL_I915_GEM_SW_FINISH,
1696			       &sw_finish);
1697		ret = ret == -1 ? -errno : 0;
1698
1699		bo_gem->mapped_cpu_write = false;
1700	}
1701
1702	/* We need to unmap after every innovation as we cannot track
1703	 * an open vma for every bo as that will exhaust the system
1704	 * limits and cause later failures.
1705	 */
1706	if (--bo_gem->map_count == 0) {
1707		drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1708		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1709		bo->virtual = NULL;
1710	}
1711	pthread_mutex_unlock(&bufmgr_gem->lock);
1712
1713	return ret;
1714}
1715
1716int
1717drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
1718{
1719	return drm_intel_gem_bo_unmap(bo);
1720}
1721
1722static int
1723drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
1724			 unsigned long size, const void *data)
1725{
1726	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1727	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1728	struct drm_i915_gem_pwrite pwrite;
1729	int ret;
1730
1731	if (bo_gem->is_userptr)
1732		return -EINVAL;
1733
1734	memclear(pwrite);
1735	pwrite.handle = bo_gem->gem_handle;
1736	pwrite.offset = offset;
1737	pwrite.size = size;
1738	pwrite.data_ptr = (uint64_t) (uintptr_t) data;
1739	ret = drmIoctl(bufmgr_gem->fd,
1740		       DRM_IOCTL_I915_GEM_PWRITE,
1741		       &pwrite);
1742	if (ret != 0) {
1743		ret = -errno;
1744		DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
1745		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1746		    (int)size, strerror(errno));
1747	}
1748
1749	return ret;
1750}
1751
1752static int
1753drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id)
1754{
1755	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1756	struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id;
1757	int ret;
1758
1759	memclear(get_pipe_from_crtc_id);
1760	get_pipe_from_crtc_id.crtc_id = crtc_id;
1761	ret = drmIoctl(bufmgr_gem->fd,
1762		       DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
1763		       &get_pipe_from_crtc_id);
1764	if (ret != 0) {
1765		/* We return -1 here to signal that we don't
1766		 * know which pipe is associated with this crtc.
1767		 * This lets the caller know that this information
1768		 * isn't available; using the wrong pipe for
1769		 * vblank waiting can cause the chipset to lock up
1770		 */
1771		return -1;
1772	}
1773
1774	return get_pipe_from_crtc_id.pipe;
1775}
1776
1777static int
1778drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
1779			     unsigned long size, void *data)
1780{
1781	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1782	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1783	struct drm_i915_gem_pread pread;
1784	int ret;
1785
1786	if (bo_gem->is_userptr)
1787		return -EINVAL;
1788
1789	memclear(pread);
1790	pread.handle = bo_gem->gem_handle;
1791	pread.offset = offset;
1792	pread.size = size;
1793	pread.data_ptr = (uint64_t) (uintptr_t) data;
1794	ret = drmIoctl(bufmgr_gem->fd,
1795		       DRM_IOCTL_I915_GEM_PREAD,
1796		       &pread);
1797	if (ret != 0) {
1798		ret = -errno;
1799		DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
1800		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1801		    (int)size, strerror(errno));
1802	}
1803
1804	return ret;
1805}
1806
1807/** Waits for all GPU rendering with the object to have completed. */
1808static void
1809drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
1810{
1811	drm_intel_gem_bo_start_gtt_access(bo, 1);
1812}
1813
1814/**
1815 * Waits on a BO for the given amount of time.
1816 *
1817 * @bo: buffer object to wait for
1818 * @timeout_ns: amount of time to wait in nanoseconds.
1819 *   If value is less than 0, an infinite wait will occur.
1820 *
1821 * Returns 0 if the wait was successful ie. the last batch referencing the
1822 * object has completed within the allotted time. Otherwise some negative return
1823 * value describes the error. Of particular interest is -ETIME when the wait has
1824 * failed to yield the desired result.
1825 *
1826 * Similar to drm_intel_gem_bo_wait_rendering except a timeout parameter allows
1827 * the operation to give up after a certain amount of time. Another subtle
1828 * difference is the internal locking semantics are different (this variant does
1829 * not hold the lock for the duration of the wait). This makes the wait subject
1830 * to a larger userspace race window.
1831 *
1832 * The implementation shall wait until the object is no longer actively
1833 * referenced within a batch buffer at the time of the call. The wait will
1834 * not guarantee that the buffer is re-issued via another thread, or an flinked
1835 * handle. Userspace must make sure this race does not occur if such precision
1836 * is important.
1837 *
1838 * Note that some kernels have broken the inifite wait for negative values
1839 * promise, upgrade to latest stable kernels if this is the case.
1840 */
1841int
1842drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns)
1843{
1844	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1845	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1846	struct drm_i915_gem_wait wait;
1847	int ret;
1848
1849	if (!bufmgr_gem->has_wait_timeout) {
1850		DBG("%s:%d: Timed wait is not supported. Falling back to "
1851		    "infinite wait\n", __FILE__, __LINE__);
1852		if (timeout_ns) {
1853			drm_intel_gem_bo_wait_rendering(bo);
1854			return 0;
1855		} else {
1856			return drm_intel_gem_bo_busy(bo) ? -ETIME : 0;
1857		}
1858	}
1859
1860	memclear(wait);
1861	wait.bo_handle = bo_gem->gem_handle;
1862	wait.timeout_ns = timeout_ns;
1863	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
1864	if (ret == -1)
1865		return -errno;
1866
1867	return ret;
1868}
1869
1870/**
1871 * Sets the object to the GTT read and possibly write domain, used by the X
1872 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
1873 *
1874 * In combination with drm_intel_gem_bo_pin() and manual fence management, we
1875 * can do tiled pixmaps this way.
1876 */
1877void
1878drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
1879{
1880	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1881	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1882	struct drm_i915_gem_set_domain set_domain;
1883	int ret;
1884
1885	memclear(set_domain);
1886	set_domain.handle = bo_gem->gem_handle;
1887	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1888	set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
1889	ret = drmIoctl(bufmgr_gem->fd,
1890		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1891		       &set_domain);
1892	if (ret != 0) {
1893		DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
1894		    __FILE__, __LINE__, bo_gem->gem_handle,
1895		    set_domain.read_domains, set_domain.write_domain,
1896		    strerror(errno));
1897	}
1898}
1899
1900static void
1901drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
1902{
1903	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1904	struct drm_gem_close close_bo;
1905	int i, ret;
1906
1907	free(bufmgr_gem->exec2_objects);
1908	free(bufmgr_gem->exec_objects);
1909	free(bufmgr_gem->exec_bos);
1910
1911	pthread_mutex_destroy(&bufmgr_gem->lock);
1912
1913	/* Free any cached buffer objects we were going to reuse */
1914	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
1915		struct drm_intel_gem_bo_bucket *bucket =
1916		    &bufmgr_gem->cache_bucket[i];
1917		drm_intel_bo_gem *bo_gem;
1918
1919		while (!DRMLISTEMPTY(&bucket->head)) {
1920			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1921					      bucket->head.next, head);
1922			DRMLISTDEL(&bo_gem->head);
1923
1924			drm_intel_gem_bo_free(&bo_gem->bo);
1925		}
1926	}
1927
1928	/* Release userptr bo kept hanging around for optimisation. */
1929	if (bufmgr_gem->userptr_active.ptr) {
1930		memclear(close_bo);
1931		close_bo.handle = bufmgr_gem->userptr_active.handle;
1932		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close_bo);
1933		free(bufmgr_gem->userptr_active.ptr);
1934		if (ret)
1935			fprintf(stderr,
1936				"Failed to release test userptr object! (%d) "
1937				"i915 kernel driver may not be sane!\n", errno);
1938	}
1939
1940	free(bufmgr);
1941}
1942
1943/**
1944 * Adds the target buffer to the validation list and adds the relocation
1945 * to the reloc_buffer's relocation list.
1946 *
1947 * The relocation entry at the given offset must already contain the
1948 * precomputed relocation value, because the kernel will optimize out
1949 * the relocation entry write when the buffer hasn't moved from the
1950 * last known offset in target_bo.
1951 */
1952static int
1953do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
1954		 drm_intel_bo *target_bo, uint32_t target_offset,
1955		 uint32_t read_domains, uint32_t write_domain,
1956		 bool need_fence)
1957{
1958	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1959	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1960	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
1961	bool fenced_command;
1962
1963	if (bo_gem->has_error)
1964		return -ENOMEM;
1965
1966	if (target_bo_gem->has_error) {
1967		bo_gem->has_error = true;
1968		return -ENOMEM;
1969	}
1970
1971	/* We never use HW fences for rendering on 965+ */
1972	if (bufmgr_gem->gen >= 4)
1973		need_fence = false;
1974
1975	fenced_command = need_fence;
1976	if (target_bo_gem->tiling_mode == I915_TILING_NONE)
1977		need_fence = false;
1978
1979	/* Create a new relocation list if needed */
1980	if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo))
1981		return -ENOMEM;
1982
1983	/* Check overflow */
1984	assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);
1985
1986	/* Check args */
1987	assert(offset <= bo->size - 4);
1988	assert((write_domain & (write_domain - 1)) == 0);
1989
1990	/* An object needing a fence is a tiled buffer, so it won't have
1991	 * relocs to other buffers.
1992	 */
1993	if (need_fence) {
1994		assert(target_bo_gem->reloc_count == 0);
1995		target_bo_gem->reloc_tree_fences = 1;
1996	}
1997
1998	/* Make sure that we're not adding a reloc to something whose size has
1999	 * already been accounted for.
2000	 */
2001	assert(!bo_gem->used_as_reloc_target);
2002	if (target_bo_gem != bo_gem) {
2003		target_bo_gem->used_as_reloc_target = true;
2004		bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
2005		bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
2006	}
2007
2008	bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
2009	if (target_bo != bo)
2010		drm_intel_gem_bo_reference(target_bo);
2011	if (fenced_command)
2012		bo_gem->reloc_target_info[bo_gem->reloc_count].flags =
2013			DRM_INTEL_RELOC_FENCE;
2014	else
2015		bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
2016
2017	bo_gem->relocs[bo_gem->reloc_count].offset = offset;
2018	bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
2019	bo_gem->relocs[bo_gem->reloc_count].target_handle =
2020	    target_bo_gem->gem_handle;
2021	bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
2022	bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
2023	bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64;
2024	bo_gem->reloc_count++;
2025
2026	return 0;
2027}
2028
2029static void
2030drm_intel_gem_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable)
2031{
2032	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2033
2034	if (enable)
2035		bo_gem->kflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
2036	else
2037		bo_gem->kflags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
2038}
2039
2040static int
2041drm_intel_gem_bo_add_softpin_target(drm_intel_bo *bo, drm_intel_bo *target_bo)
2042{
2043	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2044	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2045	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
2046	if (bo_gem->has_error)
2047		return -ENOMEM;
2048
2049	if (target_bo_gem->has_error) {
2050		bo_gem->has_error = true;
2051		return -ENOMEM;
2052	}
2053
2054	if (!(target_bo_gem->kflags & EXEC_OBJECT_PINNED))
2055		return -EINVAL;
2056	if (target_bo_gem == bo_gem)
2057		return -EINVAL;
2058
2059	if (bo_gem->softpin_target_count == bo_gem->softpin_target_size) {
2060		int new_size = bo_gem->softpin_target_size * 2;
2061		if (new_size == 0)
2062			new_size = bufmgr_gem->max_relocs;
2063
2064		bo_gem->softpin_target = realloc(bo_gem->softpin_target, new_size *
2065				sizeof(drm_intel_bo *));
2066		if (!bo_gem->softpin_target)
2067			return -ENOMEM;
2068
2069		bo_gem->softpin_target_size = new_size;
2070	}
2071	bo_gem->softpin_target[bo_gem->softpin_target_count] = target_bo;
2072	drm_intel_gem_bo_reference(target_bo);
2073	bo_gem->softpin_target_count++;
2074
2075	return 0;
2076}
2077
2078static int
2079drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
2080			    drm_intel_bo *target_bo, uint32_t target_offset,
2081			    uint32_t read_domains, uint32_t write_domain)
2082{
2083	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
2084	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *)target_bo;
2085
2086	if (target_bo_gem->kflags & EXEC_OBJECT_PINNED)
2087		return drm_intel_gem_bo_add_softpin_target(bo, target_bo);
2088	else
2089		return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
2090					read_domains, write_domain,
2091					!bufmgr_gem->fenced_relocs);
2092}
2093
2094static int
2095drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
2096				  drm_intel_bo *target_bo,
2097				  uint32_t target_offset,
2098				  uint32_t read_domains, uint32_t write_domain)
2099{
2100	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
2101				read_domains, write_domain, true);
2102}
2103
2104int
2105drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo)
2106{
2107	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2108
2109	return bo_gem->reloc_count;
2110}
2111
2112/**
2113 * Removes existing relocation entries in the BO after "start".
2114 *
2115 * This allows a user to avoid a two-step process for state setup with
2116 * counting up all the buffer objects and doing a
2117 * drm_intel_bufmgr_check_aperture_space() before emitting any of the
2118 * relocations for the state setup.  Instead, save the state of the
2119 * batchbuffer including drm_intel_gem_get_reloc_count(), emit all the
2120 * state, and then check if it still fits in the aperture.
2121 *
2122 * Any further drm_intel_bufmgr_check_aperture_space() queries
2123 * involving this buffer in the tree are undefined after this call.
2124 *
2125 * This also removes all softpinned targets being referenced by the BO.
2126 */
2127void
2128drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start)
2129{
2130	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2131	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2132	int i;
2133	struct timespec time;
2134
2135	clock_gettime(CLOCK_MONOTONIC, &time);
2136
2137	assert(bo_gem->reloc_count >= start);
2138
2139	/* Unreference the cleared target buffers */
2140	pthread_mutex_lock(&bufmgr_gem->lock);
2141
2142	for (i = start; i < bo_gem->reloc_count; i++) {
2143		drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo;
2144		if (&target_bo_gem->bo != bo) {
2145			bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences;
2146			drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo,
2147								  time.tv_sec);
2148		}
2149	}
2150	bo_gem->reloc_count = start;
2151
2152	for (i = 0; i < bo_gem->softpin_target_count; i++) {
2153		drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->softpin_target[i];
2154		drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, time.tv_sec);
2155	}
2156	bo_gem->softpin_target_count = 0;
2157
2158	pthread_mutex_unlock(&bufmgr_gem->lock);
2159
2160}
2161
2162/**
2163 * Walk the tree of relocations rooted at BO and accumulate the list of
2164 * validations to be performed and update the relocation buffers with
2165 * index values into the validation list.
2166 */
2167static void
2168drm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
2169{
2170	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2171	int i;
2172
2173	if (bo_gem->relocs == NULL)
2174		return;
2175
2176	for (i = 0; i < bo_gem->reloc_count; i++) {
2177		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
2178
2179		if (target_bo == bo)
2180			continue;
2181
2182		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
2183
2184		/* Continue walking the tree depth-first. */
2185		drm_intel_gem_bo_process_reloc(target_bo);
2186
2187		/* Add the target to the validate list */
2188		drm_intel_add_validate_buffer(target_bo);
2189	}
2190}
2191
2192static void
2193drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
2194{
2195	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
2196	int i;
2197
2198	if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL)
2199		return;
2200
2201	for (i = 0; i < bo_gem->reloc_count; i++) {
2202		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
2203		int need_fence;
2204
2205		if (target_bo == bo)
2206			continue;
2207
2208		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
2209
2210		/* Continue walking the tree depth-first. */
2211		drm_intel_gem_bo_process_reloc2(target_bo);
2212
2213		need_fence = (bo_gem->reloc_target_info[i].flags &
2214			      DRM_INTEL_RELOC_FENCE);
2215
2216		/* Add the target to the validate list */
2217		drm_intel_add_validate_buffer2(target_bo, need_fence);
2218	}
2219
2220	for (i = 0; i < bo_gem->softpin_target_count; i++) {
2221		drm_intel_bo *target_bo = bo_gem->softpin_target[i];
2222
2223		if (target_bo == bo)
2224			continue;
2225
2226		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
2227		drm_intel_gem_bo_process_reloc2(target_bo);
2228		drm_intel_add_validate_buffer2(target_bo, false);
2229	}
2230}
2231
2232
2233static void
2234drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
2235{
2236	int i;
2237
2238	for (i = 0; i < bufmgr_gem->exec_count; i++) {
2239		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
2240		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2241
2242		/* Update the buffer offset */
2243		if (bufmgr_gem->exec_objects[i].offset != bo->offset64) {
2244			DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
2245			    bo_gem->gem_handle, bo_gem->name,
2246			    upper_32_bits(bo->offset64),
2247			    lower_32_bits(bo->offset64),
2248			    upper_32_bits(bufmgr_gem->exec_objects[i].offset),
2249			    lower_32_bits(bufmgr_gem->exec_objects[i].offset));
2250			bo->offset64 = bufmgr_gem->exec_objects[i].offset;
2251			bo->offset = bufmgr_gem->exec_objects[i].offset;
2252		}
2253	}
2254}
2255
2256static void
2257drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
2258{
2259	int i;
2260
2261	for (i = 0; i < bufmgr_gem->exec_count; i++) {
2262		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
2263		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
2264
2265		/* Update the buffer offset */
2266		if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) {
2267			/* If we're seeing softpinned object here it means that the kernel
2268			 * has relocated our object... Indicating a programming error
2269			 */
2270			assert(!(bo_gem->kflags & EXEC_OBJECT_PINNED));
2271			DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
2272			    bo_gem->gem_handle, bo_gem->name,
2273			    upper_32_bits(bo->offset64),
2274			    lower_32_bits(bo->offset64),
2275			    upper_32_bits(bufmgr_gem->exec2_objects[i].offset),
2276			    lower_32_bits(bufmgr_gem->exec2_objects[i].offset));
2277			bo->offset64 = bufmgr_gem->exec2_objects[i].offset;
2278			bo->offset = bufmgr_gem->exec2_objects[i].offset;
2279		}
2280	}
2281}
2282
2283void
2284drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo,
2285			      int x1, int y1, int width, int height,
2286			      enum aub_dump_bmp_format format,
2287			      int pitch, int offset)
2288{
2289}
2290
2291static int
2292drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
2293		      drm_clip_rect_t * cliprects, int num_cliprects, int DR4)
2294{
2295	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2296	struct drm_i915_gem_execbuffer execbuf;
2297	int ret, i;
2298
2299	if (to_bo_gem(bo)->has_error)
2300		return -ENOMEM;
2301
2302	pthread_mutex_lock(&bufmgr_gem->lock);
2303	/* Update indices and set up the validate list. */
2304	drm_intel_gem_bo_process_reloc(bo);
2305
2306	/* Add the batch buffer to the validation list.  There are no
2307	 * relocations pointing to it.
2308	 */
2309	drm_intel_add_validate_buffer(bo);
2310
2311	memclear(execbuf);
2312	execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects;
2313	execbuf.buffer_count = bufmgr_gem->exec_count;
2314	execbuf.batch_start_offset = 0;
2315	execbuf.batch_len = used;
2316	execbuf.cliprects_ptr = (uintptr_t) cliprects;
2317	execbuf.num_cliprects = num_cliprects;
2318	execbuf.DR1 = 0;
2319	execbuf.DR4 = DR4;
2320
2321	ret = drmIoctl(bufmgr_gem->fd,
2322		       DRM_IOCTL_I915_GEM_EXECBUFFER,
2323		       &execbuf);
2324	if (ret != 0) {
2325		ret = -errno;
2326		if (errno == ENOSPC) {
2327			DBG("Execbuffer fails to pin. "
2328			    "Estimate: %u. Actual: %u. Available: %u\n",
2329			    drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
2330							       bufmgr_gem->
2331							       exec_count),
2332			    drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
2333							      bufmgr_gem->
2334							      exec_count),
2335			    (unsigned int)bufmgr_gem->gtt_size);
2336		}
2337	}
2338	drm_intel_update_buffer_offsets(bufmgr_gem);
2339
2340	if (bufmgr_gem->bufmgr.debug)
2341		drm_intel_gem_dump_validation_list(bufmgr_gem);
2342
2343	for (i = 0; i < bufmgr_gem->exec_count; i++) {
2344		drm_intel_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]);
2345
2346		bo_gem->idle = false;
2347
2348		/* Disconnect the buffer from the validate list */
2349		bo_gem->validate_index = -1;
2350		bufmgr_gem->exec_bos[i] = NULL;
2351	}
2352	bufmgr_gem->exec_count = 0;
2353	pthread_mutex_unlock(&bufmgr_gem->lock);
2354
2355	return ret;
2356}
2357
2358static int
2359do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx,
2360	 drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
2361	 int in_fence, int *out_fence,
2362	 unsigned int flags)
2363{
2364	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
2365	struct drm_i915_gem_execbuffer2 execbuf;
2366	int ret = 0;
2367	int i;
2368
2369	if (to_bo_gem(bo)->has_error)
2370		return -ENOMEM;
2371
2372	switch (flags & 0x7) {
2373	default:
2374		return -EINVAL;
2375	case I915_EXEC_BLT:
2376		if (!bufmgr_gem->has_blt)
2377			return -EINVAL;
2378		break;
2379	case I915_EXEC_BSD:
2380		if (!bufmgr_gem->has_bsd)
2381			return -EINVAL;
2382		break;
2383	case I915_EXEC_VEBOX:
2384		if (!bufmgr_gem->has_vebox)
2385			return -EINVAL;
2386		break;
2387	case I915_EXEC_RENDER:
2388	case I915_EXEC_DEFAULT:
2389		break;
2390	}
2391
2392	pthread_mutex_lock(&bufmgr_gem->lock);
2393	/* Update indices and set up the validate list. */
2394	drm_intel_gem_bo_process_reloc2(bo);
2395
2396	/* Add the batch buffer to the validation list.  There are no relocations
2397	 * pointing to it.
2398	 */
2399	drm_intel_add_validate_buffer2(bo, 0);
2400
2401	memclear(execbuf);
2402	execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
2403	execbuf.buffer_count = bufmgr_gem->exec_count;
2404	execbuf.batch_start_offset = 0;
2405	execbuf.batch_len = used;
2406	execbuf.cliprects_ptr = (uintptr_t)cliprects;
2407	execbuf.num_cliprects = num_cliprects;
2408	execbuf.DR1 = 0;
2409	execbuf.DR4 = DR4;
2410	execbuf.flags = flags;
2411	if (ctx == NULL)
2412		i915_execbuffer2_set_context_id(execbuf, 0);
2413	else
2414		i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id);
2415	execbuf.rsvd2 = 0;
2416	if (in_fence != -1) {
2417		execbuf.rsvd2 = in_fence;
2418		execbuf.flags |= I915_EXEC_FENCE_IN;
2419	}
2420	if (out_fence != NULL) {
2421		*out_fence = -1;
2422		execbuf.flags |= I915_EXEC_FENCE_OUT;
2423	}
2424
2425	if (bufmgr_gem->no_exec)
2426		goto skip_execution;
2427
2428	ret = drmIoctl(bufmgr_gem->fd,
2429		       DRM_IOCTL_I915_GEM_EXECBUFFER2_WR,
2430		       &execbuf);
2431	if (ret != 0) {
2432		ret = -errno;
2433		if (ret == -ENOSPC) {
2434			DBG("Execbuffer fails to pin. "
2435			    "Estimate: %u. Actual: %u. Available: %u\n",
2436			    drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
2437							       bufmgr_gem->exec_count),
2438			    drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
2439							      bufmgr_gem->exec_count),
2440			    (unsigned int) bufmgr_gem->gtt_size);
2441		}
2442	}
2443	drm_intel_update_buffer_offsets2(bufmgr_gem);
2444
2445	if (ret == 0 && out_fence != NULL)
2446		*out_fence = execbuf.rsvd2 >> 32;
2447
2448skip_execution:
2449	if (bufmgr_gem->bufmgr.debug)
2450		drm_intel_gem_dump_validation_list(bufmgr_gem);
2451
2452	for (i = 0; i < bufmgr_gem->exec_count; i++) {
2453		drm_intel_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]);
2454
2455		bo_gem->idle = false;
2456
2457		/* Disconnect the buffer from the validate list */
2458		bo_gem->validate_index = -1;
2459		bufmgr_gem->exec_bos[i] = NULL;
2460	}
2461	bufmgr_gem->exec_count = 0;
2462	pthread_mutex_unlock(&bufmgr_gem->lock);
2463
2464	return ret;
2465}
2466
2467static int
2468drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used,
2469		       drm_clip_rect_t *cliprects, int num_cliprects,
2470		       int DR4)
2471{
2472	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
2473			-1, NULL, I915_EXEC_RENDER);
2474}
2475
2476static int
2477drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
2478			drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
2479			unsigned int flags)
2480{
2481	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
2482			-1, NULL, flags);
2483}
2484
2485int
2486drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx,
2487			      int used, unsigned int flags)
2488{
2489	return do_exec2(bo, used, ctx, NULL, 0, 0, -1, NULL, flags);
2490}
2491
2492int
2493drm_intel_gem_bo_fence_exec(drm_intel_bo *bo,
2494			    drm_intel_context *ctx,
2495			    int used,
2496			    int in_fence,
2497			    int *out_fence,
2498			    unsigned int flags)
2499{
2500	return do_exec2(bo, used, ctx, NULL, 0, 0, in_fence, out_fence, flags);
2501}
2502
2503static int
2504drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
2505{
2506	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2507	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2508	struct drm_i915_gem_pin pin;
2509	int ret;
2510
2511	memclear(pin);
2512	pin.handle = bo_gem->gem_handle;
2513	pin.alignment = alignment;
2514
2515	ret = drmIoctl(bufmgr_gem->fd,
2516		       DRM_IOCTL_I915_GEM_PIN,
2517		       &pin);
2518	if (ret != 0)
2519		return -errno;
2520
2521	bo->offset64 = pin.offset;
2522	bo->offset = pin.offset;
2523	return 0;
2524}
2525
2526static int
2527drm_intel_gem_bo_unpin(drm_intel_bo *bo)
2528{
2529	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2530	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2531	struct drm_i915_gem_unpin unpin;
2532	int ret;
2533
2534	memclear(unpin);
2535	unpin.handle = bo_gem->gem_handle;
2536
2537	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
2538	if (ret != 0)
2539		return -errno;
2540
2541	return 0;
2542}
2543
2544static int
2545drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
2546				     uint32_t tiling_mode,
2547				     uint32_t stride)
2548{
2549	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2550	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2551	struct drm_i915_gem_set_tiling set_tiling;
2552	int ret;
2553
2554	if (bo_gem->global_name == 0 &&
2555	    tiling_mode == bo_gem->tiling_mode &&
2556	    stride == bo_gem->stride)
2557		return 0;
2558
2559	memset(&set_tiling, 0, sizeof(set_tiling));
2560	do {
2561		/* set_tiling is slightly broken and overwrites the
2562		 * input on the error path, so we have to open code
2563		 * rmIoctl.
2564		 */
2565		set_tiling.handle = bo_gem->gem_handle;
2566		set_tiling.tiling_mode = tiling_mode;
2567		set_tiling.stride = stride;
2568
2569		ret = ioctl(bufmgr_gem->fd,
2570			    DRM_IOCTL_I915_GEM_SET_TILING,
2571			    &set_tiling);
2572	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
2573	if (ret == -1)
2574		return -errno;
2575
2576	bo_gem->tiling_mode = set_tiling.tiling_mode;
2577	bo_gem->swizzle_mode = set_tiling.swizzle_mode;
2578	bo_gem->stride = set_tiling.stride;
2579	return 0;
2580}
2581
2582static int
2583drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
2584			    uint32_t stride)
2585{
2586	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2587	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2588	int ret;
2589
2590	/* Tiling with userptr surfaces is not supported
2591	 * on all hardware so refuse it for time being.
2592	 */
2593	if (bo_gem->is_userptr)
2594		return -EINVAL;
2595
2596	/* Linear buffers have no stride. By ensuring that we only ever use
2597	 * stride 0 with linear buffers, we simplify our code.
2598	 */
2599	if (*tiling_mode == I915_TILING_NONE)
2600		stride = 0;
2601
2602	ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride);
2603	if (ret == 0)
2604		drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
2605
2606	*tiling_mode = bo_gem->tiling_mode;
2607	return ret;
2608}
2609
2610static int
2611drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
2612			    uint32_t * swizzle_mode)
2613{
2614	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2615
2616	*tiling_mode = bo_gem->tiling_mode;
2617	*swizzle_mode = bo_gem->swizzle_mode;
2618	return 0;
2619}
2620
2621static int
2622drm_intel_gem_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset)
2623{
2624	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2625
2626	bo->offset64 = offset;
2627	bo->offset = offset;
2628	bo_gem->kflags |= EXEC_OBJECT_PINNED;
2629
2630	return 0;
2631}
2632
2633drm_intel_bo *
2634drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size)
2635{
2636	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
2637	int ret;
2638	uint32_t handle;
2639	drm_intel_bo_gem *bo_gem;
2640	struct drm_i915_gem_get_tiling get_tiling;
2641
2642	pthread_mutex_lock(&bufmgr_gem->lock);
2643	ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle);
2644	if (ret) {
2645		DBG("create_from_prime: failed to obtain handle from fd: %s\n", strerror(errno));
2646		pthread_mutex_unlock(&bufmgr_gem->lock);
2647		return NULL;
2648	}
2649
2650	/*
2651	 * See if the kernel has already returned this buffer to us. Just as
2652	 * for named buffers, we must not create two bo's pointing at the same
2653	 * kernel object
2654	 */
2655	HASH_FIND(handle_hh, bufmgr_gem->handle_table,
2656		  &handle, sizeof(handle), bo_gem);
2657	if (bo_gem) {
2658		drm_intel_gem_bo_reference(&bo_gem->bo);
2659		goto out;
2660	}
2661
2662	bo_gem = calloc(1, sizeof(*bo_gem));
2663	if (!bo_gem)
2664		goto out;
2665
2666	atomic_set(&bo_gem->refcount, 1);
2667	DRMINITLISTHEAD(&bo_gem->vma_list);
2668
2669	/* Determine size of bo.  The fd-to-handle ioctl really should
2670	 * return the size, but it doesn't.  If we have kernel 3.12 or
2671	 * later, we can lseek on the prime fd to get the size.  Older
2672	 * kernels will just fail, in which case we fall back to the
2673	 * provided (estimated or guess size). */
2674	ret = lseek(prime_fd, 0, SEEK_END);
2675	if (ret != -1)
2676		bo_gem->bo.size = ret;
2677	else
2678		bo_gem->bo.size = size;
2679
2680	bo_gem->bo.handle = handle;
2681	bo_gem->bo.bufmgr = bufmgr;
2682
2683	bo_gem->gem_handle = handle;
2684	HASH_ADD(handle_hh, bufmgr_gem->handle_table,
2685		 gem_handle, sizeof(bo_gem->gem_handle), bo_gem);
2686
2687	bo_gem->name = "prime";
2688	bo_gem->validate_index = -1;
2689	bo_gem->reloc_tree_fences = 0;
2690	bo_gem->used_as_reloc_target = false;
2691	bo_gem->has_error = false;
2692	bo_gem->reusable = false;
2693
2694	memclear(get_tiling);
2695	get_tiling.handle = bo_gem->gem_handle;
2696	if (drmIoctl(bufmgr_gem->fd,
2697		     DRM_IOCTL_I915_GEM_GET_TILING,
2698		     &get_tiling))
2699		goto err;
2700
2701	bo_gem->tiling_mode = get_tiling.tiling_mode;
2702	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
2703	/* XXX stride is unknown */
2704	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
2705
2706out:
2707	pthread_mutex_unlock(&bufmgr_gem->lock);
2708	return &bo_gem->bo;
2709
2710err:
2711	drm_intel_gem_bo_free(&bo_gem->bo);
2712	pthread_mutex_unlock(&bufmgr_gem->lock);
2713	return NULL;
2714}
2715
2716int
2717drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd)
2718{
2719	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2720	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2721
2722	if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle,
2723			       DRM_CLOEXEC, prime_fd) != 0)
2724		return -errno;
2725
2726	bo_gem->reusable = false;
2727
2728	return 0;
2729}
2730
2731static int
2732drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name)
2733{
2734	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2735	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2736
2737	if (!bo_gem->global_name) {
2738		struct drm_gem_flink flink;
2739
2740		memclear(flink);
2741		flink.handle = bo_gem->gem_handle;
2742		if (drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink))
2743			return -errno;
2744
2745		pthread_mutex_lock(&bufmgr_gem->lock);
2746		if (!bo_gem->global_name) {
2747			bo_gem->global_name = flink.name;
2748			bo_gem->reusable = false;
2749
2750			HASH_ADD(name_hh, bufmgr_gem->name_table,
2751				 global_name, sizeof(bo_gem->global_name),
2752				 bo_gem);
2753		}
2754		pthread_mutex_unlock(&bufmgr_gem->lock);
2755	}
2756
2757	*name = bo_gem->global_name;
2758	return 0;
2759}
2760
2761/**
2762 * Enables unlimited caching of buffer objects for reuse.
2763 *
2764 * This is potentially very memory expensive, as the cache at each bucket
2765 * size is only bounded by how many buffers of that size we've managed to have
2766 * in flight at once.
2767 */
2768void
2769drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr)
2770{
2771	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
2772
2773	bufmgr_gem->bo_reuse = true;
2774}
2775
2776/**
2777 * Disables implicit synchronisation before executing the bo
2778 *
2779 * This will cause rendering corruption unless you correctly manage explicit
2780 * fences for all rendering involving this buffer - including use by others.
2781 * Disabling the implicit serialisation is only required if that serialisation
2782 * is too coarse (for example, you have split the buffer into many
2783 * non-overlapping regions and are sharing the whole buffer between concurrent
2784 * independent command streams).
2785 *
2786 * Note the kernel must advertise support via I915_PARAM_HAS_EXEC_ASYNC,
2787 * which can be checked using drm_intel_bufmgr_can_disable_implicit_sync,
2788 * or subsequent execbufs involving the bo will generate EINVAL.
2789 */
2790void
2791drm_intel_gem_bo_disable_implicit_sync(drm_intel_bo *bo)
2792{
2793	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2794
2795	bo_gem->kflags |= EXEC_OBJECT_ASYNC;
2796}
2797
2798/**
2799 * Enables implicit synchronisation before executing the bo
2800 *
2801 * This is the default behaviour of the kernel, to wait upon prior writes
2802 * completing on the object before rendering with it, or to wait for prior
2803 * reads to complete before writing into the object.
2804 * drm_intel_gem_bo_disable_implicit_sync() can stop this behaviour, telling
2805 * the kernel never to insert a stall before using the object. Then this
2806 * function can be used to restore the implicit sync before subsequent
2807 * rendering.
2808 */
2809void
2810drm_intel_gem_bo_enable_implicit_sync(drm_intel_bo *bo)
2811{
2812	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2813
2814	bo_gem->kflags &= ~EXEC_OBJECT_ASYNC;
2815}
2816
2817/**
2818 * Query whether the kernel supports disabling of its implicit synchronisation
2819 * before execbuf. See drm_intel_gem_bo_disable_implicit_sync()
2820 */
2821int
2822drm_intel_bufmgr_gem_can_disable_implicit_sync(drm_intel_bufmgr *bufmgr)
2823{
2824	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
2825
2826	return bufmgr_gem->has_exec_async;
2827}
2828
2829/**
2830 * Enable use of fenced reloc type.
2831 *
2832 * New code should enable this to avoid unnecessary fence register
2833 * allocation.  If this option is not enabled, all relocs will have fence
2834 * register allocated.
2835 */
2836void
2837drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr)
2838{
2839	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
2840
2841	if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2)
2842		bufmgr_gem->fenced_relocs = true;
2843}
2844
2845/**
2846 * Return the additional aperture space required by the tree of buffer objects
2847 * rooted at bo.
2848 */
2849static int
2850drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
2851{
2852	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2853	int i;
2854	int total = 0;
2855
2856	if (bo == NULL || bo_gem->included_in_check_aperture)
2857		return 0;
2858
2859	total += bo->size;
2860	bo_gem->included_in_check_aperture = true;
2861
2862	for (i = 0; i < bo_gem->reloc_count; i++)
2863		total +=
2864		    drm_intel_gem_bo_get_aperture_space(bo_gem->
2865							reloc_target_info[i].bo);
2866
2867	return total;
2868}
2869
2870/**
2871 * Count the number of buffers in this list that need a fence reg
2872 *
2873 * If the count is greater than the number of available regs, we'll have
2874 * to ask the caller to resubmit a batch with fewer tiled buffers.
2875 *
2876 * This function over-counts if the same buffer is used multiple times.
2877 */
2878static unsigned int
2879drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count)
2880{
2881	int i;
2882	unsigned int total = 0;
2883
2884	for (i = 0; i < count; i++) {
2885		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
2886
2887		if (bo_gem == NULL)
2888			continue;
2889
2890		total += bo_gem->reloc_tree_fences;
2891	}
2892	return total;
2893}
2894
2895/**
2896 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready
2897 * for the next drm_intel_bufmgr_check_aperture_space() call.
2898 */
2899static void
2900drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
2901{
2902	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2903	int i;
2904
2905	if (bo == NULL || !bo_gem->included_in_check_aperture)
2906		return;
2907
2908	bo_gem->included_in_check_aperture = false;
2909
2910	for (i = 0; i < bo_gem->reloc_count; i++)
2911		drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->
2912							   reloc_target_info[i].bo);
2913}
2914
2915/**
2916 * Return a conservative estimate for the amount of aperture required
2917 * for a collection of buffers. This may double-count some buffers.
2918 */
2919static unsigned int
2920drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count)
2921{
2922	int i;
2923	unsigned int total = 0;
2924
2925	for (i = 0; i < count; i++) {
2926		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
2927		if (bo_gem != NULL)
2928			total += bo_gem->reloc_tree_size;
2929	}
2930	return total;
2931}
2932
2933/**
2934 * Return the amount of aperture needed for a collection of buffers.
2935 * This avoids double counting any buffers, at the cost of looking
2936 * at every buffer in the set.
2937 */
2938static unsigned int
2939drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count)
2940{
2941	int i;
2942	unsigned int total = 0;
2943
2944	for (i = 0; i < count; i++) {
2945		total += drm_intel_gem_bo_get_aperture_space(bo_array[i]);
2946		/* For the first buffer object in the array, we get an
2947		 * accurate count back for its reloc_tree size (since nothing
2948		 * had been flagged as being counted yet).  We can save that
2949		 * value out as a more conservative reloc_tree_size that
2950		 * avoids double-counting target buffers.  Since the first
2951		 * buffer happens to usually be the batch buffer in our
2952		 * callers, this can pull us back from doing the tree
2953		 * walk on every new batch emit.
2954		 */
2955		if (i == 0) {
2956			drm_intel_bo_gem *bo_gem =
2957			    (drm_intel_bo_gem *) bo_array[i];
2958			bo_gem->reloc_tree_size = total;
2959		}
2960	}
2961
2962	for (i = 0; i < count; i++)
2963		drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]);
2964	return total;
2965}
2966
2967/**
2968 * Return -1 if the batchbuffer should be flushed before attempting to
2969 * emit rendering referencing the buffers pointed to by bo_array.
2970 *
2971 * This is required because if we try to emit a batchbuffer with relocations
2972 * to a tree of buffers that won't simultaneously fit in the aperture,
2973 * the rendering will return an error at a point where the software is not
2974 * prepared to recover from it.
2975 *
2976 * However, we also want to emit the batchbuffer significantly before we reach
2977 * the limit, as a series of batchbuffers each of which references buffers
2978 * covering almost all of the aperture means that at each emit we end up
2979 * waiting to evict a buffer from the last rendering, and we get synchronous
2980 * performance.  By emitting smaller batchbuffers, we eat some CPU overhead to
2981 * get better parallelism.
2982 */
2983static int
2984drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
2985{
2986	drm_intel_bufmgr_gem *bufmgr_gem =
2987	    (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr;
2988	unsigned int total = 0;
2989	unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
2990	int total_fences;
2991
2992	/* Check for fence reg constraints if necessary */
2993	if (bufmgr_gem->available_fences) {
2994		total_fences = drm_intel_gem_total_fences(bo_array, count);
2995		if (total_fences > bufmgr_gem->available_fences)
2996			return -ENOSPC;
2997	}
2998
2999	total = drm_intel_gem_estimate_batch_space(bo_array, count);
3000
3001	if (total > threshold)
3002		total = drm_intel_gem_compute_batch_space(bo_array, count);
3003
3004	if (total > threshold) {
3005		DBG("check_space: overflowed available aperture, "
3006		    "%dkb vs %dkb\n",
3007		    total / 1024, (int)bufmgr_gem->gtt_size / 1024);
3008		return -ENOSPC;
3009	} else {
3010		DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024,
3011		    (int)bufmgr_gem->gtt_size / 1024);
3012		return 0;
3013	}
3014}
3015
3016/*
3017 * Disable buffer reuse for objects which are shared with the kernel
3018 * as scanout buffers
3019 */
3020static int
3021drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo)
3022{
3023	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3024
3025	bo_gem->reusable = false;
3026	return 0;
3027}
3028
3029static int
3030drm_intel_gem_bo_is_reusable(drm_intel_bo *bo)
3031{
3032	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3033
3034	return bo_gem->reusable;
3035}
3036
3037static int
3038_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
3039{
3040	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3041	int i;
3042
3043	for (i = 0; i < bo_gem->reloc_count; i++) {
3044		if (bo_gem->reloc_target_info[i].bo == target_bo)
3045			return 1;
3046		if (bo == bo_gem->reloc_target_info[i].bo)
3047			continue;
3048		if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo,
3049						target_bo))
3050			return 1;
3051	}
3052
3053	for (i = 0; i< bo_gem->softpin_target_count; i++) {
3054		if (bo_gem->softpin_target[i] == target_bo)
3055			return 1;
3056		if (_drm_intel_gem_bo_references(bo_gem->softpin_target[i], target_bo))
3057			return 1;
3058	}
3059
3060	return 0;
3061}
3062
3063/** Return true if target_bo is referenced by bo's relocation tree. */
3064static int
3065drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
3066{
3067	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
3068
3069	if (bo == NULL || target_bo == NULL)
3070		return 0;
3071	if (target_bo_gem->used_as_reloc_target)
3072		return _drm_intel_gem_bo_references(bo, target_bo);
3073	return 0;
3074}
3075
3076static void
3077add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
3078{
3079	unsigned int i = bufmgr_gem->num_buckets;
3080
3081	assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));
3082
3083	DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
3084	bufmgr_gem->cache_bucket[i].size = size;
3085	bufmgr_gem->num_buckets++;
3086}
3087
3088static void
3089init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
3090{
3091	unsigned long size, cache_max_size = 64 * 1024 * 1024;
3092
3093	/* OK, so power of two buckets was too wasteful of memory.
3094	 * Give 3 other sizes between each power of two, to hopefully
3095	 * cover things accurately enough.  (The alternative is
3096	 * probably to just go for exact matching of sizes, and assume
3097	 * that for things like composited window resize the tiled
3098	 * width/height alignment and rounding of sizes to pages will
3099	 * get us useful cache hit rates anyway)
3100	 */
3101	add_bucket(bufmgr_gem, 4096);
3102	add_bucket(bufmgr_gem, 4096 * 2);
3103	add_bucket(bufmgr_gem, 4096 * 3);
3104
3105	/* Initialize the linked lists for BO reuse cache. */
3106	for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
3107		add_bucket(bufmgr_gem, size);
3108
3109		add_bucket(bufmgr_gem, size + size * 1 / 4);
3110		add_bucket(bufmgr_gem, size + size * 2 / 4);
3111		add_bucket(bufmgr_gem, size + size * 3 / 4);
3112	}
3113}
3114
3115void
3116drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit)
3117{
3118	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3119
3120	bufmgr_gem->vma_max = limit;
3121
3122	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
3123}
3124
3125static int
3126parse_devid_override(const char *devid_override)
3127{
3128	static const struct {
3129		const char *name;
3130		int pci_id;
3131	} name_map[] = {
3132		{ "brw", PCI_CHIP_I965_GM },
3133		{ "g4x", PCI_CHIP_GM45_GM },
3134		{ "ilk", PCI_CHIP_ILD_G },
3135		{ "snb", PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS },
3136		{ "ivb", PCI_CHIP_IVYBRIDGE_S_GT2 },
3137		{ "hsw", PCI_CHIP_HASWELL_CRW_E_GT3 },
3138		{ "byt", PCI_CHIP_VALLEYVIEW_3 },
3139		{ "bdw", 0x1620 | BDW_ULX },
3140		{ "skl", PCI_CHIP_SKYLAKE_DT_GT2 },
3141		{ "kbl", PCI_CHIP_KABYLAKE_DT_GT2 },
3142	};
3143	unsigned int i;
3144
3145	for (i = 0; i < ARRAY_SIZE(name_map); i++) {
3146		if (!strcmp(name_map[i].name, devid_override))
3147			return name_map[i].pci_id;
3148	}
3149
3150	return strtod(devid_override, NULL);
3151}
3152
3153/**
3154 * Get the PCI ID for the device.  This can be overridden by setting the
3155 * INTEL_DEVID_OVERRIDE environment variable to the desired ID.
3156 */
3157static int
3158get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem)
3159{
3160	char *devid_override;
3161	int devid = 0;
3162	int ret;
3163	drm_i915_getparam_t gp;
3164
3165	if (geteuid() == getuid()) {
3166		devid_override = getenv("INTEL_DEVID_OVERRIDE");
3167		if (devid_override) {
3168			bufmgr_gem->no_exec = true;
3169			return parse_devid_override(devid_override);
3170		}
3171	}
3172
3173	memclear(gp);
3174	gp.param = I915_PARAM_CHIPSET_ID;
3175	gp.value = &devid;
3176	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3177	if (ret) {
3178		fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
3179		fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
3180	}
3181	return devid;
3182}
3183
3184int
3185drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr)
3186{
3187	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3188
3189	return bufmgr_gem->pci_device;
3190}
3191
3192/**
3193 * Sets the AUB filename.
3194 *
3195 * This function has to be called before drm_intel_bufmgr_gem_set_aub_dump()
3196 * for it to have any effect.
3197 */
3198void
3199drm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr,
3200				      const char *filename)
3201{
3202}
3203
3204/**
3205 * Sets up AUB dumping.
3206 *
3207 * This is a trace file format that can be used with the simulator.
3208 * Packets are emitted in a format somewhat like GPU command packets.
3209 * You can set up a GTT and upload your objects into the referenced
3210 * space, then send off batchbuffers and get BMPs out the other end.
3211 */
3212void
3213drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable)
3214{
3215	fprintf(stderr, "libdrm aub dumping is deprecated.\n\n"
3216		"Use intel_aubdump from intel-gpu-tools instead.  Install intel-gpu-tools,\n"
3217		"then run (for example)\n\n"
3218		"\t$ intel_aubdump --output=trace.aub glxgears -geometry 500x500\n\n"
3219		"See the intel_aubdump man page for more details.\n");
3220}
3221
3222drm_intel_context *
3223drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr)
3224{
3225	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3226	struct drm_i915_gem_context_create create;
3227	drm_intel_context *context = NULL;
3228	int ret;
3229
3230	context = calloc(1, sizeof(*context));
3231	if (!context)
3232		return NULL;
3233
3234	memclear(create);
3235	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
3236	if (ret != 0) {
3237		DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n",
3238		    strerror(errno));
3239		free(context);
3240		return NULL;
3241	}
3242
3243	context->ctx_id = create.ctx_id;
3244	context->bufmgr = bufmgr;
3245
3246	return context;
3247}
3248
3249int
3250drm_intel_gem_context_get_id(drm_intel_context *ctx, uint32_t *ctx_id)
3251{
3252	if (ctx == NULL)
3253		return -EINVAL;
3254
3255	*ctx_id = ctx->ctx_id;
3256
3257	return 0;
3258}
3259
3260void
3261drm_intel_gem_context_destroy(drm_intel_context *ctx)
3262{
3263	drm_intel_bufmgr_gem *bufmgr_gem;
3264	struct drm_i915_gem_context_destroy destroy;
3265	int ret;
3266
3267	if (ctx == NULL)
3268		return;
3269
3270	memclear(destroy);
3271
3272	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
3273	destroy.ctx_id = ctx->ctx_id;
3274	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY,
3275		       &destroy);
3276	if (ret != 0)
3277		fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
3278			strerror(errno));
3279
3280	free(ctx);
3281}
3282
3283int
3284drm_intel_get_reset_stats(drm_intel_context *ctx,
3285			  uint32_t *reset_count,
3286			  uint32_t *active,
3287			  uint32_t *pending)
3288{
3289	drm_intel_bufmgr_gem *bufmgr_gem;
3290	struct drm_i915_reset_stats stats;
3291	int ret;
3292
3293	if (ctx == NULL)
3294		return -EINVAL;
3295
3296	memclear(stats);
3297
3298	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
3299	stats.ctx_id = ctx->ctx_id;
3300	ret = drmIoctl(bufmgr_gem->fd,
3301		       DRM_IOCTL_I915_GET_RESET_STATS,
3302		       &stats);
3303	if (ret == 0) {
3304		if (reset_count != NULL)
3305			*reset_count = stats.reset_count;
3306
3307		if (active != NULL)
3308			*active = stats.batch_active;
3309
3310		if (pending != NULL)
3311			*pending = stats.batch_pending;
3312	}
3313
3314	return ret;
3315}
3316
3317int
3318drm_intel_reg_read(drm_intel_bufmgr *bufmgr,
3319		   uint32_t offset,
3320		   uint64_t *result)
3321{
3322	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3323	struct drm_i915_reg_read reg_read;
3324	int ret;
3325
3326	memclear(reg_read);
3327	reg_read.offset = offset;
3328
3329	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
3330
3331	*result = reg_read.val;
3332	return ret;
3333}
3334
3335int
3336drm_intel_get_subslice_total(int fd, unsigned int *subslice_total)
3337{
3338	drm_i915_getparam_t gp;
3339	int ret;
3340
3341	memclear(gp);
3342	gp.value = (int*)subslice_total;
3343	gp.param = I915_PARAM_SUBSLICE_TOTAL;
3344	ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
3345	if (ret)
3346		return -errno;
3347
3348	return 0;
3349}
3350
3351int
3352drm_intel_get_eu_total(int fd, unsigned int *eu_total)
3353{
3354	drm_i915_getparam_t gp;
3355	int ret;
3356
3357	memclear(gp);
3358	gp.value = (int*)eu_total;
3359	gp.param = I915_PARAM_EU_TOTAL;
3360	ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
3361	if (ret)
3362		return -errno;
3363
3364	return 0;
3365}
3366
3367int
3368drm_intel_get_pooled_eu(int fd)
3369{
3370	drm_i915_getparam_t gp;
3371	int ret = -1;
3372
3373	memclear(gp);
3374	gp.param = I915_PARAM_HAS_POOLED_EU;
3375	gp.value = &ret;
3376	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
3377		return -errno;
3378
3379	return ret;
3380}
3381
3382int
3383drm_intel_get_min_eu_in_pool(int fd)
3384{
3385	drm_i915_getparam_t gp;
3386	int ret = -1;
3387
3388	memclear(gp);
3389	gp.param = I915_PARAM_MIN_EU_IN_POOL;
3390	gp.value = &ret;
3391	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
3392		return -errno;
3393
3394	return ret;
3395}
3396
3397/**
3398 * Annotate the given bo for use in aub dumping.
3399 *
3400 * \param annotations is an array of drm_intel_aub_annotation objects
3401 * describing the type of data in various sections of the bo.  Each
3402 * element of the array specifies the type and subtype of a section of
3403 * the bo, and the past-the-end offset of that section.  The elements
3404 * of \c annotations must be sorted so that ending_offset is
3405 * increasing.
3406 *
3407 * \param count is the number of elements in the \c annotations array.
3408 * If \c count is zero, then \c annotations will not be dereferenced.
3409 *
3410 * Annotations are copied into a private data structure, so caller may
3411 * re-use the memory pointed to by \c annotations after the call
3412 * returns.
3413 *
3414 * Annotations are stored for the lifetime of the bo; to reset to the
3415 * default state (no annotations), call this function with a \c count
3416 * of zero.
3417 */
3418void
3419drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo,
3420					 drm_intel_aub_annotation *annotations,
3421					 unsigned count)
3422{
3423}
3424
3425static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER;
3426static drmMMListHead bufmgr_list = { &bufmgr_list, &bufmgr_list };
3427
3428static drm_intel_bufmgr_gem *
3429drm_intel_bufmgr_gem_find(int fd)
3430{
3431	drm_intel_bufmgr_gem *bufmgr_gem;
3432
3433	DRMLISTFOREACHENTRY(bufmgr_gem, &bufmgr_list, managers) {
3434		if (bufmgr_gem->fd == fd) {
3435			atomic_inc(&bufmgr_gem->refcount);
3436			return bufmgr_gem;
3437		}
3438	}
3439
3440	return NULL;
3441}
3442
3443static void
3444drm_intel_bufmgr_gem_unref(drm_intel_bufmgr *bufmgr)
3445{
3446	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3447
3448	if (atomic_add_unless(&bufmgr_gem->refcount, -1, 1)) {
3449		pthread_mutex_lock(&bufmgr_list_mutex);
3450
3451		if (atomic_dec_and_test(&bufmgr_gem->refcount)) {
3452			DRMLISTDEL(&bufmgr_gem->managers);
3453			drm_intel_bufmgr_gem_destroy(bufmgr);
3454		}
3455
3456		pthread_mutex_unlock(&bufmgr_list_mutex);
3457	}
3458}
3459
3460void *drm_intel_gem_bo_map__gtt(drm_intel_bo *bo)
3461{
3462	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
3463	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3464
3465	if (bo_gem->gtt_virtual)
3466		return bo_gem->gtt_virtual;
3467
3468	if (bo_gem->is_userptr)
3469		return NULL;
3470
3471	pthread_mutex_lock(&bufmgr_gem->lock);
3472	if (bo_gem->gtt_virtual == NULL) {
3473		struct drm_i915_gem_mmap_gtt mmap_arg;
3474		void *ptr;
3475
3476		DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
3477		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
3478
3479		if (bo_gem->map_count++ == 0)
3480			drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
3481
3482		memclear(mmap_arg);
3483		mmap_arg.handle = bo_gem->gem_handle;
3484
3485		/* Get the fake offset back... */
3486		ptr = MAP_FAILED;
3487		if (drmIoctl(bufmgr_gem->fd,
3488			     DRM_IOCTL_I915_GEM_MMAP_GTT,
3489			     &mmap_arg) == 0) {
3490			/* and mmap it */
3491			ptr = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
3492				       MAP_SHARED, bufmgr_gem->fd,
3493				       mmap_arg.offset);
3494		}
3495		if (ptr == MAP_FAILED) {
3496			if (--bo_gem->map_count == 0)
3497				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
3498			ptr = NULL;
3499		}
3500
3501		bo_gem->gtt_virtual = ptr;
3502	}
3503	pthread_mutex_unlock(&bufmgr_gem->lock);
3504
3505	return bo_gem->gtt_virtual;
3506}
3507
3508void *drm_intel_gem_bo_map__cpu(drm_intel_bo *bo)
3509{
3510	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
3511	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3512
3513	if (bo_gem->mem_virtual)
3514		return bo_gem->mem_virtual;
3515
3516	if (bo_gem->is_userptr) {
3517		/* Return the same user ptr */
3518		return bo_gem->user_virtual;
3519	}
3520
3521	pthread_mutex_lock(&bufmgr_gem->lock);
3522	if (!bo_gem->mem_virtual) {
3523		struct drm_i915_gem_mmap mmap_arg;
3524
3525		if (bo_gem->map_count++ == 0)
3526			drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
3527
3528		DBG("bo_map: %d (%s), map_count=%d\n",
3529		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
3530
3531		memclear(mmap_arg);
3532		mmap_arg.handle = bo_gem->gem_handle;
3533		mmap_arg.size = bo->size;
3534		if (drmIoctl(bufmgr_gem->fd,
3535			     DRM_IOCTL_I915_GEM_MMAP,
3536			     &mmap_arg)) {
3537			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
3538			    __FILE__, __LINE__, bo_gem->gem_handle,
3539			    bo_gem->name, strerror(errno));
3540			if (--bo_gem->map_count == 0)
3541				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
3542		} else {
3543			VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
3544			bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
3545		}
3546	}
3547	pthread_mutex_unlock(&bufmgr_gem->lock);
3548
3549	return bo_gem->mem_virtual;
3550}
3551
3552void *drm_intel_gem_bo_map__wc(drm_intel_bo *bo)
3553{
3554	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
3555	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3556
3557	if (bo_gem->wc_virtual)
3558		return bo_gem->wc_virtual;
3559
3560	if (bo_gem->is_userptr)
3561		return NULL;
3562
3563	pthread_mutex_lock(&bufmgr_gem->lock);
3564	if (!bo_gem->wc_virtual) {
3565		struct drm_i915_gem_mmap mmap_arg;
3566
3567		if (bo_gem->map_count++ == 0)
3568			drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
3569
3570		DBG("bo_map: %d (%s), map_count=%d\n",
3571		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
3572
3573		memclear(mmap_arg);
3574		mmap_arg.handle = bo_gem->gem_handle;
3575		mmap_arg.size = bo->size;
3576		mmap_arg.flags = I915_MMAP_WC;
3577		if (drmIoctl(bufmgr_gem->fd,
3578			     DRM_IOCTL_I915_GEM_MMAP,
3579			     &mmap_arg)) {
3580			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
3581			    __FILE__, __LINE__, bo_gem->gem_handle,
3582			    bo_gem->name, strerror(errno));
3583			if (--bo_gem->map_count == 0)
3584				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
3585		} else {
3586			VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
3587			bo_gem->wc_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
3588		}
3589	}
3590	pthread_mutex_unlock(&bufmgr_gem->lock);
3591
3592	return bo_gem->wc_virtual;
3593}
3594
3595/**
3596 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
3597 * and manage map buffer objections.
3598 *
3599 * \param fd File descriptor of the opened DRM device.
3600 */
3601drm_intel_bufmgr *
3602drm_intel_bufmgr_gem_init(int fd, int batch_size)
3603{
3604	drm_intel_bufmgr_gem *bufmgr_gem;
3605	struct drm_i915_gem_get_aperture aperture;
3606	drm_i915_getparam_t gp;
3607	int ret, tmp;
3608	bool exec2 = false;
3609
3610	pthread_mutex_lock(&bufmgr_list_mutex);
3611
3612	bufmgr_gem = drm_intel_bufmgr_gem_find(fd);
3613	if (bufmgr_gem)
3614		goto exit;
3615
3616	bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
3617	if (bufmgr_gem == NULL)
3618		goto exit;
3619
3620	bufmgr_gem->fd = fd;
3621	atomic_set(&bufmgr_gem->refcount, 1);
3622
3623	if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
3624		free(bufmgr_gem);
3625		bufmgr_gem = NULL;
3626		goto exit;
3627	}
3628
3629	memclear(aperture);
3630	ret = drmIoctl(bufmgr_gem->fd,
3631		       DRM_IOCTL_I915_GEM_GET_APERTURE,
3632		       &aperture);
3633
3634	if (ret == 0)
3635		bufmgr_gem->gtt_size = aperture.aper_available_size;
3636	else {
3637		fprintf(stderr, "DRM_IOCTL_I915_GEM_APERTURE failed: %s\n",
3638			strerror(errno));
3639		bufmgr_gem->gtt_size = 128 * 1024 * 1024;
3640		fprintf(stderr, "Assuming %dkB available aperture size.\n"
3641			"May lead to reduced performance or incorrect "
3642			"rendering.\n",
3643			(int)bufmgr_gem->gtt_size / 1024);
3644	}
3645
3646	bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem);
3647
3648	if (IS_GEN2(bufmgr_gem->pci_device))
3649		bufmgr_gem->gen = 2;
3650	else if (IS_GEN3(bufmgr_gem->pci_device))
3651		bufmgr_gem->gen = 3;
3652	else if (IS_GEN4(bufmgr_gem->pci_device))
3653		bufmgr_gem->gen = 4;
3654	else if (IS_GEN5(bufmgr_gem->pci_device))
3655		bufmgr_gem->gen = 5;
3656	else if (IS_GEN6(bufmgr_gem->pci_device))
3657		bufmgr_gem->gen = 6;
3658	else if (IS_GEN7(bufmgr_gem->pci_device))
3659		bufmgr_gem->gen = 7;
3660	else if (IS_GEN8(bufmgr_gem->pci_device))
3661		bufmgr_gem->gen = 8;
3662	else if (IS_GEN9(bufmgr_gem->pci_device))
3663		bufmgr_gem->gen = 9;
3664	else if (IS_GEN10(bufmgr_gem->pci_device))
3665		bufmgr_gem->gen = 10;
3666	else {
3667		free(bufmgr_gem);
3668		bufmgr_gem = NULL;
3669		goto exit;
3670	}
3671
3672	if (IS_GEN3(bufmgr_gem->pci_device) &&
3673	    bufmgr_gem->gtt_size > 256*1024*1024) {
3674		/* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't
3675		 * be used for tiled blits. To simplify the accounting, just
3676		 * subtract the unmappable part (fixed to 256MB on all known
3677		 * gen3 devices) if the kernel advertises it. */
3678		bufmgr_gem->gtt_size -= 256*1024*1024;
3679	}
3680
3681	memclear(gp);
3682	gp.value = &tmp;
3683
3684	gp.param = I915_PARAM_HAS_EXECBUF2;
3685	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3686	if (!ret)
3687		exec2 = true;
3688
3689	gp.param = I915_PARAM_HAS_BSD;
3690	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3691	bufmgr_gem->has_bsd = ret == 0;
3692
3693	gp.param = I915_PARAM_HAS_BLT;
3694	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3695	bufmgr_gem->has_blt = ret == 0;
3696
3697	gp.param = I915_PARAM_HAS_RELAXED_FENCING;
3698	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3699	bufmgr_gem->has_relaxed_fencing = ret == 0;
3700
3701	gp.param = I915_PARAM_HAS_EXEC_ASYNC;
3702	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3703	bufmgr_gem->has_exec_async = ret == 0;
3704
3705	bufmgr_gem->bufmgr.bo_alloc_userptr = check_bo_alloc_userptr;
3706
3707	gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
3708	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3709	bufmgr_gem->has_wait_timeout = ret == 0;
3710
3711	gp.param = I915_PARAM_HAS_LLC;
3712	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3713	if (ret != 0) {
3714		/* Kernel does not supports HAS_LLC query, fallback to GPU
3715		 * generation detection and assume that we have LLC on GEN6/7
3716		 */
3717		bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) |
3718				IS_GEN7(bufmgr_gem->pci_device));
3719	} else
3720		bufmgr_gem->has_llc = *gp.value;
3721
3722	gp.param = I915_PARAM_HAS_VEBOX;
3723	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3724	bufmgr_gem->has_vebox = (ret == 0) & (*gp.value > 0);
3725
3726	gp.param = I915_PARAM_HAS_EXEC_SOFTPIN;
3727	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3728	if (ret == 0 && *gp.value > 0)
3729		bufmgr_gem->bufmgr.bo_set_softpin_offset = drm_intel_gem_bo_set_softpin_offset;
3730
3731	if (bufmgr_gem->gen < 4) {
3732		gp.param = I915_PARAM_NUM_FENCES_AVAIL;
3733		gp.value = &bufmgr_gem->available_fences;
3734		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3735		if (ret) {
3736			fprintf(stderr, "get fences failed: %d [%d]\n", ret,
3737				errno);
3738			fprintf(stderr, "param: %d, val: %d\n", gp.param,
3739				*gp.value);
3740			bufmgr_gem->available_fences = 0;
3741		} else {
3742			/* XXX The kernel reports the total number of fences,
3743			 * including any that may be pinned.
3744			 *
3745			 * We presume that there will be at least one pinned
3746			 * fence for the scanout buffer, but there may be more
3747			 * than one scanout and the user may be manually
3748			 * pinning buffers. Let's move to execbuffer2 and
3749			 * thereby forget the insanity of using fences...
3750			 */
3751			bufmgr_gem->available_fences -= 2;
3752			if (bufmgr_gem->available_fences < 0)
3753				bufmgr_gem->available_fences = 0;
3754		}
3755	}
3756
3757	if (bufmgr_gem->gen >= 8) {
3758		gp.param = I915_PARAM_HAS_ALIASING_PPGTT;
3759		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3760		if (ret == 0 && *gp.value == 3)
3761			bufmgr_gem->bufmgr.bo_use_48b_address_range = drm_intel_gem_bo_use_48b_address_range;
3762	}
3763
3764	/* Let's go with one relocation per every 2 dwords (but round down a bit
3765	 * since a power of two will mean an extra page allocation for the reloc
3766	 * buffer).
3767	 *
3768	 * Every 4 was too few for the blender benchmark.
3769	 */
3770	bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
3771
3772	bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
3773	bufmgr_gem->bufmgr.bo_alloc_for_render =
3774	    drm_intel_gem_bo_alloc_for_render;
3775	bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
3776	bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
3777	bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
3778	bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
3779	bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
3780	bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
3781	bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
3782	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
3783	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
3784	bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
3785	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
3786	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
3787	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
3788	bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
3789	bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
3790	/* Use the new one if available */
3791	if (exec2) {
3792		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2;
3793		bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2;
3794	} else
3795		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
3796	bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy;
3797	bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise;
3798	bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_unref;
3799	bufmgr_gem->bufmgr.debug = 0;
3800	bufmgr_gem->bufmgr.check_aperture_space =
3801	    drm_intel_gem_check_aperture_space;
3802	bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
3803	bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable;
3804	bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
3805	    drm_intel_gem_get_pipe_from_crtc_id;
3806	bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;
3807
3808	init_cache_buckets(bufmgr_gem);
3809
3810	DRMINITLISTHEAD(&bufmgr_gem->vma_cache);
3811	bufmgr_gem->vma_max = -1; /* unlimited by default */
3812
3813	DRMLISTADD(&bufmgr_gem->managers, &bufmgr_list);
3814
3815exit:
3816	pthread_mutex_unlock(&bufmgr_list_mutex);
3817
3818	return bufmgr_gem != NULL ? &bufmgr_gem->bufmgr : NULL;
3819}
3820