intel_bufmgr_gem.c revision 3c748557
122944501Smrg/**************************************************************************
222944501Smrg *
 * Copyright © 2007 Red Hat Inc.
 * Copyright © 2007-2012 Intel Corporation
522944501Smrg * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
622944501Smrg * All Rights Reserved.
722944501Smrg *
822944501Smrg * Permission is hereby granted, free of charge, to any person obtaining a
922944501Smrg * copy of this software and associated documentation files (the
1022944501Smrg * "Software"), to deal in the Software without restriction, including
1122944501Smrg * without limitation the rights to use, copy, modify, merge, publish,
1222944501Smrg * distribute, sub license, and/or sell copies of the Software, and to
1322944501Smrg * permit persons to whom the Software is furnished to do so, subject to
1422944501Smrg * the following conditions:
1522944501Smrg *
1622944501Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1722944501Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1822944501Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
1922944501Smrg * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
2022944501Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
2122944501Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
2222944501Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE.
2322944501Smrg *
2422944501Smrg * The above copyright notice and this permission notice (including the
2522944501Smrg * next paragraph) shall be included in all copies or substantial portions
2622944501Smrg * of the Software.
2722944501Smrg *
2822944501Smrg *
2922944501Smrg **************************************************************************/
3022944501Smrg/*
 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
3222944501Smrg *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
3322944501Smrg *	    Eric Anholt <eric@anholt.net>
3422944501Smrg *	    Dave Airlie <airlied@linux.ie>
3522944501Smrg */
3622944501Smrg
3722944501Smrg#ifdef HAVE_CONFIG_H
3822944501Smrg#include "config.h"
3922944501Smrg#endif
4022944501Smrg
4122944501Smrg#include <xf86drm.h>
4222944501Smrg#include <xf86atomic.h>
4322944501Smrg#include <fcntl.h>
4422944501Smrg#include <stdio.h>
4522944501Smrg#include <stdlib.h>
4622944501Smrg#include <string.h>
4722944501Smrg#include <unistd.h>
4822944501Smrg#include <assert.h>
4922944501Smrg#include <pthread.h>
502e6867f6Smrg#include <stddef.h>
5122944501Smrg#include <sys/ioctl.h>
5222944501Smrg#include <sys/stat.h>
5322944501Smrg#include <sys/types.h>
5420131375Smrg#include <stdbool.h>
5522944501Smrg
5622944501Smrg#include "errno.h"
5720131375Smrg#ifndef ETIME
5820131375Smrg#define ETIME ETIMEDOUT
5920131375Smrg#endif
60a884aba1Smrg#include "libdrm.h"
6122944501Smrg#include "libdrm_lists.h"
6222944501Smrg#include "intel_bufmgr.h"
6322944501Smrg#include "intel_bufmgr_priv.h"
6422944501Smrg#include "intel_chipset.h"
6520131375Smrg#include "intel_aub.h"
6622944501Smrg#include "string.h"
6722944501Smrg
6822944501Smrg#include "i915_drm.h"
6922944501Smrg
7020131375Smrg#ifdef HAVE_VALGRIND
7120131375Smrg#include <valgrind.h>
7220131375Smrg#include <memcheck.h>
7320131375Smrg#define VG(x) x
7420131375Smrg#else
7520131375Smrg#define VG(x)
7620131375Smrg#endif
7720131375Smrg
7820131375Smrg#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s)))
7920131375Smrg
8022944501Smrg#define DBG(...) do {					\
8122944501Smrg	if (bufmgr_gem->bufmgr.debug)			\
8222944501Smrg		fprintf(stderr, __VA_ARGS__);		\
8322944501Smrg} while (0)
8422944501Smrg
85aaba2545Smrg#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
86aaba2545Smrg
typedef struct _drm_intel_bo_gem drm_intel_bo_gem;

/* One free-list of cached (freed but reusable) BOs of a single size class. */
struct drm_intel_gem_bo_bucket {
	drmMMListHead head;	/* list of cached drm_intel_bo_gem, LRU first */
	unsigned long size;	/* allocation size, in bytes, this bucket serves */
};
9322944501Smrg
/* Private buffer-manager state for the GEM kernel interface. */
typedef struct _drm_intel_bufmgr_gem {
	drm_intel_bufmgr bufmgr;	/* public vtable; must stay first */

	atomic_t refcount;		/* shared-bufmgr reference count */

	int fd;				/* DRM device file descriptor */

	int max_relocs;			/* per-buffer relocation limit */

	pthread_mutex_t lock;		/* guards the mutable state below */

	/* Execbuffer state, rebuilt for each batch submission (legacy and
	 * execbuf2 object arrays, plus the matching BO pointers). */
	struct drm_i915_gem_exec_object *exec_objects;
	struct drm_i915_gem_exec_object2 *exec2_objects;
	drm_intel_bo **exec_bos;
	int exec_size;			/* capacity of the arrays above */
	int exec_count;			/* entries currently in use */

	/** Array of lists of cached gem objects of power-of-two sizes */
	struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
	int num_buckets;		/* buckets actually initialized */
	time_t time;			/* timestamp of last cache expiry pass */

	drmMMListHead managers;

	drmMMListHead named;		/* flink-named / prime-shared BOs */
	drmMMListHead vma_cache;	/* cached CPU/GTT mappings */
	int vma_count, vma_open, vma_max;

	uint64_t gtt_size;		/* total aperture size in bytes */
	int available_fences;
	int pci_device;			/* PCI device id */
	int gen;			/* hardware generation number */
	/* Kernel/hardware capability flags, probed at init time. */
	unsigned int has_bsd : 1;
	unsigned int has_blt : 1;
	unsigned int has_relaxed_fencing : 1;
	unsigned int has_llc : 1;
	unsigned int has_wait_timeout : 1;
	unsigned int bo_reuse : 1;	/* cache freed BOs for reuse */
	unsigned int no_exec : 1;
	unsigned int has_vebox : 1;
	bool fenced_relocs;

	/* AUB trace capture state (debug feature). */
	char *aub_filename;
	FILE *aub_file;
	uint32_t aub_offset;
} drm_intel_bufmgr_gem;
14022944501Smrg
/* Relocation flag: the target buffer requires a fence register. */
#define DRM_INTEL_RELOC_FENCE (1<<0)

/* Per-relocation bookkeeping kept parallel to the kernel reloc entries. */
typedef struct _drm_intel_reloc_target_info {
	drm_intel_bo *bo;	/* target buffer of the relocation */
	int flags;		/* DRM_INTEL_RELOC_FENCE or 0 */
} drm_intel_reloc_target;
14722944501Smrg
/* Private per-buffer state behind the public drm_intel_bo handle. */
struct _drm_intel_bo_gem {
	drm_intel_bo bo;	/* public part; must stay first */

	atomic_t refcount;	/* reference count, atomically updated */
	uint32_t gem_handle;	/* kernel GEM handle */
	const char *name;	/* debug name supplied at allocation */

	/**
	 * Kernel-assigned global name for this object
         *
         * List contains both flink named and prime fd'd objects
	 */
	unsigned int global_name;
	drmMMListHead name_list;

	/**
	 * Index of the buffer within the validation list while preparing a
	 * batchbuffer execution.
	 */
	int validate_index;

	/**
	 * Current tiling mode
	 */
	uint32_t tiling_mode;
	uint32_t swizzle_mode;
	unsigned long stride;

	time_t free_time;	/* when the BO entered the reuse cache */

	/** Array passed to the DRM containing relocation information. */
	struct drm_i915_gem_relocation_entry *relocs;
	/**
	 * Array of info structs corresponding to relocs[i].target_handle etc
	 */
	drm_intel_reloc_target *reloc_target_info;
	/** Number of entries in relocs */
	int reloc_count;
	/** Mapped address for the buffer, saved across map/unmap cycles */
	void *mem_virtual;
	/** GTT virtual address for the buffer, saved across map/unmap cycles */
	void *gtt_virtual;
	/**
	 * Virtual address of the buffer allocated by user, used for userptr
	 * objects only.
	 */
	void *user_virtual;
	int map_count;		/* nesting count of outstanding maps */
	drmMMListHead vma_list;	/* entry in the bufmgr's vma cache */

	/** BO cache list */
	drmMMListHead head;

	/**
	 * Boolean of whether this BO and its children have been included in
	 * the current drm_intel_bufmgr_check_aperture_space() total.
	 */
	bool included_in_check_aperture;

	/**
	 * Boolean of whether this buffer has been used as a relocation
	 * target and had its size accounted for, and thus can't have any
	 * further relocations added to it.
	 */
	bool used_as_reloc_target;

	/**
	 * Boolean of whether we have encountered an error whilst building the relocation tree.
	 */
	bool has_error;

	/**
	 * Boolean of whether this buffer can be re-used
	 */
	bool reusable;

	/**
	 * Boolean of whether the GPU is definitely not accessing the buffer.
	 *
	 * This is only valid when reusable, since non-reusable
	 * buffers are those that have been shared with other
	 * processes, so we don't know their state.
	 */
	bool idle;

	/**
	 * Boolean of whether this buffer was allocated with userptr
	 */
	bool is_userptr;

	/**
	 * Size in bytes of this buffer and its relocation descendents.
	 *
	 * Used to avoid costly tree walking in
	 * drm_intel_bufmgr_check_aperture in the common case.
	 */
	int reloc_tree_size;

	/**
	 * Number of potential fence registers required by this buffer and its
	 * relocations.
	 */
	int reloc_tree_fences;

	/** Flags that we may need to do the SW_FINISH ioctl on unmap. */
	bool mapped_cpu_write;

	uint32_t aub_offset;	/* offset of this BO's data in the AUB trace */

	drm_intel_aub_annotation *aub_annotations;
	unsigned aub_annotation_count;
};
26022944501Smrg
/* Forward declarations for helpers defined later in this file. */
static unsigned int
drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);

static unsigned int
drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count);

static int
drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
			    uint32_t * swizzle_mode);

static int
drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
				     uint32_t tiling_mode,
				     uint32_t stride);

static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
						      time_t time);

static void drm_intel_gem_bo_unreference(drm_intel_bo *bo);

static void drm_intel_gem_bo_free(drm_intel_bo *bo);
28222944501Smrg
28322944501Smrgstatic unsigned long
28422944501Smrgdrm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
28522944501Smrg			   uint32_t *tiling_mode)
28622944501Smrg{
28722944501Smrg	unsigned long min_size, max_size;
28822944501Smrg	unsigned long i;
28922944501Smrg
29022944501Smrg	if (*tiling_mode == I915_TILING_NONE)
29122944501Smrg		return size;
29222944501Smrg
29322944501Smrg	/* 965+ just need multiples of page size for tiling */
29422944501Smrg	if (bufmgr_gem->gen >= 4)
29522944501Smrg		return ROUND_UP_TO(size, 4096);
29622944501Smrg
29722944501Smrg	/* Older chips need powers of two, of at least 512k or 1M */
29822944501Smrg	if (bufmgr_gem->gen == 3) {
29922944501Smrg		min_size = 1024*1024;
30022944501Smrg		max_size = 128*1024*1024;
30122944501Smrg	} else {
30222944501Smrg		min_size = 512*1024;
30322944501Smrg		max_size = 64*1024*1024;
30422944501Smrg	}
30522944501Smrg
30622944501Smrg	if (size > max_size) {
30722944501Smrg		*tiling_mode = I915_TILING_NONE;
30822944501Smrg		return size;
30922944501Smrg	}
31022944501Smrg
3119ce4edccSmrg	/* Do we need to allocate every page for the fence? */
3129ce4edccSmrg	if (bufmgr_gem->has_relaxed_fencing)
3139ce4edccSmrg		return ROUND_UP_TO(size, 4096);
3149ce4edccSmrg
31522944501Smrg	for (i = min_size; i < size; i <<= 1)
31622944501Smrg		;
31722944501Smrg
31822944501Smrg	return i;
31922944501Smrg}
32022944501Smrg
32122944501Smrg/*
32222944501Smrg * Round a given pitch up to the minimum required for X tiling on a
32322944501Smrg * given chip.  We use 512 as the minimum to allow for a later tiling
32422944501Smrg * change.
32522944501Smrg */
32622944501Smrgstatic unsigned long
32722944501Smrgdrm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
3286d98c517Smrg			    unsigned long pitch, uint32_t *tiling_mode)
32922944501Smrg{
33022944501Smrg	unsigned long tile_width;
33122944501Smrg	unsigned long i;
33222944501Smrg
33322944501Smrg	/* If untiled, then just align it so that we can do rendering
33422944501Smrg	 * to it with the 3D engine.
33522944501Smrg	 */
3366d98c517Smrg	if (*tiling_mode == I915_TILING_NONE)
33722944501Smrg		return ALIGN(pitch, 64);
33822944501Smrg
33920131375Smrg	if (*tiling_mode == I915_TILING_X
34020131375Smrg			|| (IS_915(bufmgr_gem->pci_device)
34120131375Smrg			    && *tiling_mode == I915_TILING_Y))
34222944501Smrg		tile_width = 512;
34322944501Smrg	else
34422944501Smrg		tile_width = 128;
34522944501Smrg
34622944501Smrg	/* 965 is flexible */
34722944501Smrg	if (bufmgr_gem->gen >= 4)
34822944501Smrg		return ROUND_UP_TO(pitch, tile_width);
34922944501Smrg
3506d98c517Smrg	/* The older hardware has a maximum pitch of 8192 with tiled
3516d98c517Smrg	 * surfaces, so fallback to untiled if it's too large.
3526d98c517Smrg	 */
3536d98c517Smrg	if (pitch > 8192) {
3546d98c517Smrg		*tiling_mode = I915_TILING_NONE;
3556d98c517Smrg		return ALIGN(pitch, 64);
3566d98c517Smrg	}
3576d98c517Smrg
35822944501Smrg	/* Pre-965 needs power of two tile width */
35922944501Smrg	for (i = tile_width; i < pitch; i <<= 1)
36022944501Smrg		;
36122944501Smrg
36222944501Smrg	return i;
36322944501Smrg}
36422944501Smrg
36522944501Smrgstatic struct drm_intel_gem_bo_bucket *
36622944501Smrgdrm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
36722944501Smrg				 unsigned long size)
36822944501Smrg{
36922944501Smrg	int i;
37022944501Smrg
371aaba2545Smrg	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
37222944501Smrg		struct drm_intel_gem_bo_bucket *bucket =
37322944501Smrg		    &bufmgr_gem->cache_bucket[i];
37422944501Smrg		if (bucket->size >= size) {
37522944501Smrg			return bucket;
37622944501Smrg		}
37722944501Smrg	}
37822944501Smrg
37922944501Smrg	return NULL;
38022944501Smrg}
38122944501Smrg
/* Debug helper: print every buffer on the current validation list and,
 * for each, all of its relocation entries (handles, names, offsets and
 * deltas).  Output goes through DBG, so it is a no-op unless the bufmgr
 * debug flag is set. */
static void
drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
{
	int i, j;

	for (i = 0; i < bufmgr_gem->exec_count; i++) {
		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

		if (bo_gem->relocs == NULL) {
			/* No relocations: just print the buffer identity. */
			DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle,
			    bo_gem->name);
			continue;
		}

		for (j = 0; j < bo_gem->reloc_count; j++) {
			drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo;
			drm_intel_bo_gem *target_gem =
			    (drm_intel_bo_gem *) target_bo;

			DBG("%2d: %d (%s)@0x%08llx -> "
			    "%d (%s)@0x%08llx + 0x%08x\n",
			    i,
			    bo_gem->gem_handle, bo_gem->name,
			    (unsigned long long)bo_gem->relocs[j].offset,
			    target_gem->gem_handle,
			    target_gem->name,
			    (unsigned long long)target_bo->offset64,
			    bo_gem->relocs[j].delta);
		}
	}
}
41422944501Smrg
/* Take an additional reference on @bo (lock-free atomic increment). */
static inline void
drm_intel_gem_bo_reference(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	atomic_inc(&bo_gem->refcount);
}
42222944501Smrg
/**
 * Adds the given buffer to the list of buffers to be validated (moved into the
 * appropriate memory type) with the next batch submission.
 *
 * If a buffer is validated multiple times in a batch submission, it ends up
 * with the intersection of the memory type flags and the union of the
 * access flags.
 */
static void
drm_intel_add_validate_buffer(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int index;

	/* Already on the validation list: nothing to do. */
	if (bo_gem->validate_index != -1)
		return;

	/* Extend the array of validation entries as necessary. */
	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
		int new_size = bufmgr_gem->exec_size * 2;

		if (new_size == 0)
			new_size = 5;

		/* NOTE(review): realloc return values are unchecked and
		 * overwrite the old pointers, leaking them on failure —
		 * longstanding pattern throughout this file; confirm
		 * upstream intent before changing. */
		bufmgr_gem->exec_objects =
		    realloc(bufmgr_gem->exec_objects,
			    sizeof(*bufmgr_gem->exec_objects) * new_size);
		bufmgr_gem->exec_bos =
		    realloc(bufmgr_gem->exec_bos,
			    sizeof(*bufmgr_gem->exec_bos) * new_size);
		bufmgr_gem->exec_size = new_size;
	}

	index = bufmgr_gem->exec_count;
	bo_gem->validate_index = index;
	/* Fill in array entry */
	bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
	bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
	bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs;
	bufmgr_gem->exec_objects[index].alignment = 0;
	bufmgr_gem->exec_objects[index].offset = 0;
	bufmgr_gem->exec_bos[index] = bo;
	bufmgr_gem->exec_count++;
}
46822944501Smrg
/* execbuf2 variant of drm_intel_add_validate_buffer: additionally tracks
 * per-object flags, in particular EXEC_OBJECT_NEEDS_FENCE when
 * @need_fence is set. */
static void
drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
	int index;

	/* Already listed: just accumulate the fence requirement. */
	if (bo_gem->validate_index != -1) {
		if (need_fence)
			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
				EXEC_OBJECT_NEEDS_FENCE;
		return;
	}

	/* Extend the array of validation entries as necessary. */
	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
		int new_size = bufmgr_gem->exec_size * 2;

		if (new_size == 0)
			new_size = 5;

		bufmgr_gem->exec2_objects =
			realloc(bufmgr_gem->exec2_objects,
				sizeof(*bufmgr_gem->exec2_objects) * new_size);
		bufmgr_gem->exec_bos =
			realloc(bufmgr_gem->exec_bos,
				sizeof(*bufmgr_gem->exec_bos) * new_size);
		bufmgr_gem->exec_size = new_size;
	}

	index = bufmgr_gem->exec_count;
	bo_gem->validate_index = index;
	/* Fill in array entry */
	bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
	bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
	bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
	bufmgr_gem->exec2_objects[index].alignment = 0;
	bufmgr_gem->exec2_objects[index].offset = 0;
	bufmgr_gem->exec_bos[index] = bo;
	bufmgr_gem->exec2_objects[index].flags = 0;
	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
	if (need_fence) {
		bufmgr_gem->exec2_objects[index].flags |=
			EXEC_OBJECT_NEEDS_FENCE;
	}
	bufmgr_gem->exec_count++;
}
51722944501Smrg
/* Worst-case batch-buffer bytes consumed by @x relocation entries. */
#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
	sizeof(uint32_t))

/* Compute the worst-case aperture footprint of @bo_gem and store it in
 * reloc_tree_size.  Must be called before the buffer is ever used as a
 * relocation target (enforced by the assert). */
static void
drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem,
				      drm_intel_bo_gem *bo_gem)
{
	int size;

	assert(!bo_gem->used_as_reloc_target);

	/* The older chipsets are far-less flexible in terms of tiling,
	 * and require tiled buffer to be size aligned in the aperture.
	 * This means that in the worst possible case we will need a hole
	 * twice as large as the object in order for it to fit into the
	 * aperture. Optimal packing is for wimps.
	 */
	size = bo_gem->bo.size;
	if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) {
		int min_size;

		if (bufmgr_gem->has_relaxed_fencing) {
			if (bufmgr_gem->gen == 3)
				min_size = 1024*1024;
			else
				min_size = 512*1024;

			/* Round up to the fence's power-of-two region. */
			while (min_size < size)
				min_size *= 2;
		} else
			min_size = size;

		/* Account for worst-case alignment. */
		size = 2 * min_size;
	}

	bo_gem->reloc_tree_size = size;
}
55622944501Smrg
55722944501Smrgstatic int
55822944501Smrgdrm_intel_setup_reloc_list(drm_intel_bo *bo)
55922944501Smrg{
56022944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
56122944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
56222944501Smrg	unsigned int max_relocs = bufmgr_gem->max_relocs;
56322944501Smrg
56422944501Smrg	if (bo->size / 4 < max_relocs)
56522944501Smrg		max_relocs = bo->size / 4;
56622944501Smrg
56722944501Smrg	bo_gem->relocs = malloc(max_relocs *
56822944501Smrg				sizeof(struct drm_i915_gem_relocation_entry));
56922944501Smrg	bo_gem->reloc_target_info = malloc(max_relocs *
570aaba2545Smrg					   sizeof(drm_intel_reloc_target));
57122944501Smrg	if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) {
57220131375Smrg		bo_gem->has_error = true;
57322944501Smrg
57422944501Smrg		free (bo_gem->relocs);
57522944501Smrg		bo_gem->relocs = NULL;
57622944501Smrg
57722944501Smrg		free (bo_gem->reloc_target_info);
57822944501Smrg		bo_gem->reloc_target_info = NULL;
57922944501Smrg
58022944501Smrg		return 1;
58122944501Smrg	}
58222944501Smrg
58322944501Smrg	return 0;
58422944501Smrg}
58522944501Smrg
58622944501Smrgstatic int
58722944501Smrgdrm_intel_gem_bo_busy(drm_intel_bo *bo)
58822944501Smrg{
58922944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
59022944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
59122944501Smrg	struct drm_i915_gem_busy busy;
59222944501Smrg	int ret;
59322944501Smrg
59420131375Smrg	if (bo_gem->reusable && bo_gem->idle)
59520131375Smrg		return false;
59620131375Smrg
59720131375Smrg	VG_CLEAR(busy);
59822944501Smrg	busy.handle = bo_gem->gem_handle;
59922944501Smrg
6006d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
60120131375Smrg	if (ret == 0) {
60220131375Smrg		bo_gem->idle = !busy.busy;
60320131375Smrg		return busy.busy;
60420131375Smrg	} else {
60520131375Smrg		return false;
60620131375Smrg	}
60722944501Smrg	return (ret == 0 && busy.busy);
60822944501Smrg}
60922944501Smrg
/* Set the kernel's purgeability state for @bo_gem's backing pages:
 * @state is e.g. I915_MADV_WILLNEED or I915_MADV_DONTNEED.  Returns the
 * ioctl's "retained" flag: non-zero if the pages have not been discarded. */
static int
drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem,
				  drm_intel_bo_gem *bo_gem, int state)
{
	struct drm_i915_gem_madvise madv;

	VG_CLEAR(madv);
	madv.handle = bo_gem->gem_handle;
	madv.madv = state;
	madv.retained = 1;
	/* Ioctl return value deliberately ignored; madv.retained carries
	 * the answer (it keeps its initial value of 1 on failure). */
	drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);

	return madv.retained;
}
62422944501Smrg
62522944501Smrgstatic int
62622944501Smrgdrm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv)
62722944501Smrg{
62822944501Smrg	return drm_intel_gem_bo_madvise_internal
62922944501Smrg		((drm_intel_bufmgr_gem *) bo->bufmgr,
63022944501Smrg		 (drm_intel_bo_gem *) bo,
63122944501Smrg		 madv);
63222944501Smrg}
63322944501Smrg
/* drop the oldest entries that have been purged by the kernel */
static void
drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
				    struct drm_intel_gem_bo_bucket *bucket)
{
	while (!DRMLISTEMPTY(&bucket->head)) {
		drm_intel_bo_gem *bo_gem;

		/* Entries are kept in LRU order, oldest at the front. */
		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
				      bucket->head.next, head);
		/* Stop at the first buffer whose pages are still resident;
		 * everything younger on the list is presumed resident too. */
		if (drm_intel_gem_bo_madvise_internal
		    (bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
			break;

		DRMLISTDEL(&bo_gem->head);
		drm_intel_gem_bo_free(&bo_gem->bo);
	}
}
65222944501Smrg
/**
 * Allocate a buffer object, preferring to recycle one from the bucket
 * cache over asking the kernel for fresh pages.
 *
 * @flags may contain BO_ALLOC_FOR_RENDER, which selects the MRU end of
 * the cache list.  @tiling_mode and @stride are applied to whichever BO
 * is returned.  Returns NULL on allocation failure.
 */
static drm_intel_bo *
drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
				const char *name,
				unsigned long size,
				unsigned long flags,
				uint32_t tiling_mode,
				unsigned long stride)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
	drm_intel_bo_gem *bo_gem;
	unsigned int page_size = getpagesize();
	int ret;
	struct drm_intel_gem_bo_bucket *bucket;
	bool alloc_from_cache;
	unsigned long bo_size;
	bool for_render = false;

	if (flags & BO_ALLOC_FOR_RENDER)
		for_render = true;

	/* Round the allocated size up to a power of two number of pages. */
	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);

	/* If we don't have caching at this size, don't actually round the
	 * allocation up.
	 */
	if (bucket == NULL) {
		bo_size = size;
		if (bo_size < page_size)
			bo_size = page_size;
	} else {
		bo_size = bucket->size;
	}

	pthread_mutex_lock(&bufmgr_gem->lock);
	/* Get a buffer out of the cache if available */
retry:
	alloc_from_cache = false;
	if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
		if (for_render) {
			/* Allocate new render-target BOs from the tail (MRU)
			 * of the list, as it will likely be hot in the GPU
			 * cache and in the aperture for us.
			 */
			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
					      bucket->head.prev, head);
			DRMLISTDEL(&bo_gem->head);
			alloc_from_cache = true;
		} else {
			/* For non-render-target BOs (where we're probably
			 * going to map it first thing in order to fill it
			 * with data), check if the last BO in the cache is
			 * unbusy, and only reuse in that case. Otherwise,
			 * allocating a new buffer is probably faster than
			 * waiting for the GPU to finish.
			 */
			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
					      bucket->head.next, head);
			if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
				alloc_from_cache = true;
				DRMLISTDEL(&bo_gem->head);
			}
		}

		if (alloc_from_cache) {
			/* Kernel discarded the pages while cached: free this
			 * BO, purge its purged siblings, and retry. */
			if (!drm_intel_gem_bo_madvise_internal
			    (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
				drm_intel_gem_bo_free(&bo_gem->bo);
				drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
								    bucket);
				goto retry;
			}

			/* Requested tiling couldn't be applied to this
			 * recycled BO: drop it and pick another. */
			if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
								 tiling_mode,
								 stride)) {
				drm_intel_gem_bo_free(&bo_gem->bo);
				goto retry;
			}
		}
	}
	pthread_mutex_unlock(&bufmgr_gem->lock);

	if (!alloc_from_cache) {
		/* Nothing reusable in the cache: create a fresh object. */
		struct drm_i915_gem_create create;

		bo_gem = calloc(1, sizeof(*bo_gem));
		if (!bo_gem)
			return NULL;

		bo_gem->bo.size = bo_size;

		VG_CLEAR(create);
		create.size = bo_size;

		ret = drmIoctl(bufmgr_gem->fd,
			       DRM_IOCTL_I915_GEM_CREATE,
			       &create);
		bo_gem->gem_handle = create.handle;
		bo_gem->bo.handle = bo_gem->gem_handle;
		if (ret != 0) {
			free(bo_gem);
			return NULL;
		}
		bo_gem->bo.bufmgr = bufmgr;

		bo_gem->tiling_mode = I915_TILING_NONE;
		bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
		bo_gem->stride = 0;

		/* drm_intel_gem_bo_free calls DRMLISTDEL() for an uninitialized
		   list (vma_list), so better set the list head here */
		DRMINITLISTHEAD(&bo_gem->name_list);
		DRMINITLISTHEAD(&bo_gem->vma_list);
		if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
							 tiling_mode,
							 stride)) {
		    drm_intel_gem_bo_free(&bo_gem->bo);
		    return NULL;
		}
	}

	/* Common (re)initialization for both cached and fresh BOs. */
	bo_gem->name = name;
	atomic_set(&bo_gem->refcount, 1);
	bo_gem->validate_index = -1;
	bo_gem->reloc_tree_fences = 0;
	bo_gem->used_as_reloc_target = false;
	bo_gem->has_error = false;
	bo_gem->reusable = true;
	bo_gem->aub_annotations = NULL;
	bo_gem->aub_annotation_count = 0;

	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);

	DBG("bo_create: buf %d (%s) %ldb\n",
	    bo_gem->gem_handle, bo_gem->name, size);

	return &bo_gem->bo;
}
79222944501Smrg
79322944501Smrgstatic drm_intel_bo *
79422944501Smrgdrm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
79522944501Smrg				  const char *name,
79622944501Smrg				  unsigned long size,
79722944501Smrg				  unsigned int alignment)
79822944501Smrg{
79922944501Smrg	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
8006d98c517Smrg					       BO_ALLOC_FOR_RENDER,
8016d98c517Smrg					       I915_TILING_NONE, 0);
80222944501Smrg}
80322944501Smrg
80422944501Smrgstatic drm_intel_bo *
80522944501Smrgdrm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr,
80622944501Smrg		       const char *name,
80722944501Smrg		       unsigned long size,
80822944501Smrg		       unsigned int alignment)
80922944501Smrg{
8106d98c517Smrg	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0,
8116d98c517Smrg					       I915_TILING_NONE, 0);
81222944501Smrg}
81322944501Smrg
/**
 * Allocate a 2D buffer (x columns of cpp-byte pixels, y rows), negotiating
 * the tiling mode with the hardware constraints.
 *
 * *tiling_mode is in/out: the helpers drm_intel_gem_bo_tile_pitch()/
 * drm_intel_gem_bo_tile_size() may downgrade it, in which case the loop
 * below recomputes the geometry with the new mode until it is stable.
 * On return *pitch holds the chosen stride in bytes.
 */
81422944501Smrgstatic drm_intel_bo *
81522944501Smrgdrm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
81622944501Smrg			     int x, int y, int cpp, uint32_t *tiling_mode,
81722944501Smrg			     unsigned long *pitch, unsigned long flags)
81822944501Smrg{
81922944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
820aaba2545Smrg	unsigned long size, stride;
821aaba2545Smrg	uint32_t tiling;
82222944501Smrg
	/* Re-run whenever a helper changed *tiling_mode under us, since
	 * height alignment and pitch both depend on the tiling mode. */
823aaba2545Smrg	do {
82420131375Smrg		unsigned long aligned_y, height_alignment;
825aaba2545Smrg
826aaba2545Smrg		tiling = *tiling_mode;
827aaba2545Smrg
828aaba2545Smrg		/* If we're tiled, our allocations are in 8 or 32-row blocks,
829aaba2545Smrg		 * so failure to align our height means that we won't allocate
830aaba2545Smrg		 * enough pages.
831aaba2545Smrg		 *
832aaba2545Smrg		 * If we're untiled, we still have to align to 2 rows high
833aaba2545Smrg		 * because the data port accesses 2x2 blocks even if the
834aaba2545Smrg		 * bottom row isn't to be rendered, so failure to align means
835aaba2545Smrg		 * we could walk off the end of the GTT and fault.  This is
836aaba2545Smrg		 * documented on 965, and may be the case on older chipsets
837aaba2545Smrg		 * too so we try to be careful.
838aaba2545Smrg		 */
839aaba2545Smrg		aligned_y = y;
84020131375Smrg		height_alignment = 2;
84120131375Smrg
84220131375Smrg		if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE)
84320131375Smrg			height_alignment = 16;
84420131375Smrg		else if (tiling == I915_TILING_X
84520131375Smrg			|| (IS_915(bufmgr_gem->pci_device)
84620131375Smrg			    && tiling == I915_TILING_Y))
84720131375Smrg			height_alignment = 8;
848aaba2545Smrg		else if (tiling == I915_TILING_Y)
84920131375Smrg			height_alignment = 32;
85020131375Smrg		aligned_y = ALIGN(y, height_alignment);
851aaba2545Smrg
852aaba2545Smrg		stride = x * cpp;
8536d98c517Smrg		stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode);
854aaba2545Smrg		size = stride * aligned_y;
855aaba2545Smrg		size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
856aaba2545Smrg	} while (*tiling_mode != tiling);
85722944501Smrg	*pitch = stride;
85822944501Smrg
	/* An untiled bo carries stride 0 through the SET_TILING interface. */
8596d98c517Smrg	if (tiling == I915_TILING_NONE)
8606d98c517Smrg		stride = 0;
8616d98c517Smrg
8626d98c517Smrg	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags,
8636d98c517Smrg					       tiling, stride);
86422944501Smrg}
86522944501Smrg
/**
 * Wrap a page-aligned block of user memory @addr of @size bytes in a
 * GEM object via DRM_IOCTL_I915_GEM_USERPTR.
 *
 * Only I915_TILING_NONE is accepted.  The resulting bo is never put in
 * the reuse cache (reusable = false) and CPU maps simply return @addr.
 * Returns NULL on rejection, calloc failure, or ioctl failure.
 * NOTE(review): @flags is passed straight to the kernel as userptr
 * flags — callers presumably use I915_USERPTR_* values; verify.
 */
866a884aba1Smrgstatic drm_intel_bo *
867a884aba1Smrgdrm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
868a884aba1Smrg				const char *name,
869a884aba1Smrg				void *addr,
870a884aba1Smrg				uint32_t tiling_mode,
871a884aba1Smrg				uint32_t stride,
872a884aba1Smrg				unsigned long size,
873a884aba1Smrg				unsigned long flags)
874a884aba1Smrg{
875a884aba1Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
876a884aba1Smrg	drm_intel_bo_gem *bo_gem;
877a884aba1Smrg	int ret;
878a884aba1Smrg	struct drm_i915_gem_userptr userptr;
879a884aba1Smrg
880a884aba1Smrg	/* Tiling with userptr surfaces is not supported
881a884aba1Smrg	 * on all hardware so refuse it for time being.
882a884aba1Smrg	 */
883a884aba1Smrg	if (tiling_mode != I915_TILING_NONE)
884a884aba1Smrg		return NULL;
885a884aba1Smrg
886a884aba1Smrg	bo_gem = calloc(1, sizeof(*bo_gem));
887a884aba1Smrg	if (!bo_gem)
888a884aba1Smrg		return NULL;
889a884aba1Smrg
890a884aba1Smrg	bo_gem->bo.size = size;
891a884aba1Smrg
892a884aba1Smrg	VG_CLEAR(userptr);
893a884aba1Smrg	userptr.user_ptr = (__u64)((unsigned long)addr);
894a884aba1Smrg	userptr.user_size = size;
895a884aba1Smrg	userptr.flags = flags;
896a884aba1Smrg
897a884aba1Smrg	ret = drmIoctl(bufmgr_gem->fd,
898a884aba1Smrg			DRM_IOCTL_I915_GEM_USERPTR,
899a884aba1Smrg			&userptr);
900a884aba1Smrg	if (ret != 0) {
901a884aba1Smrg		DBG("bo_create_userptr: "
902a884aba1Smrg		    "ioctl failed with user ptr %p size 0x%lx, "
903a884aba1Smrg		    "user flags 0x%lx\n", addr, size, flags);
904a884aba1Smrg		free(bo_gem);
905a884aba1Smrg		return NULL;
906a884aba1Smrg	}
907a884aba1Smrg
908a884aba1Smrg	bo_gem->gem_handle = userptr.handle;
909a884aba1Smrg	bo_gem->bo.handle = bo_gem->gem_handle;
910a884aba1Smrg	bo_gem->bo.bufmgr    = bufmgr;
911a884aba1Smrg	bo_gem->is_userptr   = true;
912a884aba1Smrg	bo_gem->bo.virtual   = addr;
913a884aba1Smrg	/* Save the address provided by user */
914a884aba1Smrg	bo_gem->user_virtual = addr;
915a884aba1Smrg	bo_gem->tiling_mode  = I915_TILING_NONE;
916a884aba1Smrg	bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
917a884aba1Smrg	bo_gem->stride       = 0;
918a884aba1Smrg
	/* Initialized (empty) list heads keep later DRMLISTDEL() calls in
	 * drm_intel_gem_bo_free()/unreference_final() safe. */
919a884aba1Smrg	DRMINITLISTHEAD(&bo_gem->name_list);
920a884aba1Smrg	DRMINITLISTHEAD(&bo_gem->vma_list);
921a884aba1Smrg
922a884aba1Smrg	bo_gem->name = name;
923a884aba1Smrg	atomic_set(&bo_gem->refcount, 1);
924a884aba1Smrg	bo_gem->validate_index = -1;
925a884aba1Smrg	bo_gem->reloc_tree_fences = 0;
926a884aba1Smrg	bo_gem->used_as_reloc_target = false;
927a884aba1Smrg	bo_gem->has_error = false;
928a884aba1Smrg	bo_gem->reusable = false;
929a884aba1Smrg
930a884aba1Smrg	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
931a884aba1Smrg
932a884aba1Smrg	DBG("bo_create_userptr: "
933a884aba1Smrg	    "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n",
934a884aba1Smrg		addr, bo_gem->gem_handle, bo_gem->name,
935a884aba1Smrg		size, stride, tiling_mode);
936a884aba1Smrg
937a884aba1Smrg	return &bo_gem->bo;
938a884aba1Smrg}
939a884aba1Smrg
94022944501Smrg/**
94122944501Smrg * Returns a drm_intel_bo wrapping the given buffer object handle.
94222944501Smrg *
94322944501Smrg * This can be used when one application needs to pass a buffer object
94422944501Smrg * to another.
94522944501Smrg */
946a884aba1Smrgdrm_public drm_intel_bo *
94722944501Smrgdrm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr,
94822944501Smrg				  const char *name,
94922944501Smrg				  unsigned int handle)
95022944501Smrg{
95122944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
95222944501Smrg	drm_intel_bo_gem *bo_gem;
95322944501Smrg	int ret;
95422944501Smrg	struct drm_gem_open open_arg;
95522944501Smrg	struct drm_i915_gem_get_tiling get_tiling;
95620131375Smrg	drmMMListHead *list;
95722944501Smrg
95820131375Smrg	/* At the moment most applications only have a few named bo.
95920131375Smrg	 * For instance, in a DRI client only the render buffers passed
96020131375Smrg	 * between X and the client are named. And since X returns the
96120131375Smrg	 * alternating names for the front/back buffer a linear search
96220131375Smrg	 * provides a sufficiently fast match.
96320131375Smrg	 */
964a884aba1Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
96520131375Smrg	for (list = bufmgr_gem->named.next;
96620131375Smrg	     list != &bufmgr_gem->named;
96720131375Smrg	     list = list->next) {
96820131375Smrg		bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list);
96920131375Smrg		if (bo_gem->global_name == handle) {
97020131375Smrg			drm_intel_gem_bo_reference(&bo_gem->bo);
971a884aba1Smrg			pthread_mutex_unlock(&bufmgr_gem->lock);
97220131375Smrg			return &bo_gem->bo;
97320131375Smrg		}
97420131375Smrg	}
97522944501Smrg
97620131375Smrg	VG_CLEAR(open_arg);
97722944501Smrg	open_arg.name = handle;
9786d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
9796d98c517Smrg		       DRM_IOCTL_GEM_OPEN,
9806d98c517Smrg		       &open_arg);
98122944501Smrg	if (ret != 0) {
9829ce4edccSmrg		DBG("Couldn't reference %s handle 0x%08x: %s\n",
9839ce4edccSmrg		    name, handle, strerror(errno));
984a884aba1Smrg		pthread_mutex_unlock(&bufmgr_gem->lock);
98522944501Smrg		return NULL;
98622944501Smrg	}
98720131375Smrg        /* Now see if someone has used a prime handle to get this
98820131375Smrg         * object from the kernel before by looking through the list
98920131375Smrg         * again for a matching gem_handle
99020131375Smrg         */
99120131375Smrg	for (list = bufmgr_gem->named.next;
99220131375Smrg	     list != &bufmgr_gem->named;
99320131375Smrg	     list = list->next) {
99420131375Smrg		bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list);
99520131375Smrg		if (bo_gem->gem_handle == open_arg.handle) {
99620131375Smrg			drm_intel_gem_bo_reference(&bo_gem->bo);
997a884aba1Smrg			pthread_mutex_unlock(&bufmgr_gem->lock);
99820131375Smrg			return &bo_gem->bo;
99920131375Smrg		}
100020131375Smrg	}
100120131375Smrg
100220131375Smrg	bo_gem = calloc(1, sizeof(*bo_gem));
1003a884aba1Smrg	if (!bo_gem) {
1004a884aba1Smrg		pthread_mutex_unlock(&bufmgr_gem->lock);
100520131375Smrg		return NULL;
1006a884aba1Smrg	}
100720131375Smrg
100822944501Smrg	bo_gem->bo.size = open_arg.size;
100922944501Smrg	bo_gem->bo.offset = 0;
101020131375Smrg	bo_gem->bo.offset64 = 0;
101122944501Smrg	bo_gem->bo.virtual = NULL;
101222944501Smrg	bo_gem->bo.bufmgr = bufmgr;
101322944501Smrg	bo_gem->name = name;
101422944501Smrg	atomic_set(&bo_gem->refcount, 1);
101522944501Smrg	bo_gem->validate_index = -1;
101622944501Smrg	bo_gem->gem_handle = open_arg.handle;
101720131375Smrg	bo_gem->bo.handle = open_arg.handle;
101822944501Smrg	bo_gem->global_name = handle;
101920131375Smrg	bo_gem->reusable = false;
102022944501Smrg
102120131375Smrg	VG_CLEAR(get_tiling);
102222944501Smrg	get_tiling.handle = bo_gem->gem_handle;
10236d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
10246d98c517Smrg		       DRM_IOCTL_I915_GEM_GET_TILING,
10256d98c517Smrg		       &get_tiling);
102622944501Smrg	if (ret != 0) {
102722944501Smrg		drm_intel_gem_bo_unreference(&bo_gem->bo);
1028a884aba1Smrg		pthread_mutex_unlock(&bufmgr_gem->lock);
102922944501Smrg		return NULL;
103022944501Smrg	}
103122944501Smrg	bo_gem->tiling_mode = get_tiling.tiling_mode;
103222944501Smrg	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
10336d98c517Smrg	/* XXX stride is unknown */
103422944501Smrg	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
103522944501Smrg
103620131375Smrg	DRMINITLISTHEAD(&bo_gem->vma_list);
103720131375Smrg	DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
1038a884aba1Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
103922944501Smrg	DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
104022944501Smrg
104122944501Smrg	return &bo_gem->bo;
104222944501Smrg}
104322944501Smrg
/**
 * Release a bo's mappings and close its GEM handle, then free the
 * wrapper struct.  Unconditional teardown — does no refcounting and no
 * locking itself; NOTE(review): callers appear to hold bufmgr_gem->lock
 * (vma_count is updated unprotected here) — confirm at call sites.
 */
104422944501Smrgstatic void
104522944501Smrgdrm_intel_gem_bo_free(drm_intel_bo *bo)
104622944501Smrg{
104722944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
104822944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
104922944501Smrg	struct drm_gem_close close;
105022944501Smrg	int ret;
105122944501Smrg
	/* Requires vma_list to have been initialized (a no-op on an empty,
	 * self-linked head; a crash on a zeroed one). */
105220131375Smrg	DRMLISTDEL(&bo_gem->vma_list);
105320131375Smrg	if (bo_gem->mem_virtual) {
105420131375Smrg		VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0));
1055a884aba1Smrg		drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
105620131375Smrg		bufmgr_gem->vma_count--;
105720131375Smrg	}
105820131375Smrg	if (bo_gem->gtt_virtual) {
1059a884aba1Smrg		drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
106020131375Smrg		bufmgr_gem->vma_count--;
106120131375Smrg	}
106222944501Smrg
106322944501Smrg	/* Close this object */
106420131375Smrg	VG_CLEAR(close);
106522944501Smrg	close.handle = bo_gem->gem_handle;
10666d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close);
106722944501Smrg	if (ret != 0) {
10689ce4edccSmrg		DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
10699ce4edccSmrg		    bo_gem->gem_handle, bo_gem->name, strerror(errno));
107022944501Smrg	}
107120131375Smrg	free(bo_gem->aub_annotations);
107222944501Smrg	free(bo);
107322944501Smrg}
107422944501Smrg
/**
 * Tell Valgrind that any cached CPU/GTT mappings of @bo must no longer
 * be read without a fresh map call.  Compiles to a no-op unless built
 * with Valgrind support (HAVE_VALGRIND).
 */
107520131375Smrgstatic void
107620131375Smrgdrm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo)
107720131375Smrg{
107820131375Smrg#if HAVE_VALGRIND
107920131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
108020131375Smrg
108120131375Smrg	if (bo_gem->mem_virtual)
108220131375Smrg		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size);
108320131375Smrg
108420131375Smrg	if (bo_gem->gtt_virtual)
108520131375Smrg		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size);
108620131375Smrg#endif
108720131375Smrg}
108820131375Smrg
108922944501Smrg/** Frees all cached buffers significantly older than @time. */
109022944501Smrgstatic void
109122944501Smrgdrm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
109222944501Smrg{
109322944501Smrg	int i;
109422944501Smrg
	/* Rate-limit: scan the buckets at most once per second. */
10956d98c517Smrg	if (bufmgr_gem->time == time)
10966d98c517Smrg		return;
10976d98c517Smrg
1098aaba2545Smrg	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
109922944501Smrg		struct drm_intel_gem_bo_bucket *bucket =
110022944501Smrg		    &bufmgr_gem->cache_bucket[i];
110122944501Smrg
110222944501Smrg		while (!DRMLISTEMPTY(&bucket->head)) {
110322944501Smrg			drm_intel_bo_gem *bo_gem;
110422944501Smrg
110522944501Smrg			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
110622944501Smrg					      bucket->head.next, head);
	/* Buckets are filled at the tail, so entries are in free_time
	 * order: stop at the first one that is still fresh. */
110722944501Smrg			if (time - bo_gem->free_time <= 1)
110822944501Smrg				break;
110922944501Smrg
111022944501Smrg			DRMLISTDEL(&bo_gem->head);
111122944501Smrg
111222944501Smrg			drm_intel_gem_bo_free(&bo_gem->bo);
111322944501Smrg		}
111422944501Smrg	}
11156d98c517Smrg
11166d98c517Smrg	bufmgr_gem->time = time;
111722944501Smrg}
111822944501Smrg
/**
 * Evict cached-but-unmapped vmas until the total stays under the
 * configured limit (vma_max < 0 means unlimited).  The eviction order
 * is oldest-first, from the head of bufmgr_gem->vma_cache.
 */
111920131375Smrgstatic void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem)
112020131375Smrg{
112120131375Smrg	int limit;
112220131375Smrg
112320131375Smrg	DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__,
112420131375Smrg	    bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max);
112520131375Smrg
112620131375Smrg	if (bufmgr_gem->vma_max < 0)
112720131375Smrg		return;
112820131375Smrg
112920131375Smrg	/* We may need to evict a few entries in order to create new mmaps */
	/* Budget 2 per open bo (one CPU map + one GTT map each). */
113020131375Smrg	limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open;
113120131375Smrg	if (limit < 0)
113220131375Smrg		limit = 0;
113320131375Smrg
113420131375Smrg	while (bufmgr_gem->vma_count > limit) {
113520131375Smrg		drm_intel_bo_gem *bo_gem;
113620131375Smrg
113720131375Smrg		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
113820131375Smrg				      bufmgr_gem->vma_cache.next,
113920131375Smrg				      vma_list);
	/* Only unmapped bos may sit on the vma cache. */
114020131375Smrg		assert(bo_gem->map_count == 0);
114120131375Smrg		DRMLISTDELINIT(&bo_gem->vma_list);
114220131375Smrg
114320131375Smrg		if (bo_gem->mem_virtual) {
1144a884aba1Smrg			drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
114520131375Smrg			bo_gem->mem_virtual = NULL;
114620131375Smrg			bufmgr_gem->vma_count--;
114720131375Smrg		}
114820131375Smrg		if (bo_gem->gtt_virtual) {
1149a884aba1Smrg			drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
115020131375Smrg			bo_gem->gtt_virtual = NULL;
115120131375Smrg			bufmgr_gem->vma_count--;
115220131375Smrg		}
115320131375Smrg	}
115420131375Smrg}
115520131375Smrg
115620131375Smrgstatic void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem,
115720131375Smrg				       drm_intel_bo_gem *bo_gem)
115820131375Smrg{
115920131375Smrg	bufmgr_gem->vma_open--;
116020131375Smrg	DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache);
116120131375Smrg	if (bo_gem->mem_virtual)
116220131375Smrg		bufmgr_gem->vma_count++;
116320131375Smrg	if (bo_gem->gtt_virtual)
116420131375Smrg		bufmgr_gem->vma_count++;
116520131375Smrg	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
116620131375Smrg}
116720131375Smrg
116820131375Smrgstatic void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem,
116920131375Smrg				      drm_intel_bo_gem *bo_gem)
117020131375Smrg{
117120131375Smrg	bufmgr_gem->vma_open++;
117220131375Smrg	DRMLISTDEL(&bo_gem->vma_list);
117320131375Smrg	if (bo_gem->mem_virtual)
117420131375Smrg		bufmgr_gem->vma_count--;
117520131375Smrg	if (bo_gem->gtt_virtual)
117620131375Smrg		bufmgr_gem->vma_count--;
117720131375Smrg	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
117820131375Smrg}
117920131375Smrg
/**
 * Tear down a bo whose refcount has hit zero: drop its references to
 * relocation targets, release reloc bookkeeping, close stale mappings,
 * then either park the buffer in the size-bucketed reuse cache (marked
 * DONTNEED so the kernel may reclaim its pages) or free it outright.
 * NOTE(review): runs with bufmgr_gem->lock held, judging by the
 * "_locked_" recursion below — confirm at call sites.
 */
118022944501Smrgstatic void
118122944501Smrgdrm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
118222944501Smrg{
118322944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
118422944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
118522944501Smrg	struct drm_intel_gem_bo_bucket *bucket;
118622944501Smrg	int i;
118722944501Smrg
118822944501Smrg	/* Unreference all the target buffers */
118922944501Smrg	for (i = 0; i < bo_gem->reloc_count; i++) {
	/* Skip self-relocations to avoid recursing into ourselves. */
1190aaba2545Smrg		if (bo_gem->reloc_target_info[i].bo != bo) {
1191aaba2545Smrg			drm_intel_gem_bo_unreference_locked_timed(bo_gem->
1192aaba2545Smrg								  reloc_target_info[i].bo,
1193aaba2545Smrg								  time);
1194aaba2545Smrg		}
119522944501Smrg	}
119622944501Smrg	bo_gem->reloc_count = 0;
119720131375Smrg	bo_gem->used_as_reloc_target = false;
119822944501Smrg
119922944501Smrg	DBG("bo_unreference final: %d (%s)\n",
120022944501Smrg	    bo_gem->gem_handle, bo_gem->name);
120122944501Smrg
120222944501Smrg	/* release memory associated with this object */
120322944501Smrg	if (bo_gem->reloc_target_info) {
120422944501Smrg		free(bo_gem->reloc_target_info);
120522944501Smrg		bo_gem->reloc_target_info = NULL;
120622944501Smrg	}
120722944501Smrg	if (bo_gem->relocs) {
120822944501Smrg		free(bo_gem->relocs);
120922944501Smrg		bo_gem->relocs = NULL;
121022944501Smrg	}
121122944501Smrg
121220131375Smrg	/* Clear any left-over mappings */
121320131375Smrg	if (bo_gem->map_count) {
121420131375Smrg		DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count);
121520131375Smrg		bo_gem->map_count = 0;
121620131375Smrg		drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
121720131375Smrg		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
121820131375Smrg	}
121920131375Smrg
122020131375Smrg	DRMLISTDEL(&bo_gem->name_list);
122120131375Smrg
122222944501Smrg	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
122322944501Smrg	/* Put the buffer into our internal cache for reuse if we can. */
122422944501Smrg	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
122522944501Smrg	    drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
122622944501Smrg					      I915_MADV_DONTNEED)) {
122722944501Smrg		bo_gem->free_time = time;
122822944501Smrg
122922944501Smrg		bo_gem->name = NULL;
123022944501Smrg		bo_gem->validate_index = -1;
123122944501Smrg
123222944501Smrg		DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
123322944501Smrg	} else {
123422944501Smrg		drm_intel_gem_bo_free(bo);
123522944501Smrg	}
123622944501Smrg}
123722944501Smrg
123822944501Smrgstatic void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
123922944501Smrg						      time_t time)
124022944501Smrg{
124122944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
124222944501Smrg
124322944501Smrg	assert(atomic_read(&bo_gem->refcount) > 0);
124422944501Smrg	if (atomic_dec_and_test(&bo_gem->refcount))
124522944501Smrg		drm_intel_gem_bo_unreference_final(bo, time);
124622944501Smrg}
124722944501Smrg
/**
 * Public unreference entry point.  Fast path: when more than one
 * reference remains, the count is dropped atomically without taking the
 * bufmgr lock.  Slow path: when this may be the last reference, take
 * the lock, re-check under it, and do the final teardown plus a cache
 * sweep.  NOTE(review): this relies on atomic_add_unless() returning
 * nonzero when the count already equals the "unless" value (1), i.e.
 * without decrementing — verify against xf86atomic.h.
 */
124822944501Smrgstatic void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
124922944501Smrg{
125022944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
125122944501Smrg
125222944501Smrg	assert(atomic_read(&bo_gem->refcount) > 0);
1253a884aba1Smrg
1254a884aba1Smrg	if (atomic_add_unless(&bo_gem->refcount, -1, 1)) {
125522944501Smrg		drm_intel_bufmgr_gem *bufmgr_gem =
125622944501Smrg		    (drm_intel_bufmgr_gem *) bo->bufmgr;
125722944501Smrg		struct timespec time;
125822944501Smrg
125922944501Smrg		clock_gettime(CLOCK_MONOTONIC, &time);
126022944501Smrg
126122944501Smrg		pthread_mutex_lock(&bufmgr_gem->lock);
1262a884aba1Smrg
	/* Re-check under the lock: another thread may have taken a
	 * reference between the atomic test and the lock acquisition. */
1263a884aba1Smrg		if (atomic_dec_and_test(&bo_gem->refcount)) {
1264a884aba1Smrg			drm_intel_gem_bo_unreference_final(bo, time.tv_sec);
1265a884aba1Smrg			drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec);
1266a884aba1Smrg		}
1267a884aba1Smrg
126822944501Smrg		pthread_mutex_unlock(&bufmgr_gem->lock);
126922944501Smrg	}
127022944501Smrg}
127122944501Smrg
/**
 * Map @bo into the CPU's address space through the CPU (cached) domain.
 * Lazily creates and caches the mmap in bo_gem->mem_virtual, then moves
 * the object to the CPU domain (write domain too if @write_enable).
 * userptr bos simply get back the user's own pointer.
 * Returns 0 on success or -errno on mmap failure; a SET_DOMAIN failure
 * is only logged, matching the original behavior.
 */
127222944501Smrgstatic int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
127322944501Smrg{
127422944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
127522944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
127622944501Smrg	struct drm_i915_gem_set_domain set_domain;
127722944501Smrg	int ret;
127822944501Smrg
1279a884aba1Smrg	if (bo_gem->is_userptr) {
1280a884aba1Smrg		/* Return the same user ptr */
1281a884aba1Smrg		bo->virtual = bo_gem->user_virtual;
1282a884aba1Smrg		return 0;
1283a884aba1Smrg	}
1284a884aba1Smrg
128522944501Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
128622944501Smrg
	/* First mapping: pull the bo off the vma reuse cache. */
128720131375Smrg	if (bo_gem->map_count++ == 0)
128820131375Smrg		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
128920131375Smrg
129022944501Smrg	if (!bo_gem->mem_virtual) {
129122944501Smrg		struct drm_i915_gem_mmap mmap_arg;
129222944501Smrg
129320131375Smrg		DBG("bo_map: %d (%s), map_count=%d\n",
129420131375Smrg		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
129522944501Smrg
129620131375Smrg		VG_CLEAR(mmap_arg);
129722944501Smrg		mmap_arg.handle = bo_gem->gem_handle;
129822944501Smrg		mmap_arg.offset = 0;
129922944501Smrg		mmap_arg.size = bo->size;
13006d98c517Smrg		ret = drmIoctl(bufmgr_gem->fd,
13016d98c517Smrg			       DRM_IOCTL_I915_GEM_MMAP,
13026d98c517Smrg			       &mmap_arg);
130322944501Smrg		if (ret != 0) {
130422944501Smrg			ret = -errno;
13059ce4edccSmrg			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
13069ce4edccSmrg			    __FILE__, __LINE__, bo_gem->gem_handle,
13079ce4edccSmrg			    bo_gem->name, strerror(errno));
	/* Undo the map_count bump taken above before bailing out. */
130820131375Smrg			if (--bo_gem->map_count == 0)
130920131375Smrg				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
131022944501Smrg			pthread_mutex_unlock(&bufmgr_gem->lock);
131122944501Smrg			return ret;
131222944501Smrg		}
131320131375Smrg		VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
131422944501Smrg		bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
131522944501Smrg	}
131622944501Smrg	DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
131722944501Smrg	    bo_gem->mem_virtual);
131822944501Smrg	bo->virtual = bo_gem->mem_virtual;
131922944501Smrg
132020131375Smrg	VG_CLEAR(set_domain);
132122944501Smrg	set_domain.handle = bo_gem->gem_handle;
132222944501Smrg	set_domain.read_domains = I915_GEM_DOMAIN_CPU;
132322944501Smrg	if (write_enable)
132422944501Smrg		set_domain.write_domain = I915_GEM_DOMAIN_CPU;
132522944501Smrg	else
132622944501Smrg		set_domain.write_domain = 0;
13276d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
13286d98c517Smrg		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
13296d98c517Smrg		       &set_domain);
133022944501Smrg	if (ret != 0) {
13319ce4edccSmrg		DBG("%s:%d: Error setting to CPU domain %d: %s\n",
13329ce4edccSmrg		    __FILE__, __LINE__, bo_gem->gem_handle,
13339ce4edccSmrg		    strerror(errno));
133422944501Smrg	}
133522944501Smrg
	/* Remember that a CPU-visible write may need sw_finish on unmap. */
133620131375Smrg	if (write_enable)
133720131375Smrg		bo_gem->mapped_cpu_write = true;
133820131375Smrg
133920131375Smrg	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
134020131375Smrg	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size));
134122944501Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
134222944501Smrg
134322944501Smrg	return 0;
134422944501Smrg}
134522944501Smrg
/**
 * Internal helper: create (or reuse) a GTT mmap of @bo and expose it as
 * bo->virtual.  Does NOT change the object's domain — callers do that.
 * NOTE(review): caller must hold bufmgr_gem->lock; both callers in this
 * file take it first.  Returns 0 or -errno; userptr bos are rejected
 * with -EINVAL since they have no GTT mapping.
 */
134620131375Smrgstatic int
134720131375Smrgmap_gtt(drm_intel_bo *bo)
134822944501Smrg{
134922944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
135022944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
135122944501Smrg	int ret;
135222944501Smrg
1353a884aba1Smrg	if (bo_gem->is_userptr)
1354a884aba1Smrg		return -EINVAL;
1355a884aba1Smrg
	/* First mapping: pull the bo off the vma reuse cache. */
135620131375Smrg	if (bo_gem->map_count++ == 0)
135720131375Smrg		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
135822944501Smrg
135922944501Smrg	/* Get a mapping of the buffer if we haven't before. */
136022944501Smrg	if (bo_gem->gtt_virtual == NULL) {
136122944501Smrg		struct drm_i915_gem_mmap_gtt mmap_arg;
136222944501Smrg
136320131375Smrg		DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
136420131375Smrg		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
136522944501Smrg
136620131375Smrg		VG_CLEAR(mmap_arg);
136722944501Smrg		mmap_arg.handle = bo_gem->gem_handle;
136822944501Smrg
136922944501Smrg		/* Get the fake offset back... */
13706d98c517Smrg		ret = drmIoctl(bufmgr_gem->fd,
13716d98c517Smrg			       DRM_IOCTL_I915_GEM_MMAP_GTT,
13726d98c517Smrg			       &mmap_arg);
137322944501Smrg		if (ret != 0) {
137422944501Smrg			ret = -errno;
13759ce4edccSmrg			DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
13769ce4edccSmrg			    __FILE__, __LINE__,
13779ce4edccSmrg			    bo_gem->gem_handle, bo_gem->name,
13789ce4edccSmrg			    strerror(errno));
	/* Undo the map_count bump taken above before bailing out. */
137920131375Smrg			if (--bo_gem->map_count == 0)
138020131375Smrg				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
138122944501Smrg			return ret;
138222944501Smrg		}
138322944501Smrg
138422944501Smrg		/* and mmap it */
1385aec75c42Sriastradh		ret = drmMap(bufmgr_gem->fd, mmap_arg.offset, bo->size,
1386aec75c42Sriastradh		    &bo_gem->gtt_virtual);
1387aec75c42Sriastradh		if (ret) {
138822944501Smrg			bo_gem->gtt_virtual = NULL;
13899ce4edccSmrg			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
13909ce4edccSmrg			    __FILE__, __LINE__,
13919ce4edccSmrg			    bo_gem->gem_handle, bo_gem->name,
13929ce4edccSmrg			    strerror(errno));
139320131375Smrg			if (--bo_gem->map_count == 0)
139420131375Smrg				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
139522944501Smrg			return ret;
139622944501Smrg		}
139722944501Smrg	}
139822944501Smrg
139922944501Smrg	bo->virtual = bo_gem->gtt_virtual;
140022944501Smrg
140122944501Smrg	DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
140222944501Smrg	    bo_gem->gtt_virtual);
140322944501Smrg
140420131375Smrg	return 0;
140520131375Smrg}
140620131375Smrg
/**
 * Map @bo through the GTT (write-combined, detiled by the hardware) and
 * synchronize: the object is moved to the GTT read+write domain so the
 * GPU is done with it before the CPU touches the map.
 * Returns 0 or -errno from map_gtt(); a SET_DOMAIN failure is only
 * logged, matching the original behavior.
 */
1407a884aba1Smrgdrm_public int
1408a884aba1Smrgdrm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
140920131375Smrg{
141020131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
141120131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
141220131375Smrg	struct drm_i915_gem_set_domain set_domain;
141320131375Smrg	int ret;
141420131375Smrg
141520131375Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
141620131375Smrg
141720131375Smrg	ret = map_gtt(bo);
141820131375Smrg	if (ret) {
141920131375Smrg		pthread_mutex_unlock(&bufmgr_gem->lock);
142020131375Smrg		return ret;
142120131375Smrg	}
142220131375Smrg
142320131375Smrg	/* Now move it to the GTT domain so that the GPU and CPU
142420131375Smrg	 * caches are flushed and the GPU isn't actively using the
142520131375Smrg	 * buffer.
142620131375Smrg	 *
142720131375Smrg	 * The pagefault handler does this domain change for us when
142820131375Smrg	 * it has unbound the BO from the GTT, but it's up to us to
142920131375Smrg	 * tell it when we're about to use things if we had done
143020131375Smrg	 * rendering and it still happens to be bound to the GTT.
143120131375Smrg	 */
143220131375Smrg	VG_CLEAR(set_domain);
143322944501Smrg	set_domain.handle = bo_gem->gem_handle;
143422944501Smrg	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
143522944501Smrg	set_domain.write_domain = I915_GEM_DOMAIN_GTT;
14366d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
14376d98c517Smrg		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
14386d98c517Smrg		       &set_domain);
143922944501Smrg	if (ret != 0) {
14409ce4edccSmrg		DBG("%s:%d: Error setting domain %d: %s\n",
14419ce4edccSmrg		    __FILE__, __LINE__, bo_gem->gem_handle,
14429ce4edccSmrg		    strerror(errno));
144322944501Smrg	}
144422944501Smrg
144520131375Smrg	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
144620131375Smrg	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
144722944501Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
144822944501Smrg
14496d98c517Smrg	return 0;
145022944501Smrg}
145122944501Smrg
145220131375Smrg/**
145320131375Smrg * Performs a mapping of the buffer object like the normal GTT
145420131375Smrg * mapping, but avoids waiting for the GPU to be done reading from or
145520131375Smrg * rendering to the buffer.
145620131375Smrg *
145720131375Smrg * This is used in the implementation of GL_ARB_map_buffer_range: The
145820131375Smrg * user asks to create a buffer, then does a mapping, fills some
145920131375Smrg * space, runs a drawing command, then asks to map it again without
146020131375Smrg * synchronizing because it guarantees that it won't write over the
146120131375Smrg * data that the GPU is busy using (or, more specifically, that if it
146220131375Smrg * does write over the data, it acknowledges that rendering is
146320131375Smrg * undefined).
146420131375Smrg */
146520131375Smrg
1466a884aba1Smrgdrm_public int
1467a884aba1Smrgdrm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
146822944501Smrg{
146922944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
147020131375Smrg#ifdef HAVE_VALGRIND
147120131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
147220131375Smrg#endif
147320131375Smrg	int ret;
147422944501Smrg
147520131375Smrg	/* If the CPU cache isn't coherent with the GTT, then use a
147620131375Smrg	 * regular synchronized mapping.  The problem is that we don't
147720131375Smrg	 * track where the buffer was last used on the CPU side in
147820131375Smrg	 * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so
147920131375Smrg	 * we would potentially corrupt the buffer even when the user
148020131375Smrg	 * does reasonable things.
148120131375Smrg	 */
148220131375Smrg	if (!bufmgr_gem->has_llc)
148320131375Smrg		return drm_intel_gem_bo_map_gtt(bo);
148422944501Smrg
148522944501Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
148620131375Smrg
	/* Unlike drm_intel_gem_bo_map_gtt(), deliberately no SET_DOMAIN
	 * here — that is what makes this mapping unsynchronized. */
148720131375Smrg	ret = map_gtt(bo);
148820131375Smrg	if (ret == 0) {
148920131375Smrg		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
149020131375Smrg		VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
149120131375Smrg	}
149220131375Smrg
149322944501Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
149422944501Smrg
149522944501Smrg	return ret;
149622944501Smrg}
149722944501Smrg
/**
 * Undo one map of @bo (CPU or GTT — both funnel through here).  If the
 * bo had been CPU-write-mapped, issue SW_FINISH so scanout buffers get
 * flushed.  When the last mapping goes away the vmas are parked on the
 * reuse cache.  NULL bo and userptr bos are no-ops; unbalanced unmaps
 * are tolerated and return 0.
 */
149822944501Smrgstatic int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
149922944501Smrg{
1500a884aba1Smrg	drm_intel_bufmgr_gem *bufmgr_gem;
150122944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
150220131375Smrg	int ret = 0;
150322944501Smrg
150422944501Smrg	if (bo == NULL)
150522944501Smrg		return 0;
150622944501Smrg
1507a884aba1Smrg	if (bo_gem->is_userptr)
1508a884aba1Smrg		return 0;
1509a884aba1Smrg
1510a884aba1Smrg	bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1511a884aba1Smrg
151222944501Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
151322944501Smrg
151420131375Smrg	if (bo_gem->map_count <= 0) {
151520131375Smrg		DBG("attempted to unmap an unmapped bo\n");
151620131375Smrg		pthread_mutex_unlock(&bufmgr_gem->lock);
151720131375Smrg		/* Preserve the old behaviour of just treating this as a
151820131375Smrg		 * no-op rather than reporting the error.
151920131375Smrg		 */
152020131375Smrg		return 0;
152120131375Smrg	}
152220131375Smrg
152320131375Smrg	if (bo_gem->mapped_cpu_write) {
152420131375Smrg		struct drm_i915_gem_sw_finish sw_finish;
152520131375Smrg
152620131375Smrg		/* Cause a flush to happen if the buffer's pinned for
152720131375Smrg		 * scanout, so the results show up in a timely manner.
152820131375Smrg		 * Unlike GTT set domains, this only does work if the
152920131375Smrg		 * buffer should be scanout-related.
153020131375Smrg		 */
153120131375Smrg		VG_CLEAR(sw_finish);
153220131375Smrg		sw_finish.handle = bo_gem->gem_handle;
153320131375Smrg		ret = drmIoctl(bufmgr_gem->fd,
153420131375Smrg			       DRM_IOCTL_I915_GEM_SW_FINISH,
153520131375Smrg			       &sw_finish);
153620131375Smrg		ret = ret == -1 ? -errno : 0;
153720131375Smrg
153820131375Smrg		bo_gem->mapped_cpu_write = false;
153920131375Smrg	}
154022944501Smrg
	/* We need to "unmap" (park the vma) after every invocation, as we
	 * cannot keep an open vma for every bo: that would exhaust the
	 * system limits and cause later failures.
	 */
154120131375Smrg	if (--bo_gem->map_count == 0) {
154220131375Smrg		drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
154320131375Smrg		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
154420131375Smrg		bo->virtual = NULL;
154520131375Smrg	}
154622944501Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
154722944501Smrg
154822944501Smrg	return ret;
154922944501Smrg}
155422944501Smrg
1555a884aba1Smrgdrm_public int
1556a884aba1Smrgdrm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
155720131375Smrg{
155820131375Smrg	return drm_intel_gem_bo_unmap(bo);
155920131375Smrg}
156020131375Smrg
156122944501Smrgstatic int
156222944501Smrgdrm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
156322944501Smrg			 unsigned long size, const void *data)
156422944501Smrg{
156522944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
156622944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
156722944501Smrg	struct drm_i915_gem_pwrite pwrite;
156822944501Smrg	int ret;
156922944501Smrg
1570a884aba1Smrg	if (bo_gem->is_userptr)
1571a884aba1Smrg		return -EINVAL;
1572a884aba1Smrg
157320131375Smrg	VG_CLEAR(pwrite);
157422944501Smrg	pwrite.handle = bo_gem->gem_handle;
157522944501Smrg	pwrite.offset = offset;
157622944501Smrg	pwrite.size = size;
157722944501Smrg	pwrite.data_ptr = (uint64_t) (uintptr_t) data;
15786d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
15796d98c517Smrg		       DRM_IOCTL_I915_GEM_PWRITE,
15806d98c517Smrg		       &pwrite);
158122944501Smrg	if (ret != 0) {
158222944501Smrg		ret = -errno;
15839ce4edccSmrg		DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
15849ce4edccSmrg		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
15859ce4edccSmrg		    (int)size, strerror(errno));
158622944501Smrg	}
158722944501Smrg
158822944501Smrg	return ret;
158922944501Smrg}
159022944501Smrg
159122944501Smrgstatic int
159222944501Smrgdrm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id)
159322944501Smrg{
159422944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
159522944501Smrg	struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id;
159622944501Smrg	int ret;
159722944501Smrg
159820131375Smrg	VG_CLEAR(get_pipe_from_crtc_id);
159922944501Smrg	get_pipe_from_crtc_id.crtc_id = crtc_id;
16006d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
16016d98c517Smrg		       DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
16026d98c517Smrg		       &get_pipe_from_crtc_id);
160322944501Smrg	if (ret != 0) {
160422944501Smrg		/* We return -1 here to signal that we don't
160522944501Smrg		 * know which pipe is associated with this crtc.
160622944501Smrg		 * This lets the caller know that this information
160722944501Smrg		 * isn't available; using the wrong pipe for
160822944501Smrg		 * vblank waiting can cause the chipset to lock up
160922944501Smrg		 */
161022944501Smrg		return -1;
161122944501Smrg	}
161222944501Smrg
161322944501Smrg	return get_pipe_from_crtc_id.pipe;
161422944501Smrg}
161522944501Smrg
161622944501Smrgstatic int
161722944501Smrgdrm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
161822944501Smrg			     unsigned long size, void *data)
161922944501Smrg{
162022944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
162122944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
162222944501Smrg	struct drm_i915_gem_pread pread;
162322944501Smrg	int ret;
162422944501Smrg
1625a884aba1Smrg	if (bo_gem->is_userptr)
1626a884aba1Smrg		return -EINVAL;
1627a884aba1Smrg
162820131375Smrg	VG_CLEAR(pread);
162922944501Smrg	pread.handle = bo_gem->gem_handle;
163022944501Smrg	pread.offset = offset;
163122944501Smrg	pread.size = size;
163222944501Smrg	pread.data_ptr = (uint64_t) (uintptr_t) data;
16336d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
16346d98c517Smrg		       DRM_IOCTL_I915_GEM_PREAD,
16356d98c517Smrg		       &pread);
163622944501Smrg	if (ret != 0) {
163722944501Smrg		ret = -errno;
16389ce4edccSmrg		DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
16399ce4edccSmrg		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
16409ce4edccSmrg		    (int)size, strerror(errno));
164122944501Smrg	}
164222944501Smrg
164322944501Smrg	return ret;
164422944501Smrg}
164522944501Smrg
16469ce4edccSmrg/** Waits for all GPU rendering with the object to have completed. */
164722944501Smrgstatic void
164822944501Smrgdrm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
164922944501Smrg{
16509ce4edccSmrg	drm_intel_gem_bo_start_gtt_access(bo, 1);
165122944501Smrg}
165222944501Smrg
165320131375Smrg/**
165420131375Smrg * Waits on a BO for the given amount of time.
165520131375Smrg *
165620131375Smrg * @bo: buffer object to wait for
165720131375Smrg * @timeout_ns: amount of time to wait in nanoseconds.
165820131375Smrg *   If value is less than 0, an infinite wait will occur.
165920131375Smrg *
166020131375Smrg * Returns 0 if the wait was successful ie. the last batch referencing the
166120131375Smrg * object has completed within the allotted time. Otherwise some negative return
166220131375Smrg * value describes the error. Of particular interest is -ETIME when the wait has
166320131375Smrg * failed to yield the desired result.
166420131375Smrg *
166520131375Smrg * Similar to drm_intel_gem_bo_wait_rendering except a timeout parameter allows
166620131375Smrg * the operation to give up after a certain amount of time. Another subtle
166720131375Smrg * difference is the internal locking semantics are different (this variant does
166820131375Smrg * not hold the lock for the duration of the wait). This makes the wait subject
166920131375Smrg * to a larger userspace race window.
167020131375Smrg *
167120131375Smrg * The implementation shall wait until the object is no longer actively
167220131375Smrg * referenced within a batch buffer at the time of the call. The wait will
167320131375Smrg * not guarantee that the buffer is re-issued via another thread, or an flinked
167420131375Smrg * handle. Userspace must make sure this race does not occur if such precision
167520131375Smrg * is important.
167620131375Smrg */
1677a884aba1Smrgdrm_public int
1678a884aba1Smrgdrm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns)
167920131375Smrg{
168020131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
168120131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
168220131375Smrg	struct drm_i915_gem_wait wait;
168320131375Smrg	int ret;
168420131375Smrg
168520131375Smrg	if (!bufmgr_gem->has_wait_timeout) {
168620131375Smrg		DBG("%s:%d: Timed wait is not supported. Falling back to "
168720131375Smrg		    "infinite wait\n", __FILE__, __LINE__);
168820131375Smrg		if (timeout_ns) {
168920131375Smrg			drm_intel_gem_bo_wait_rendering(bo);
169020131375Smrg			return 0;
169120131375Smrg		} else {
169220131375Smrg			return drm_intel_gem_bo_busy(bo) ? -ETIME : 0;
169320131375Smrg		}
169420131375Smrg	}
169520131375Smrg
169620131375Smrg	wait.bo_handle = bo_gem->gem_handle;
169720131375Smrg	wait.timeout_ns = timeout_ns;
169820131375Smrg	wait.flags = 0;
169920131375Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
170020131375Smrg	if (ret == -1)
170120131375Smrg		return -errno;
170220131375Smrg
170320131375Smrg	return ret;
170420131375Smrg}
170520131375Smrg
170622944501Smrg/**
170722944501Smrg * Sets the object to the GTT read and possibly write domain, used by the X
170822944501Smrg * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
170922944501Smrg *
171022944501Smrg * In combination with drm_intel_gem_bo_pin() and manual fence management, we
171122944501Smrg * can do tiled pixmaps this way.
171222944501Smrg */
1713a884aba1Smrgdrm_public void
171422944501Smrgdrm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
171522944501Smrg{
171622944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
171722944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
171822944501Smrg	struct drm_i915_gem_set_domain set_domain;
171922944501Smrg	int ret;
172022944501Smrg
172120131375Smrg	VG_CLEAR(set_domain);
172222944501Smrg	set_domain.handle = bo_gem->gem_handle;
172322944501Smrg	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
172422944501Smrg	set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
17256d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
17266d98c517Smrg		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
17276d98c517Smrg		       &set_domain);
172822944501Smrg	if (ret != 0) {
17299ce4edccSmrg		DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
17309ce4edccSmrg		    __FILE__, __LINE__, bo_gem->gem_handle,
17319ce4edccSmrg		    set_domain.read_domains, set_domain.write_domain,
17329ce4edccSmrg		    strerror(errno));
173322944501Smrg	}
173422944501Smrg}
173522944501Smrg
173622944501Smrgstatic void
173722944501Smrgdrm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
173822944501Smrg{
173922944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
174022944501Smrg	int i;
174122944501Smrg
174222944501Smrg	free(bufmgr_gem->exec2_objects);
174322944501Smrg	free(bufmgr_gem->exec_objects);
174422944501Smrg	free(bufmgr_gem->exec_bos);
174520131375Smrg	free(bufmgr_gem->aub_filename);
174622944501Smrg
174722944501Smrg	pthread_mutex_destroy(&bufmgr_gem->lock);
174822944501Smrg
174922944501Smrg	/* Free any cached buffer objects we were going to reuse */
1750aaba2545Smrg	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
175122944501Smrg		struct drm_intel_gem_bo_bucket *bucket =
175222944501Smrg		    &bufmgr_gem->cache_bucket[i];
175322944501Smrg		drm_intel_bo_gem *bo_gem;
175422944501Smrg
175522944501Smrg		while (!DRMLISTEMPTY(&bucket->head)) {
175622944501Smrg			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
175722944501Smrg					      bucket->head.next, head);
175822944501Smrg			DRMLISTDEL(&bo_gem->head);
175922944501Smrg
176022944501Smrg			drm_intel_gem_bo_free(&bo_gem->bo);
176122944501Smrg		}
176222944501Smrg	}
176322944501Smrg
176422944501Smrg	free(bufmgr);
176522944501Smrg}
176622944501Smrg
176722944501Smrg/**
176822944501Smrg * Adds the target buffer to the validation list and adds the relocation
176922944501Smrg * to the reloc_buffer's relocation list.
177022944501Smrg *
177122944501Smrg * The relocation entry at the given offset must already contain the
177222944501Smrg * precomputed relocation value, because the kernel will optimize out
177322944501Smrg * the relocation entry write when the buffer hasn't moved from the
177422944501Smrg * last known offset in target_bo.
177522944501Smrg */
177622944501Smrgstatic int
177722944501Smrgdo_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
177822944501Smrg		 drm_intel_bo *target_bo, uint32_t target_offset,
177922944501Smrg		 uint32_t read_domains, uint32_t write_domain,
178020131375Smrg		 bool need_fence)
178122944501Smrg{
178222944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
178322944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
178422944501Smrg	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
178520131375Smrg	bool fenced_command;
178622944501Smrg
178722944501Smrg	if (bo_gem->has_error)
178822944501Smrg		return -ENOMEM;
178922944501Smrg
179022944501Smrg	if (target_bo_gem->has_error) {
179120131375Smrg		bo_gem->has_error = true;
179222944501Smrg		return -ENOMEM;
179322944501Smrg	}
179422944501Smrg
179522944501Smrg	/* We never use HW fences for rendering on 965+ */
179622944501Smrg	if (bufmgr_gem->gen >= 4)
179720131375Smrg		need_fence = false;
179822944501Smrg
17999ce4edccSmrg	fenced_command = need_fence;
18009ce4edccSmrg	if (target_bo_gem->tiling_mode == I915_TILING_NONE)
180120131375Smrg		need_fence = false;
18029ce4edccSmrg
180322944501Smrg	/* Create a new relocation list if needed */
180422944501Smrg	if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo))
180522944501Smrg		return -ENOMEM;
180622944501Smrg
180722944501Smrg	/* Check overflow */
180822944501Smrg	assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);
180922944501Smrg
181022944501Smrg	/* Check args */
181122944501Smrg	assert(offset <= bo->size - 4);
181222944501Smrg	assert((write_domain & (write_domain - 1)) == 0);
181322944501Smrg
18143c748557Ssnj	/* An object needing a fence is a tiled buffer, so it won't have
18153c748557Ssnj	 * relocs to other buffers.
18163c748557Ssnj	 */
18173c748557Ssnj	if (need_fence) {
18183c748557Ssnj		assert(target_bo_gem->reloc_count == 0);
18193c748557Ssnj		target_bo_gem->reloc_tree_fences = 1;
18203c748557Ssnj	}
18213c748557Ssnj
182222944501Smrg	/* Make sure that we're not adding a reloc to something whose size has
182322944501Smrg	 * already been accounted for.
182422944501Smrg	 */
182522944501Smrg	assert(!bo_gem->used_as_reloc_target);
1826aaba2545Smrg	if (target_bo_gem != bo_gem) {
182720131375Smrg		target_bo_gem->used_as_reloc_target = true;
1828aaba2545Smrg		bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
18293c748557Ssnj		bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
1830aaba2545Smrg	}
183122944501Smrg
183222944501Smrg	bo_gem->relocs[bo_gem->reloc_count].offset = offset;
183322944501Smrg	bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
183422944501Smrg	bo_gem->relocs[bo_gem->reloc_count].target_handle =
183522944501Smrg	    target_bo_gem->gem_handle;
183622944501Smrg	bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
183722944501Smrg	bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
183820131375Smrg	bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64;
183922944501Smrg
184022944501Smrg	bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
1841aaba2545Smrg	if (target_bo != bo)
1842aaba2545Smrg		drm_intel_gem_bo_reference(target_bo);
18439ce4edccSmrg	if (fenced_command)
184422944501Smrg		bo_gem->reloc_target_info[bo_gem->reloc_count].flags =
184522944501Smrg			DRM_INTEL_RELOC_FENCE;
184622944501Smrg	else
184722944501Smrg		bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
184822944501Smrg
184922944501Smrg	bo_gem->reloc_count++;
185022944501Smrg
185122944501Smrg	return 0;
185222944501Smrg}
185322944501Smrg
185422944501Smrgstatic int
185522944501Smrgdrm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
185622944501Smrg			    drm_intel_bo *target_bo, uint32_t target_offset,
185722944501Smrg			    uint32_t read_domains, uint32_t write_domain)
185822944501Smrg{
185922944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
186022944501Smrg
186122944501Smrg	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
186222944501Smrg				read_domains, write_domain,
186322944501Smrg				!bufmgr_gem->fenced_relocs);
186422944501Smrg}
186522944501Smrg
186622944501Smrgstatic int
186722944501Smrgdrm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
186822944501Smrg				  drm_intel_bo *target_bo,
186922944501Smrg				  uint32_t target_offset,
187022944501Smrg				  uint32_t read_domains, uint32_t write_domain)
187122944501Smrg{
187222944501Smrg	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
187320131375Smrg				read_domains, write_domain, true);
187420131375Smrg}
187520131375Smrg
1876a884aba1Smrgdrm_public int
187720131375Smrgdrm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo)
187820131375Smrg{
187920131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
188020131375Smrg
188120131375Smrg	return bo_gem->reloc_count;
188220131375Smrg}
188320131375Smrg
188420131375Smrg/**
188520131375Smrg * Removes existing relocation entries in the BO after "start".
188620131375Smrg *
188720131375Smrg * This allows a user to avoid a two-step process for state setup with
188820131375Smrg * counting up all the buffer objects and doing a
188920131375Smrg * drm_intel_bufmgr_check_aperture_space() before emitting any of the
189020131375Smrg * relocations for the state setup.  Instead, save the state of the
189120131375Smrg * batchbuffer including drm_intel_gem_get_reloc_count(), emit all the
189220131375Smrg * state, and then check if it still fits in the aperture.
189320131375Smrg *
189420131375Smrg * Any further drm_intel_bufmgr_check_aperture_space() queries
189520131375Smrg * involving this buffer in the tree are undefined after this call.
189620131375Smrg */
1897a884aba1Smrgdrm_public void
189820131375Smrgdrm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start)
189920131375Smrg{
1900a884aba1Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
190120131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
190220131375Smrg	int i;
190320131375Smrg	struct timespec time;
190420131375Smrg
190520131375Smrg	clock_gettime(CLOCK_MONOTONIC, &time);
190620131375Smrg
190720131375Smrg	assert(bo_gem->reloc_count >= start);
1908a884aba1Smrg
190920131375Smrg	/* Unreference the cleared target buffers */
1910a884aba1Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
1911a884aba1Smrg
191220131375Smrg	for (i = start; i < bo_gem->reloc_count; i++) {
191320131375Smrg		drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo;
191420131375Smrg		if (&target_bo_gem->bo != bo) {
191520131375Smrg			bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences;
191620131375Smrg			drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo,
191720131375Smrg								  time.tv_sec);
191820131375Smrg		}
191920131375Smrg	}
192020131375Smrg	bo_gem->reloc_count = start;
1921a884aba1Smrg
1922a884aba1Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
1923a884aba1Smrg
192422944501Smrg}
192522944501Smrg
192622944501Smrg/**
192722944501Smrg * Walk the tree of relocations rooted at BO and accumulate the list of
192822944501Smrg * validations to be performed and update the relocation buffers with
192922944501Smrg * index values into the validation list.
193022944501Smrg */
193122944501Smrgstatic void
193222944501Smrgdrm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
193322944501Smrg{
193422944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
193522944501Smrg	int i;
193622944501Smrg
193722944501Smrg	if (bo_gem->relocs == NULL)
193822944501Smrg		return;
193922944501Smrg
194022944501Smrg	for (i = 0; i < bo_gem->reloc_count; i++) {
194122944501Smrg		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
194222944501Smrg
1943aaba2545Smrg		if (target_bo == bo)
1944aaba2545Smrg			continue;
1945aaba2545Smrg
194620131375Smrg		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
194720131375Smrg
194822944501Smrg		/* Continue walking the tree depth-first. */
194922944501Smrg		drm_intel_gem_bo_process_reloc(target_bo);
195022944501Smrg
195122944501Smrg		/* Add the target to the validate list */
195222944501Smrg		drm_intel_add_validate_buffer(target_bo);
195322944501Smrg	}
195422944501Smrg}
195522944501Smrg
195622944501Smrgstatic void
195722944501Smrgdrm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
195822944501Smrg{
195922944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
196022944501Smrg	int i;
196122944501Smrg
196222944501Smrg	if (bo_gem->relocs == NULL)
196322944501Smrg		return;
196422944501Smrg
196522944501Smrg	for (i = 0; i < bo_gem->reloc_count; i++) {
196622944501Smrg		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
196722944501Smrg		int need_fence;
196822944501Smrg
1969aaba2545Smrg		if (target_bo == bo)
1970aaba2545Smrg			continue;
1971aaba2545Smrg
197220131375Smrg		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
197320131375Smrg
197422944501Smrg		/* Continue walking the tree depth-first. */
197522944501Smrg		drm_intel_gem_bo_process_reloc2(target_bo);
197622944501Smrg
197722944501Smrg		need_fence = (bo_gem->reloc_target_info[i].flags &
197822944501Smrg			      DRM_INTEL_RELOC_FENCE);
197922944501Smrg
198022944501Smrg		/* Add the target to the validate list */
198122944501Smrg		drm_intel_add_validate_buffer2(target_bo, need_fence);
198222944501Smrg	}
198322944501Smrg}
198422944501Smrg
198522944501Smrg
198622944501Smrgstatic void
198722944501Smrgdrm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
198822944501Smrg{
198922944501Smrg	int i;
199022944501Smrg
199122944501Smrg	for (i = 0; i < bufmgr_gem->exec_count; i++) {
199222944501Smrg		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
199322944501Smrg		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
199422944501Smrg
199522944501Smrg		/* Update the buffer offset */
199620131375Smrg		if (bufmgr_gem->exec_objects[i].offset != bo->offset64) {
1997d82d45b3Sjoerg			DBG("BO %d (%s) migrated: 0x%08llx -> 0x%08llx\n",
1998d82d45b3Sjoerg			    bo_gem->gem_handle, bo_gem->name,
1999d82d45b3Sjoerg			    (unsigned long long)bo->offset64,
200022944501Smrg			    (unsigned long long)bufmgr_gem->exec_objects[i].
200122944501Smrg			    offset);
200220131375Smrg			bo->offset64 = bufmgr_gem->exec_objects[i].offset;
200322944501Smrg			bo->offset = bufmgr_gem->exec_objects[i].offset;
200422944501Smrg		}
200522944501Smrg	}
200622944501Smrg}
200722944501Smrg
200822944501Smrgstatic void
200922944501Smrgdrm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
201022944501Smrg{
201122944501Smrg	int i;
201222944501Smrg
201322944501Smrg	for (i = 0; i < bufmgr_gem->exec_count; i++) {
201422944501Smrg		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
201522944501Smrg		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
201622944501Smrg
201722944501Smrg		/* Update the buffer offset */
201820131375Smrg		if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) {
2019d82d45b3Sjoerg			DBG("BO %d (%s) migrated: 0x%08llx -> 0x%08llx\n",
2020d82d45b3Sjoerg			    bo_gem->gem_handle, bo_gem->name,
2021d82d45b3Sjoerg			    (unsigned long long)bo->offset64,
202222944501Smrg			    (unsigned long long)bufmgr_gem->exec2_objects[i].offset);
202320131375Smrg			bo->offset64 = bufmgr_gem->exec2_objects[i].offset;
202422944501Smrg			bo->offset = bufmgr_gem->exec2_objects[i].offset;
202522944501Smrg		}
202622944501Smrg	}
202722944501Smrg}
202822944501Smrg
202920131375Smrgstatic void
203020131375Smrgaub_out(drm_intel_bufmgr_gem *bufmgr_gem, uint32_t data)
203120131375Smrg{
203220131375Smrg	fwrite(&data, 1, 4, bufmgr_gem->aub_file);
203320131375Smrg}
203420131375Smrg
203520131375Smrgstatic void
203620131375Smrgaub_out_data(drm_intel_bufmgr_gem *bufmgr_gem, void *data, size_t size)
203720131375Smrg{
203820131375Smrg	fwrite(data, 1, size, bufmgr_gem->aub_file);
203920131375Smrg}
204020131375Smrg
204120131375Smrgstatic void
204220131375Smrgaub_write_bo_data(drm_intel_bo *bo, uint32_t offset, uint32_t size)
204322944501Smrg{
204422944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
204522944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
204620131375Smrg	uint32_t *data;
204720131375Smrg	unsigned int i;
204822944501Smrg
204920131375Smrg	data = malloc(bo->size);
205020131375Smrg	drm_intel_bo_get_subdata(bo, offset, size, data);
205122944501Smrg
205220131375Smrg	/* Easy mode: write out bo with no relocations */
205320131375Smrg	if (!bo_gem->reloc_count) {
205420131375Smrg		aub_out_data(bufmgr_gem, data, size);
205520131375Smrg		free(data);
205620131375Smrg		return;
205720131375Smrg	}
205822944501Smrg
205920131375Smrg	/* Otherwise, handle the relocations while writing. */
206020131375Smrg	for (i = 0; i < size / 4; i++) {
206120131375Smrg		int r;
206220131375Smrg		for (r = 0; r < bo_gem->reloc_count; r++) {
206320131375Smrg			struct drm_i915_gem_relocation_entry *reloc;
206420131375Smrg			drm_intel_reloc_target *info;
206522944501Smrg
206620131375Smrg			reloc = &bo_gem->relocs[r];
206720131375Smrg			info = &bo_gem->reloc_target_info[r];
206822944501Smrg
206920131375Smrg			if (reloc->offset == offset + i * 4) {
207020131375Smrg				drm_intel_bo_gem *target_gem;
207120131375Smrg				uint32_t val;
207222944501Smrg
207320131375Smrg				target_gem = (drm_intel_bo_gem *)info->bo;
207422944501Smrg
207520131375Smrg				val = reloc->delta;
207620131375Smrg				val += target_gem->aub_offset;
207722944501Smrg
207820131375Smrg				aub_out(bufmgr_gem, val);
207920131375Smrg				data[i] = val;
208020131375Smrg				break;
208120131375Smrg			}
208220131375Smrg		}
208320131375Smrg		if (r == bo_gem->reloc_count) {
208420131375Smrg			/* no relocation, just the data */
208520131375Smrg			aub_out(bufmgr_gem, data[i]);
208620131375Smrg		}
208722944501Smrg	}
208822944501Smrg
208920131375Smrg	free(data);
209022944501Smrg}
209122944501Smrg
209220131375Smrgstatic void
209320131375Smrgaub_bo_get_address(drm_intel_bo *bo)
209422944501Smrg{
209520131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
209620131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
209722944501Smrg
209820131375Smrg	/* Give the object a graphics address in the AUB file.  We
209920131375Smrg	 * don't just use the GEM object address because we do AUB
210020131375Smrg	 * dumping before execution -- we want to successfully log
210120131375Smrg	 * when the hardware might hang, and we might even want to aub
210220131375Smrg	 * capture for a driver trying to execute on a different
210320131375Smrg	 * generation of hardware by disabling the actual kernel exec
210420131375Smrg	 * call.
210520131375Smrg	 */
210620131375Smrg	bo_gem->aub_offset = bufmgr_gem->aub_offset;
210720131375Smrg	bufmgr_gem->aub_offset += bo->size;
210820131375Smrg	/* XXX: Handle aperture overflow. */
210920131375Smrg	assert(bufmgr_gem->aub_offset < 256 * 1024 * 1024);
211020131375Smrg}
211120131375Smrg
211220131375Smrgstatic void
211320131375Smrgaub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype,
211420131375Smrg		      uint32_t offset, uint32_t size)
211520131375Smrg{
211620131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
211720131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
211820131375Smrg
211920131375Smrg	aub_out(bufmgr_gem,
212020131375Smrg		CMD_AUB_TRACE_HEADER_BLOCK |
212120131375Smrg		((bufmgr_gem->gen >= 8 ? 6 : 5) - 2));
212220131375Smrg	aub_out(bufmgr_gem,
212320131375Smrg		AUB_TRACE_MEMTYPE_GTT | type | AUB_TRACE_OP_DATA_WRITE);
212420131375Smrg	aub_out(bufmgr_gem, subtype);
212520131375Smrg	aub_out(bufmgr_gem, bo_gem->aub_offset + offset);
212620131375Smrg	aub_out(bufmgr_gem, size);
212720131375Smrg	if (bufmgr_gem->gen >= 8)
212820131375Smrg		aub_out(bufmgr_gem, 0);
212920131375Smrg	aub_write_bo_data(bo, offset, size);
213020131375Smrg}
213120131375Smrg
213220131375Smrg/**
213320131375Smrg * Break up large objects into multiple writes.  Otherwise a 128kb VBO
213420131375Smrg * would overflow the 16 bits of size field in the packet header and
213520131375Smrg * everything goes badly after that.
213620131375Smrg */
213720131375Smrgstatic void
213820131375Smrgaub_write_large_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype,
213920131375Smrg			    uint32_t offset, uint32_t size)
214020131375Smrg{
214120131375Smrg	uint32_t block_size;
214220131375Smrg	uint32_t sub_offset;
214320131375Smrg
214420131375Smrg	for (sub_offset = 0; sub_offset < size; sub_offset += block_size) {
214520131375Smrg		block_size = size - sub_offset;
214620131375Smrg
214720131375Smrg		if (block_size > 8 * 4096)
214820131375Smrg			block_size = 8 * 4096;
214920131375Smrg
215020131375Smrg		aub_write_trace_block(bo, type, subtype, offset + sub_offset,
215120131375Smrg				      block_size);
215220131375Smrg	}
215320131375Smrg}
215420131375Smrg
215520131375Smrgstatic void
215620131375Smrgaub_write_bo(drm_intel_bo *bo)
215720131375Smrg{
215820131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
215920131375Smrg	uint32_t offset = 0;
216020131375Smrg	unsigned i;
216120131375Smrg
216220131375Smrg	aub_bo_get_address(bo);
216320131375Smrg
216420131375Smrg	/* Write out each annotated section separately. */
216520131375Smrg	for (i = 0; i < bo_gem->aub_annotation_count; ++i) {
216620131375Smrg		drm_intel_aub_annotation *annotation =
216720131375Smrg			&bo_gem->aub_annotations[i];
216820131375Smrg		uint32_t ending_offset = annotation->ending_offset;
216920131375Smrg		if (ending_offset > bo->size)
217020131375Smrg			ending_offset = bo->size;
217120131375Smrg		if (ending_offset > offset) {
217220131375Smrg			aub_write_large_trace_block(bo, annotation->type,
217320131375Smrg						    annotation->subtype,
217420131375Smrg						    offset,
217520131375Smrg						    ending_offset - offset);
217620131375Smrg			offset = ending_offset;
217720131375Smrg		}
217820131375Smrg	}
217920131375Smrg
218020131375Smrg	/* Write out any remaining unannotated data */
218120131375Smrg	if (offset < bo->size) {
218220131375Smrg		aub_write_large_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0,
218320131375Smrg					    offset, bo->size - offset);
218420131375Smrg	}
218520131375Smrg}
218620131375Smrg
218720131375Smrg/*
218820131375Smrg * Make a ringbuffer on fly and dump it
218920131375Smrg */
219020131375Smrgstatic void
219120131375Smrgaub_build_dump_ringbuffer(drm_intel_bufmgr_gem *bufmgr_gem,
219220131375Smrg			  uint32_t batch_buffer, int ring_flag)
219320131375Smrg{
219420131375Smrg	uint32_t ringbuffer[4096];
219520131375Smrg	int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */
219620131375Smrg	int ring_count = 0;
219720131375Smrg
219820131375Smrg	if (ring_flag == I915_EXEC_BSD)
219920131375Smrg		ring = AUB_TRACE_TYPE_RING_PRB1;
220020131375Smrg	else if (ring_flag == I915_EXEC_BLT)
220120131375Smrg		ring = AUB_TRACE_TYPE_RING_PRB2;
220220131375Smrg
220320131375Smrg	/* Make a ring buffer to execute our batchbuffer. */
220420131375Smrg	memset(ringbuffer, 0, sizeof(ringbuffer));
220520131375Smrg	if (bufmgr_gem->gen >= 8) {
220620131375Smrg		ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START | (3 - 2);
220720131375Smrg		ringbuffer[ring_count++] = batch_buffer;
220820131375Smrg		ringbuffer[ring_count++] = 0;
220920131375Smrg	} else {
221020131375Smrg		ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START;
221120131375Smrg		ringbuffer[ring_count++] = batch_buffer;
221220131375Smrg	}
221320131375Smrg
221420131375Smrg	/* Write out the ring.  This appears to trigger execution of
221520131375Smrg	 * the ring in the simulator.
221620131375Smrg	 */
221720131375Smrg	aub_out(bufmgr_gem,
221820131375Smrg		CMD_AUB_TRACE_HEADER_BLOCK |
221920131375Smrg		((bufmgr_gem->gen >= 8 ? 6 : 5) - 2));
222020131375Smrg	aub_out(bufmgr_gem,
222120131375Smrg		AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE);
222220131375Smrg	aub_out(bufmgr_gem, 0); /* general/surface subtype */
222320131375Smrg	aub_out(bufmgr_gem, bufmgr_gem->aub_offset);
222420131375Smrg	aub_out(bufmgr_gem, ring_count * 4);
222520131375Smrg	if (bufmgr_gem->gen >= 8)
222620131375Smrg		aub_out(bufmgr_gem, 0);
222720131375Smrg
222820131375Smrg	/* FIXME: Need some flush operations here? */
222920131375Smrg	aub_out_data(bufmgr_gem, ringbuffer, ring_count * 4);
223020131375Smrg
223120131375Smrg	/* Update offset pointer */
223220131375Smrg	bufmgr_gem->aub_offset += 4096;
223320131375Smrg}
223420131375Smrg
2235a884aba1Smrgdrm_public void
223620131375Smrgdrm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo,
223720131375Smrg			      int x1, int y1, int width, int height,
223820131375Smrg			      enum aub_dump_bmp_format format,
223920131375Smrg			      int pitch, int offset)
224020131375Smrg{
224120131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
224220131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
224320131375Smrg	uint32_t cpp;
224420131375Smrg
224520131375Smrg	switch (format) {
224620131375Smrg	case AUB_DUMP_BMP_FORMAT_8BIT:
224720131375Smrg		cpp = 1;
224820131375Smrg		break;
224920131375Smrg	case AUB_DUMP_BMP_FORMAT_ARGB_4444:
225020131375Smrg		cpp = 2;
225120131375Smrg		break;
225220131375Smrg	case AUB_DUMP_BMP_FORMAT_ARGB_0888:
225320131375Smrg	case AUB_DUMP_BMP_FORMAT_ARGB_8888:
225420131375Smrg		cpp = 4;
225520131375Smrg		break;
225620131375Smrg	default:
225720131375Smrg		printf("Unknown AUB dump format %d\n", format);
225820131375Smrg		return;
225920131375Smrg	}
226020131375Smrg
226120131375Smrg	if (!bufmgr_gem->aub_file)
226220131375Smrg		return;
226320131375Smrg
226420131375Smrg	aub_out(bufmgr_gem, CMD_AUB_DUMP_BMP | 4);
226520131375Smrg	aub_out(bufmgr_gem, (y1 << 16) | x1);
226620131375Smrg	aub_out(bufmgr_gem,
226720131375Smrg		(format << 24) |
226820131375Smrg		(cpp << 19) |
226920131375Smrg		pitch / 4);
227020131375Smrg	aub_out(bufmgr_gem, (height << 16) | width);
227120131375Smrg	aub_out(bufmgr_gem, bo_gem->aub_offset + offset);
227220131375Smrg	aub_out(bufmgr_gem,
227320131375Smrg		((bo_gem->tiling_mode != I915_TILING_NONE) ? (1 << 2) : 0) |
227420131375Smrg		((bo_gem->tiling_mode == I915_TILING_Y) ? (1 << 3) : 0));
227520131375Smrg}
227620131375Smrg
227720131375Smrgstatic void
227820131375Smrgaub_exec(drm_intel_bo *bo, int ring_flag, int used)
227920131375Smrg{
228020131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
228120131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
228220131375Smrg	int i;
228320131375Smrg	bool batch_buffer_needs_annotations;
228420131375Smrg
228520131375Smrg	if (!bufmgr_gem->aub_file)
228620131375Smrg		return;
228720131375Smrg
228820131375Smrg	/* If batch buffer is not annotated, annotate it the best we
228920131375Smrg	 * can.
229020131375Smrg	 */
229120131375Smrg	batch_buffer_needs_annotations = bo_gem->aub_annotation_count == 0;
229220131375Smrg	if (batch_buffer_needs_annotations) {
229320131375Smrg		drm_intel_aub_annotation annotations[2] = {
229420131375Smrg			{ AUB_TRACE_TYPE_BATCH, 0, used },
229520131375Smrg			{ AUB_TRACE_TYPE_NOTYPE, 0, bo->size }
229620131375Smrg		};
229720131375Smrg		drm_intel_bufmgr_gem_set_aub_annotations(bo, annotations, 2);
229820131375Smrg	}
229920131375Smrg
230020131375Smrg	/* Write out all buffers to AUB memory */
230120131375Smrg	for (i = 0; i < bufmgr_gem->exec_count; i++) {
230220131375Smrg		aub_write_bo(bufmgr_gem->exec_bos[i]);
230320131375Smrg	}
230420131375Smrg
230520131375Smrg	/* Remove any annotations we added */
230620131375Smrg	if (batch_buffer_needs_annotations)
230720131375Smrg		drm_intel_bufmgr_gem_set_aub_annotations(bo, NULL, 0);
230820131375Smrg
230920131375Smrg	/* Dump ring buffer */
231020131375Smrg	aub_build_dump_ringbuffer(bufmgr_gem, bo_gem->aub_offset, ring_flag);
231120131375Smrg
231220131375Smrg	fflush(bufmgr_gem->aub_file);
231320131375Smrg
231420131375Smrg	/*
231520131375Smrg	 * One frame has been dumped. So reset the aub_offset for the next frame.
231620131375Smrg	 *
231720131375Smrg	 * FIXME: Can we do this?
231820131375Smrg	 */
231920131375Smrg	bufmgr_gem->aub_offset = 0x10000;
232020131375Smrg}
232120131375Smrg
/* Submit the batch buffer via the legacy DRM_IOCTL_I915_GEM_EXECBUFFER
 * path.
 *
 * Builds the validate list from bo's relocation tree, fires the ioctl,
 * refreshes the presumed offsets of every buffer involved and finally
 * empties the exec list.  Returns 0 on success or -errno from the ioctl
 * (-ENOMEM immediately if an earlier allocation failure poisoned bo).
 */
static int
drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
		      drm_clip_rect_t * cliprects, int num_cliprects, int DR4)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_execbuffer execbuf;
	int ret, i;

	if (bo_gem->has_error)
		return -ENOMEM;

	pthread_mutex_lock(&bufmgr_gem->lock);
	/* Update indices and set up the validate list. */
	drm_intel_gem_bo_process_reloc(bo);

	/* Add the batch buffer to the validation list.  There are no
	 * relocations pointing to it.
	 */
	drm_intel_add_validate_buffer(bo);

	VG_CLEAR(execbuf);
	execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects;
	execbuf.buffer_count = bufmgr_gem->exec_count;
	execbuf.batch_start_offset = 0;
	execbuf.batch_len = used;
	execbuf.cliprects_ptr = (uintptr_t) cliprects;
	execbuf.num_cliprects = num_cliprects;
	execbuf.DR1 = 0;
	execbuf.DR4 = DR4;

	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_EXECBUFFER,
		       &execbuf);
	if (ret != 0) {
		ret = -errno;
		/* ENOSPC means the kernel could not pin the whole working
		 * set into the aperture; log our size estimates to help
		 * diagnose it.
		 */
		if (errno == ENOSPC) {
			DBG("Execbuffer fails to pin. "
			    "Estimate: %u. Actual: %u. Available: %u\n",
			    drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
							       bufmgr_gem->
							       exec_count),
			    drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
							      bufmgr_gem->
							      exec_count),
			    (unsigned int)bufmgr_gem->gtt_size);
		}
	}
	/* Pick up the offsets the kernel assigned, even after failure. */
	drm_intel_update_buffer_offsets(bufmgr_gem);

	if (bufmgr_gem->bufmgr.debug)
		drm_intel_gem_dump_validation_list(bufmgr_gem);

	for (i = 0; i < bufmgr_gem->exec_count; i++) {
		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

		bo_gem->idle = false;

		/* Disconnect the buffer from the validate list */
		bo_gem->validate_index = -1;
		bufmgr_gem->exec_bos[i] = NULL;
	}
	bufmgr_gem->exec_count = 0;
	pthread_mutex_unlock(&bufmgr_gem->lock);

	return ret;
}
239020131375Smrg
/* Common worker for all execbuffer2 entry points.
 *
 * Validates the requested ring against the hardware features we probed,
 * builds the validate list, optionally records the submission to an AUB
 * file, and fires DRM_IOCTL_I915_GEM_EXECBUFFER2.  ctx may be NULL for
 * the default context.  Returns 0 on success, -EINVAL for an
 * unsupported ring, or -errno from the ioctl.
 */
static int
do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx,
	 drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
	 unsigned int flags)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
	struct drm_i915_gem_execbuffer2 execbuf;
	int ret = 0;
	int i;

	/* Reject rings this kernel/hardware combination lacks. */
	switch (flags & 0x7) {
	default:
		return -EINVAL;
	case I915_EXEC_BLT:
		if (!bufmgr_gem->has_blt)
			return -EINVAL;
		break;
	case I915_EXEC_BSD:
		if (!bufmgr_gem->has_bsd)
			return -EINVAL;
		break;
	case I915_EXEC_VEBOX:
		if (!bufmgr_gem->has_vebox)
			return -EINVAL;
		break;
	case I915_EXEC_RENDER:
	case I915_EXEC_DEFAULT:
		break;
	}

	pthread_mutex_lock(&bufmgr_gem->lock);
	/* Update indices and set up the validate list. */
	drm_intel_gem_bo_process_reloc2(bo);

	/* Add the batch buffer to the validation list.  There are no relocations
	 * pointing to it.
	 */
	drm_intel_add_validate_buffer2(bo, 0);

	VG_CLEAR(execbuf);
	execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
	execbuf.buffer_count = bufmgr_gem->exec_count;
	execbuf.batch_start_offset = 0;
	execbuf.batch_len = used;
	execbuf.cliprects_ptr = (uintptr_t)cliprects;
	execbuf.num_cliprects = num_cliprects;
	execbuf.DR1 = 0;
	execbuf.DR4 = DR4;
	execbuf.flags = flags;
	if (ctx == NULL)
		i915_execbuffer2_set_context_id(execbuf, 0);
	else
		i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id);
	execbuf.rsvd2 = 0;

	/* Record the submission to the AUB file, if one is open. */
	aub_exec(bo, flags, used);

	/* In capture-only mode, skip the actual hardware submission. */
	if (bufmgr_gem->no_exec)
		goto skip_execution;

	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_EXECBUFFER2,
		       &execbuf);
	if (ret != 0) {
		ret = -errno;
		/* ENOSPC: the working set did not fit in the aperture;
		 * log our size estimates to help diagnose it.
		 */
		if (ret == -ENOSPC) {
			DBG("Execbuffer fails to pin. "
			    "Estimate: %u. Actual: %u. Available: %u\n",
			    drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
							       bufmgr_gem->exec_count),
			    drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
							      bufmgr_gem->exec_count),
			    (unsigned int) bufmgr_gem->gtt_size);
		}
	}
	/* Pick up the offsets the kernel assigned, even after failure. */
	drm_intel_update_buffer_offsets2(bufmgr_gem);

skip_execution:
	if (bufmgr_gem->bufmgr.debug)
		drm_intel_gem_dump_validation_list(bufmgr_gem);

	for (i = 0; i < bufmgr_gem->exec_count; i++) {
		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;

		bo_gem->idle = false;

		/* Disconnect the buffer from the validate list */
		bo_gem->validate_index = -1;
		bufmgr_gem->exec_bos[i] = NULL;
	}
	bufmgr_gem->exec_count = 0;
	pthread_mutex_unlock(&bufmgr_gem->lock);

	return ret;
}
248722944501Smrg
2488aaba2545Smrgstatic int
2489aaba2545Smrgdrm_intel_gem_bo_exec2(drm_intel_bo *bo, int used,
2490aaba2545Smrg		       drm_clip_rect_t *cliprects, int num_cliprects,
2491aaba2545Smrg		       int DR4)
2492aaba2545Smrg{
249320131375Smrg	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
249420131375Smrg			I915_EXEC_RENDER);
249520131375Smrg}
249620131375Smrg
249720131375Smrgstatic int
249820131375Smrgdrm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
249920131375Smrg			drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
250020131375Smrg			unsigned int flags)
250120131375Smrg{
250220131375Smrg	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
250320131375Smrg			flags);
250420131375Smrg}
250520131375Smrg
2506a884aba1Smrgdrm_public int
250720131375Smrgdrm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx,
250820131375Smrg			      int used, unsigned int flags)
250920131375Smrg{
251020131375Smrg	return do_exec2(bo, used, ctx, NULL, 0, 0, flags);
2511aaba2545Smrg}
2512aaba2545Smrg
251322944501Smrgstatic int
251422944501Smrgdrm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
251522944501Smrg{
251622944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
251722944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
251822944501Smrg	struct drm_i915_gem_pin pin;
251922944501Smrg	int ret;
252022944501Smrg
252120131375Smrg	VG_CLEAR(pin);
252222944501Smrg	pin.handle = bo_gem->gem_handle;
252322944501Smrg	pin.alignment = alignment;
252422944501Smrg
25256d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
25266d98c517Smrg		       DRM_IOCTL_I915_GEM_PIN,
25276d98c517Smrg		       &pin);
252822944501Smrg	if (ret != 0)
252922944501Smrg		return -errno;
253022944501Smrg
253120131375Smrg	bo->offset64 = pin.offset;
253222944501Smrg	bo->offset = pin.offset;
253322944501Smrg	return 0;
253422944501Smrg}
253522944501Smrg
253622944501Smrgstatic int
253722944501Smrgdrm_intel_gem_bo_unpin(drm_intel_bo *bo)
253822944501Smrg{
253922944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
254022944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
254122944501Smrg	struct drm_i915_gem_unpin unpin;
254222944501Smrg	int ret;
254322944501Smrg
254420131375Smrg	VG_CLEAR(unpin);
254522944501Smrg	unpin.handle = bo_gem->gem_handle;
254622944501Smrg
25476d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
254822944501Smrg	if (ret != 0)
254922944501Smrg		return -errno;
255022944501Smrg
255122944501Smrg	return 0;
255222944501Smrg}
255322944501Smrg
/* Ask the kernel to (re)apply tiling/stride to the buffer.
 *
 * No-ops when the requested values already match our cache — unless the
 * buffer has been flinked (global_name != 0), in which case another
 * process may have retiled it, so the ioctl is always re-issued.
 * On success the cached tiling, swizzle and stride are refreshed from
 * what the kernel actually chose.  Returns 0 or -errno.
 */
static int
drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
				     uint32_t tiling_mode,
				     uint32_t stride)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_set_tiling set_tiling;
	int ret;

	if (bo_gem->global_name == 0 &&
	    tiling_mode == bo_gem->tiling_mode &&
	    stride == bo_gem->stride)
		return 0;

	memset(&set_tiling, 0, sizeof(set_tiling));
	do {
		/* set_tiling is slightly broken and overwrites the
		 * input on the error path, so we have to open code
		 * drmIoctl: reload the arguments before each retry
		 * on EINTR/EAGAIN.
		 */
		set_tiling.handle = bo_gem->gem_handle;
		set_tiling.tiling_mode = tiling_mode;
		set_tiling.stride = stride;

		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_SET_TILING,
			    &set_tiling);
	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
	if (ret == -1)
		return -errno;

	/* Cache the values the kernel actually applied. */
	bo_gem->tiling_mode = set_tiling.tiling_mode;
	bo_gem->swizzle_mode = set_tiling.swizzle_mode;
	bo_gem->stride = set_tiling.stride;
	return 0;
}
25916d98c517Smrg
25926d98c517Smrgstatic int
25936d98c517Smrgdrm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
25946d98c517Smrg			    uint32_t stride)
25956d98c517Smrg{
25966d98c517Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
25976d98c517Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
25986d98c517Smrg	int ret;
25996d98c517Smrg
2600a884aba1Smrg	/* Tiling with userptr surfaces is not supported
2601a884aba1Smrg	 * on all hardware so refuse it for time being.
2602a884aba1Smrg	 */
2603a884aba1Smrg	if (bo_gem->is_userptr)
2604a884aba1Smrg		return -EINVAL;
2605a884aba1Smrg
26066d98c517Smrg	/* Linear buffers have no stride. By ensuring that we only ever use
26076d98c517Smrg	 * stride 0 with linear buffers, we simplify our code.
26086d98c517Smrg	 */
26096d98c517Smrg	if (*tiling_mode == I915_TILING_NONE)
26106d98c517Smrg		stride = 0;
26116d98c517Smrg
26126d98c517Smrg	ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride);
26136d98c517Smrg	if (ret == 0)
2614aaba2545Smrg		drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
261522944501Smrg
261622944501Smrg	*tiling_mode = bo_gem->tiling_mode;
2617aaba2545Smrg	return ret;
261822944501Smrg}
261922944501Smrg
262022944501Smrgstatic int
262122944501Smrgdrm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
262222944501Smrg			    uint32_t * swizzle_mode)
262322944501Smrg{
262422944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
262522944501Smrg
262622944501Smrg	*tiling_mode = bo_gem->tiling_mode;
262722944501Smrg	*swizzle_mode = bo_gem->swizzle_mode;
262822944501Smrg	return 0;
262922944501Smrg}
263022944501Smrg
2631a884aba1Smrgdrm_public drm_intel_bo *
263220131375Smrgdrm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size)
263320131375Smrg{
263420131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
263520131375Smrg	int ret;
263620131375Smrg	uint32_t handle;
263720131375Smrg	drm_intel_bo_gem *bo_gem;
263820131375Smrg	struct drm_i915_gem_get_tiling get_tiling;
263920131375Smrg	drmMMListHead *list;
264020131375Smrg
264120131375Smrg	ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle);
264220131375Smrg
264320131375Smrg	/*
264420131375Smrg	 * See if the kernel has already returned this buffer to us. Just as
264520131375Smrg	 * for named buffers, we must not create two bo's pointing at the same
264620131375Smrg	 * kernel object
264720131375Smrg	 */
2648a884aba1Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
264920131375Smrg	for (list = bufmgr_gem->named.next;
265020131375Smrg	     list != &bufmgr_gem->named;
265120131375Smrg	     list = list->next) {
265220131375Smrg		bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list);
265320131375Smrg		if (bo_gem->gem_handle == handle) {
265420131375Smrg			drm_intel_gem_bo_reference(&bo_gem->bo);
2655a884aba1Smrg			pthread_mutex_unlock(&bufmgr_gem->lock);
265620131375Smrg			return &bo_gem->bo;
265720131375Smrg		}
265820131375Smrg	}
265920131375Smrg
266020131375Smrg	if (ret) {
266120131375Smrg	  fprintf(stderr,"ret is %d %d\n", ret, errno);
2662a884aba1Smrg	  pthread_mutex_unlock(&bufmgr_gem->lock);
266320131375Smrg		return NULL;
266420131375Smrg	}
266520131375Smrg
266620131375Smrg	bo_gem = calloc(1, sizeof(*bo_gem));
2667a884aba1Smrg	if (!bo_gem) {
2668a884aba1Smrg		pthread_mutex_unlock(&bufmgr_gem->lock);
266920131375Smrg		return NULL;
2670a884aba1Smrg	}
267120131375Smrg	/* Determine size of bo.  The fd-to-handle ioctl really should
267220131375Smrg	 * return the size, but it doesn't.  If we have kernel 3.12 or
267320131375Smrg	 * later, we can lseek on the prime fd to get the size.  Older
267420131375Smrg	 * kernels will just fail, in which case we fall back to the
267520131375Smrg	 * provided (estimated or guess size). */
267620131375Smrg	ret = lseek(prime_fd, 0, SEEK_END);
267720131375Smrg	if (ret != -1)
267820131375Smrg		bo_gem->bo.size = ret;
267920131375Smrg	else
268020131375Smrg		bo_gem->bo.size = size;
268120131375Smrg
268220131375Smrg	bo_gem->bo.handle = handle;
268320131375Smrg	bo_gem->bo.bufmgr = bufmgr;
268420131375Smrg
268520131375Smrg	bo_gem->gem_handle = handle;
268620131375Smrg
268720131375Smrg	atomic_set(&bo_gem->refcount, 1);
268820131375Smrg
268920131375Smrg	bo_gem->name = "prime";
269020131375Smrg	bo_gem->validate_index = -1;
269120131375Smrg	bo_gem->reloc_tree_fences = 0;
269220131375Smrg	bo_gem->used_as_reloc_target = false;
269320131375Smrg	bo_gem->has_error = false;
269420131375Smrg	bo_gem->reusable = false;
269520131375Smrg
269620131375Smrg	DRMINITLISTHEAD(&bo_gem->vma_list);
269720131375Smrg	DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
2698a884aba1Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
269920131375Smrg
270020131375Smrg	VG_CLEAR(get_tiling);
270120131375Smrg	get_tiling.handle = bo_gem->gem_handle;
270220131375Smrg	ret = drmIoctl(bufmgr_gem->fd,
270320131375Smrg		       DRM_IOCTL_I915_GEM_GET_TILING,
270420131375Smrg		       &get_tiling);
270520131375Smrg	if (ret != 0) {
270620131375Smrg		drm_intel_gem_bo_unreference(&bo_gem->bo);
270720131375Smrg		return NULL;
270820131375Smrg	}
270920131375Smrg	bo_gem->tiling_mode = get_tiling.tiling_mode;
271020131375Smrg	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
271120131375Smrg	/* XXX stride is unknown */
271220131375Smrg	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
271320131375Smrg
271420131375Smrg	return &bo_gem->bo;
271520131375Smrg}
271620131375Smrg
2717a884aba1Smrgdrm_public int
271820131375Smrgdrm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd)
271920131375Smrg{
272020131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
272120131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
272220131375Smrg
2723a884aba1Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
272420131375Smrg        if (DRMLISTEMPTY(&bo_gem->name_list))
272520131375Smrg                DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
2726a884aba1Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
272720131375Smrg
272820131375Smrg	if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle,
272920131375Smrg			       DRM_CLOEXEC, prime_fd) != 0)
273020131375Smrg		return -errno;
273120131375Smrg
273220131375Smrg	bo_gem->reusable = false;
273320131375Smrg
273420131375Smrg	return 0;
273520131375Smrg}
273620131375Smrg
273722944501Smrgstatic int
273822944501Smrgdrm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name)
273922944501Smrg{
274022944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
274122944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
274222944501Smrg	int ret;
274322944501Smrg
274422944501Smrg	if (!bo_gem->global_name) {
274520131375Smrg		struct drm_gem_flink flink;
274620131375Smrg
274720131375Smrg		VG_CLEAR(flink);
274822944501Smrg		flink.handle = bo_gem->gem_handle;
274922944501Smrg
2750a884aba1Smrg		pthread_mutex_lock(&bufmgr_gem->lock);
2751a884aba1Smrg
27526d98c517Smrg		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink);
2753a884aba1Smrg		if (ret != 0) {
2754a884aba1Smrg			pthread_mutex_unlock(&bufmgr_gem->lock);
275522944501Smrg			return -errno;
2756a884aba1Smrg		}
275720131375Smrg
275822944501Smrg		bo_gem->global_name = flink.name;
275920131375Smrg		bo_gem->reusable = false;
276020131375Smrg
276120131375Smrg                if (DRMLISTEMPTY(&bo_gem->name_list))
276220131375Smrg                        DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
2763a884aba1Smrg		pthread_mutex_unlock(&bufmgr_gem->lock);
276422944501Smrg	}
276522944501Smrg
276622944501Smrg	*name = bo_gem->global_name;
276722944501Smrg	return 0;
276822944501Smrg}
276922944501Smrg
277022944501Smrg/**
277122944501Smrg * Enables unlimited caching of buffer objects for reuse.
277222944501Smrg *
277322944501Smrg * This is potentially very memory expensive, as the cache at each bucket
277422944501Smrg * size is only bounded by how many buffers of that size we've managed to have
277522944501Smrg * in flight at once.
277622944501Smrg */
2777a884aba1Smrgdrm_public void
277822944501Smrgdrm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr)
277922944501Smrg{
278022944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
278122944501Smrg
278220131375Smrg	bufmgr_gem->bo_reuse = true;
278322944501Smrg}
278422944501Smrg
278522944501Smrg/**
278622944501Smrg * Enable use of fenced reloc type.
278722944501Smrg *
278822944501Smrg * New code should enable this to avoid unnecessary fence register
278922944501Smrg * allocation.  If this option is not enabled, all relocs will have fence
279022944501Smrg * register allocated.
279122944501Smrg */
2792a884aba1Smrgdrm_public void
279322944501Smrgdrm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr)
279422944501Smrg{
279522944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
279622944501Smrg
279722944501Smrg	if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2)
279820131375Smrg		bufmgr_gem->fenced_relocs = true;
279922944501Smrg}
280022944501Smrg
280122944501Smrg/**
280222944501Smrg * Return the additional aperture space required by the tree of buffer objects
280322944501Smrg * rooted at bo.
280422944501Smrg */
280522944501Smrgstatic int
280622944501Smrgdrm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
280722944501Smrg{
280822944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
280922944501Smrg	int i;
281022944501Smrg	int total = 0;
281122944501Smrg
281222944501Smrg	if (bo == NULL || bo_gem->included_in_check_aperture)
281322944501Smrg		return 0;
281422944501Smrg
281522944501Smrg	total += bo->size;
281620131375Smrg	bo_gem->included_in_check_aperture = true;
281722944501Smrg
281822944501Smrg	for (i = 0; i < bo_gem->reloc_count; i++)
281922944501Smrg		total +=
282022944501Smrg		    drm_intel_gem_bo_get_aperture_space(bo_gem->
282122944501Smrg							reloc_target_info[i].bo);
282222944501Smrg
282322944501Smrg	return total;
282422944501Smrg}
282522944501Smrg
282622944501Smrg/**
282722944501Smrg * Count the number of buffers in this list that need a fence reg
282822944501Smrg *
282922944501Smrg * If the count is greater than the number of available regs, we'll have
283022944501Smrg * to ask the caller to resubmit a batch with fewer tiled buffers.
283122944501Smrg *
283222944501Smrg * This function over-counts if the same buffer is used multiple times.
283322944501Smrg */
283422944501Smrgstatic unsigned int
283522944501Smrgdrm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count)
283622944501Smrg{
283722944501Smrg	int i;
283822944501Smrg	unsigned int total = 0;
283922944501Smrg
284022944501Smrg	for (i = 0; i < count; i++) {
284122944501Smrg		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
284222944501Smrg
284322944501Smrg		if (bo_gem == NULL)
284422944501Smrg			continue;
284522944501Smrg
284622944501Smrg		total += bo_gem->reloc_tree_fences;
284722944501Smrg	}
284822944501Smrg	return total;
284922944501Smrg}
285022944501Smrg
285122944501Smrg/**
285222944501Smrg * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready
285322944501Smrg * for the next drm_intel_bufmgr_check_aperture_space() call.
285422944501Smrg */
285522944501Smrgstatic void
285622944501Smrgdrm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
285722944501Smrg{
285822944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
285922944501Smrg	int i;
286022944501Smrg
286122944501Smrg	if (bo == NULL || !bo_gem->included_in_check_aperture)
286222944501Smrg		return;
286322944501Smrg
286420131375Smrg	bo_gem->included_in_check_aperture = false;
286522944501Smrg
286622944501Smrg	for (i = 0; i < bo_gem->reloc_count; i++)
286722944501Smrg		drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->
286822944501Smrg							   reloc_target_info[i].bo);
286922944501Smrg}
287022944501Smrg
287122944501Smrg/**
287222944501Smrg * Return a conservative estimate for the amount of aperture required
287322944501Smrg * for a collection of buffers. This may double-count some buffers.
287422944501Smrg */
287522944501Smrgstatic unsigned int
287622944501Smrgdrm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count)
287722944501Smrg{
287822944501Smrg	int i;
287922944501Smrg	unsigned int total = 0;
288022944501Smrg
288122944501Smrg	for (i = 0; i < count; i++) {
288222944501Smrg		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
288322944501Smrg		if (bo_gem != NULL)
288422944501Smrg			total += bo_gem->reloc_tree_size;
288522944501Smrg	}
288622944501Smrg	return total;
288722944501Smrg}
288822944501Smrg
288922944501Smrg/**
289022944501Smrg * Return the amount of aperture needed for a collection of buffers.
289122944501Smrg * This avoids double counting any buffers, at the cost of looking
289222944501Smrg * at every buffer in the set.
289322944501Smrg */
289422944501Smrgstatic unsigned int
289522944501Smrgdrm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count)
289622944501Smrg{
289722944501Smrg	int i;
289822944501Smrg	unsigned int total = 0;
289922944501Smrg
290022944501Smrg	for (i = 0; i < count; i++) {
290122944501Smrg		total += drm_intel_gem_bo_get_aperture_space(bo_array[i]);
290222944501Smrg		/* For the first buffer object in the array, we get an
290322944501Smrg		 * accurate count back for its reloc_tree size (since nothing
290422944501Smrg		 * had been flagged as being counted yet).  We can save that
290522944501Smrg		 * value out as a more conservative reloc_tree_size that
290622944501Smrg		 * avoids double-counting target buffers.  Since the first
290722944501Smrg		 * buffer happens to usually be the batch buffer in our
290822944501Smrg		 * callers, this can pull us back from doing the tree
290922944501Smrg		 * walk on every new batch emit.
291022944501Smrg		 */
291122944501Smrg		if (i == 0) {
291222944501Smrg			drm_intel_bo_gem *bo_gem =
291322944501Smrg			    (drm_intel_bo_gem *) bo_array[i];
291422944501Smrg			bo_gem->reloc_tree_size = total;
291522944501Smrg		}
291622944501Smrg	}
291722944501Smrg
291822944501Smrg	for (i = 0; i < count; i++)
291922944501Smrg		drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]);
292022944501Smrg	return total;
292122944501Smrg}
292222944501Smrg
292322944501Smrg/**
292422944501Smrg * Return -1 if the batchbuffer should be flushed before attempting to
292522944501Smrg * emit rendering referencing the buffers pointed to by bo_array.
292622944501Smrg *
292722944501Smrg * This is required because if we try to emit a batchbuffer with relocations
292822944501Smrg * to a tree of buffers that won't simultaneously fit in the aperture,
292922944501Smrg * the rendering will return an error at a point where the software is not
293022944501Smrg * prepared to recover from it.
293122944501Smrg *
293222944501Smrg * However, we also want to emit the batchbuffer significantly before we reach
293322944501Smrg * the limit, as a series of batchbuffers each of which references buffers
293422944501Smrg * covering almost all of the aperture means that at each emit we end up
293522944501Smrg * waiting to evict a buffer from the last rendering, and we get synchronous
293622944501Smrg * performance.  By emitting smaller batchbuffers, we eat some CPU overhead to
293722944501Smrg * get better parallelism.
293822944501Smrg */
293922944501Smrgstatic int
294022944501Smrgdrm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
294122944501Smrg{
294222944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem =
294322944501Smrg	    (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr;
294422944501Smrg	unsigned int total = 0;
294522944501Smrg	unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
294622944501Smrg	int total_fences;
294722944501Smrg
294822944501Smrg	/* Check for fence reg constraints if necessary */
294922944501Smrg	if (bufmgr_gem->available_fences) {
295022944501Smrg		total_fences = drm_intel_gem_total_fences(bo_array, count);
295122944501Smrg		if (total_fences > bufmgr_gem->available_fences)
295222944501Smrg			return -ENOSPC;
295322944501Smrg	}
295422944501Smrg
295522944501Smrg	total = drm_intel_gem_estimate_batch_space(bo_array, count);
295622944501Smrg
295722944501Smrg	if (total > threshold)
295822944501Smrg		total = drm_intel_gem_compute_batch_space(bo_array, count);
295922944501Smrg
296022944501Smrg	if (total > threshold) {
296122944501Smrg		DBG("check_space: overflowed available aperture, "
296222944501Smrg		    "%dkb vs %dkb\n",
296322944501Smrg		    total / 1024, (int)bufmgr_gem->gtt_size / 1024);
296422944501Smrg		return -ENOSPC;
296522944501Smrg	} else {
296622944501Smrg		DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024,
296722944501Smrg		    (int)bufmgr_gem->gtt_size / 1024);
296822944501Smrg		return 0;
296922944501Smrg	}
297022944501Smrg}
297122944501Smrg
297222944501Smrg/*
297322944501Smrg * Disable buffer reuse for objects which are shared with the kernel
297422944501Smrg * as scanout buffers
297522944501Smrg */
297622944501Smrgstatic int
297722944501Smrgdrm_intel_gem_bo_disable_reuse(drm_intel_bo *bo)
297822944501Smrg{
297922944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
298022944501Smrg
298120131375Smrg	bo_gem->reusable = false;
298222944501Smrg	return 0;
298322944501Smrg}
298422944501Smrg
2985aaba2545Smrgstatic int
2986aaba2545Smrgdrm_intel_gem_bo_is_reusable(drm_intel_bo *bo)
2987aaba2545Smrg{
2988aaba2545Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2989aaba2545Smrg
2990aaba2545Smrg	return bo_gem->reusable;
2991aaba2545Smrg}
2992aaba2545Smrg
299322944501Smrgstatic int
299422944501Smrg_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
299522944501Smrg{
299622944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
299722944501Smrg	int i;
299822944501Smrg
299922944501Smrg	for (i = 0; i < bo_gem->reloc_count; i++) {
300022944501Smrg		if (bo_gem->reloc_target_info[i].bo == target_bo)
300122944501Smrg			return 1;
3002aaba2545Smrg		if (bo == bo_gem->reloc_target_info[i].bo)
3003aaba2545Smrg			continue;
300422944501Smrg		if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo,
300522944501Smrg						target_bo))
300622944501Smrg			return 1;
300722944501Smrg	}
300822944501Smrg
300922944501Smrg	return 0;
301022944501Smrg}
301122944501Smrg
301222944501Smrg/** Return true if target_bo is referenced by bo's relocation tree. */
301322944501Smrgstatic int
301422944501Smrgdrm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
301522944501Smrg{
301622944501Smrg	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
301722944501Smrg
301822944501Smrg	if (bo == NULL || target_bo == NULL)
301922944501Smrg		return 0;
302022944501Smrg	if (target_bo_gem->used_as_reloc_target)
302122944501Smrg		return _drm_intel_gem_bo_references(bo, target_bo);
302222944501Smrg	return 0;
302322944501Smrg}
302422944501Smrg
3025aaba2545Smrgstatic void
3026aaba2545Smrgadd_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
3027aaba2545Smrg{
3028aaba2545Smrg	unsigned int i = bufmgr_gem->num_buckets;
3029aaba2545Smrg
3030aaba2545Smrg	assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));
3031aaba2545Smrg
3032aaba2545Smrg	DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
3033aaba2545Smrg	bufmgr_gem->cache_bucket[i].size = size;
3034aaba2545Smrg	bufmgr_gem->num_buckets++;
3035aaba2545Smrg}
3036aaba2545Smrg
3037aaba2545Smrgstatic void
3038aaba2545Smrginit_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
3039aaba2545Smrg{
3040aaba2545Smrg	unsigned long size, cache_max_size = 64 * 1024 * 1024;
3041aaba2545Smrg
3042aaba2545Smrg	/* OK, so power of two buckets was too wasteful of memory.
3043aaba2545Smrg	 * Give 3 other sizes between each power of two, to hopefully
3044aaba2545Smrg	 * cover things accurately enough.  (The alternative is
3045aaba2545Smrg	 * probably to just go for exact matching of sizes, and assume
3046aaba2545Smrg	 * that for things like composited window resize the tiled
3047aaba2545Smrg	 * width/height alignment and rounding of sizes to pages will
3048aaba2545Smrg	 * get us useful cache hit rates anyway)
3049aaba2545Smrg	 */
3050aaba2545Smrg	add_bucket(bufmgr_gem, 4096);
3051aaba2545Smrg	add_bucket(bufmgr_gem, 4096 * 2);
3052aaba2545Smrg	add_bucket(bufmgr_gem, 4096 * 3);
3053aaba2545Smrg
3054aaba2545Smrg	/* Initialize the linked lists for BO reuse cache. */
3055aaba2545Smrg	for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
3056aaba2545Smrg		add_bucket(bufmgr_gem, size);
3057aaba2545Smrg
3058aaba2545Smrg		add_bucket(bufmgr_gem, size + size * 1 / 4);
3059aaba2545Smrg		add_bucket(bufmgr_gem, size + size * 2 / 4);
3060aaba2545Smrg		add_bucket(bufmgr_gem, size + size * 3 / 4);
3061aaba2545Smrg	}
3062aaba2545Smrg}
3063aaba2545Smrg
3064a884aba1Smrgdrm_public void
306520131375Smrgdrm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit)
306620131375Smrg{
306720131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
306820131375Smrg
306920131375Smrg	bufmgr_gem->vma_max = limit;
307020131375Smrg
307120131375Smrg	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
307220131375Smrg}
307320131375Smrg
307420131375Smrg/**
307520131375Smrg * Get the PCI ID for the device.  This can be overridden by setting the
307620131375Smrg * INTEL_DEVID_OVERRIDE environment variable to the desired ID.
307720131375Smrg */
307820131375Smrgstatic int
307920131375Smrgget_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem)
308020131375Smrg{
308120131375Smrg	char *devid_override;
308220131375Smrg	int devid;
308320131375Smrg	int ret;
308420131375Smrg	drm_i915_getparam_t gp;
308520131375Smrg
308620131375Smrg	if (geteuid() == getuid()) {
308720131375Smrg		devid_override = getenv("INTEL_DEVID_OVERRIDE");
308820131375Smrg		if (devid_override) {
308920131375Smrg			bufmgr_gem->no_exec = true;
309020131375Smrg			return strtod(devid_override, NULL);
309120131375Smrg		}
309220131375Smrg	}
309320131375Smrg
309420131375Smrg	VG_CLEAR(devid);
309520131375Smrg	VG_CLEAR(gp);
309620131375Smrg	gp.param = I915_PARAM_CHIPSET_ID;
309720131375Smrg	gp.value = &devid;
309820131375Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
309920131375Smrg	if (ret) {
310020131375Smrg		fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
310120131375Smrg		fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
310220131375Smrg	}
310320131375Smrg	return devid;
310420131375Smrg}
310520131375Smrg
3106a884aba1Smrgdrm_public int
310720131375Smrgdrm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr)
310820131375Smrg{
310920131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
311020131375Smrg
311120131375Smrg	return bufmgr_gem->pci_device;
311220131375Smrg}
311320131375Smrg
311420131375Smrg/**
311520131375Smrg * Sets the AUB filename.
311620131375Smrg *
311720131375Smrg * This function has to be called before drm_intel_bufmgr_gem_set_aub_dump()
311820131375Smrg * for it to have any effect.
311920131375Smrg */
3120a884aba1Smrgdrm_public void
312120131375Smrgdrm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr,
312220131375Smrg				      const char *filename)
312320131375Smrg{
312420131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
312520131375Smrg
312620131375Smrg	free(bufmgr_gem->aub_filename);
312720131375Smrg	if (filename)
312820131375Smrg		bufmgr_gem->aub_filename = strdup(filename);
312920131375Smrg}
313020131375Smrg
/**
 * Sets up AUB dumping.
 *
 * This is a trace file format that can be used with the simulator.
 * Packets are emitted in a format somewhat like GPU command packets.
 * You can set up a GTT and upload your objects into the referenced
 * space, then send off batchbuffers and get BMPs out the other end.
 */
drm_public void
drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
	int entry = 0x200003; /* NOTE(review): presumably a GTT PTE template (page address + flag bits) -- confirm against AUB docs */
	int i;
	int gtt_size = 0x10000;
	const char *filename;

	/* Disabling: just close any currently open trace file. */
	if (!enable) {
		if (bufmgr_gem->aub_file) {
			fclose(bufmgr_gem->aub_file);
			bufmgr_gem->aub_file = NULL;
		}
		return;
	}

	/* Refuse to create dump files in setuid/setgid processes. */
	if (geteuid() != getuid())
		return;

	/* Use the name from drm_intel_bufmgr_gem_set_aub_filename(), if
	 * any, otherwise fall back to "intel.aub" in the current directory.
	 */
	if (bufmgr_gem->aub_filename)
		filename = bufmgr_gem->aub_filename;
	else
		filename = "intel.aub";
	bufmgr_gem->aub_file = fopen(filename, "w+");
	if (!bufmgr_gem->aub_file)
		return;

	/* Start allocating objects from just after the GTT. */
	bufmgr_gem->aub_offset = gtt_size;

	/* Start with a (required) version packet. */
	aub_out(bufmgr_gem, CMD_AUB_HEADER | (13 - 2));
	aub_out(bufmgr_gem,
		(4 << AUB_HEADER_MAJOR_SHIFT) |
		(0 << AUB_HEADER_MINOR_SHIFT));
	for (i = 0; i < 8; i++) {
		aub_out(bufmgr_gem, 0); /* app name */
	}
	aub_out(bufmgr_gem, 0); /* timestamp */
	aub_out(bufmgr_gem, 0); /* timestamp */
	aub_out(bufmgr_gem, 0); /* comment len */

	/* Set up the GTT. The max we can handle is 256M */
	aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | ((bufmgr_gem->gen >= 8 ? 6 : 5) - 2));
	/* Need to use GTT_ENTRY type for recent emulator */
	aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_GTT_ENTRY | 0 | AUB_TRACE_OP_DATA_WRITE);
	aub_out(bufmgr_gem, 0); /* subtype */
	aub_out(bufmgr_gem, 0); /* offset */
	aub_out(bufmgr_gem, gtt_size); /* size */
	if (bufmgr_gem->gen >= 8)
		aub_out(bufmgr_gem, 0); /* gen8+ header blocks carry one extra dword */
	/* One dword per PTE; each successive entry points one page higher. */
	for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) {
		aub_out(bufmgr_gem, entry);
	}
}
319520131375Smrg
3196a884aba1Smrgdrm_public drm_intel_context *
319720131375Smrgdrm_intel_gem_context_create(drm_intel_bufmgr *bufmgr)
319820131375Smrg{
319920131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
320020131375Smrg	struct drm_i915_gem_context_create create;
320120131375Smrg	drm_intel_context *context = NULL;
320220131375Smrg	int ret;
320320131375Smrg
320420131375Smrg	context = calloc(1, sizeof(*context));
320520131375Smrg	if (!context)
320620131375Smrg		return NULL;
320720131375Smrg
320820131375Smrg	VG_CLEAR(create);
320920131375Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
321020131375Smrg	if (ret != 0) {
321120131375Smrg		DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n",
321220131375Smrg		    strerror(errno));
321320131375Smrg		free(context);
321420131375Smrg		return NULL;
321520131375Smrg	}
321620131375Smrg
321720131375Smrg	context->ctx_id = create.ctx_id;
321820131375Smrg	context->bufmgr = bufmgr;
321920131375Smrg
322020131375Smrg	return context;
322120131375Smrg}
322220131375Smrg
3223a884aba1Smrgdrm_public void
322420131375Smrgdrm_intel_gem_context_destroy(drm_intel_context *ctx)
322520131375Smrg{
322620131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem;
322720131375Smrg	struct drm_i915_gem_context_destroy destroy;
322820131375Smrg	int ret;
322920131375Smrg
323020131375Smrg	if (ctx == NULL)
323120131375Smrg		return;
323220131375Smrg
323320131375Smrg	VG_CLEAR(destroy);
323420131375Smrg
323520131375Smrg	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
323620131375Smrg	destroy.ctx_id = ctx->ctx_id;
323720131375Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY,
323820131375Smrg		       &destroy);
323920131375Smrg	if (ret != 0)
324020131375Smrg		fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
324120131375Smrg			strerror(errno));
324220131375Smrg
324320131375Smrg	free(ctx);
324420131375Smrg}
324520131375Smrg
3246a884aba1Smrgdrm_public int
324720131375Smrgdrm_intel_get_reset_stats(drm_intel_context *ctx,
324820131375Smrg			  uint32_t *reset_count,
324920131375Smrg			  uint32_t *active,
325020131375Smrg			  uint32_t *pending)
325120131375Smrg{
325220131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem;
325320131375Smrg	struct drm_i915_reset_stats stats;
325420131375Smrg	int ret;
325520131375Smrg
325620131375Smrg	if (ctx == NULL)
325720131375Smrg		return -EINVAL;
325820131375Smrg
325920131375Smrg	memset(&stats, 0, sizeof(stats));
326020131375Smrg
326120131375Smrg	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
326220131375Smrg	stats.ctx_id = ctx->ctx_id;
326320131375Smrg	ret = drmIoctl(bufmgr_gem->fd,
326420131375Smrg		       DRM_IOCTL_I915_GET_RESET_STATS,
326520131375Smrg		       &stats);
326620131375Smrg	if (ret == 0) {
326720131375Smrg		if (reset_count != NULL)
326820131375Smrg			*reset_count = stats.reset_count;
326920131375Smrg
327020131375Smrg		if (active != NULL)
327120131375Smrg			*active = stats.batch_active;
327220131375Smrg
327320131375Smrg		if (pending != NULL)
327420131375Smrg			*pending = stats.batch_pending;
327520131375Smrg	}
327620131375Smrg
327720131375Smrg	return ret;
327820131375Smrg}
327920131375Smrg
3280a884aba1Smrgdrm_public int
328120131375Smrgdrm_intel_reg_read(drm_intel_bufmgr *bufmgr,
328220131375Smrg		   uint32_t offset,
328320131375Smrg		   uint64_t *result)
328420131375Smrg{
328520131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
328620131375Smrg	struct drm_i915_reg_read reg_read;
328720131375Smrg	int ret;
328820131375Smrg
328920131375Smrg	VG_CLEAR(reg_read);
329020131375Smrg	reg_read.offset = offset;
329120131375Smrg
329220131375Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
329320131375Smrg
329420131375Smrg	*result = reg_read.val;
329520131375Smrg	return ret;
329620131375Smrg}
329720131375Smrg
329820131375Smrg
329920131375Smrg/**
330020131375Smrg * Annotate the given bo for use in aub dumping.
330120131375Smrg *
330220131375Smrg * \param annotations is an array of drm_intel_aub_annotation objects
330320131375Smrg * describing the type of data in various sections of the bo.  Each
330420131375Smrg * element of the array specifies the type and subtype of a section of
330520131375Smrg * the bo, and the past-the-end offset of that section.  The elements
330620131375Smrg * of \c annotations must be sorted so that ending_offset is
330720131375Smrg * increasing.
330820131375Smrg *
330920131375Smrg * \param count is the number of elements in the \c annotations array.
331020131375Smrg * If \c count is zero, then \c annotations will not be dereferenced.
331120131375Smrg *
331220131375Smrg * Annotations are copied into a private data structure, so caller may
331320131375Smrg * re-use the memory pointed to by \c annotations after the call
331420131375Smrg * returns.
331520131375Smrg *
331620131375Smrg * Annotations are stored for the lifetime of the bo; to reset to the
331720131375Smrg * default state (no annotations), call this function with a \c count
331820131375Smrg * of zero.
331920131375Smrg */
3320a884aba1Smrgdrm_public void
332120131375Smrgdrm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo,
332220131375Smrg					 drm_intel_aub_annotation *annotations,
332320131375Smrg					 unsigned count)
332420131375Smrg{
332520131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
332620131375Smrg	unsigned size = sizeof(*annotations) * count;
332720131375Smrg	drm_intel_aub_annotation *new_annotations =
332820131375Smrg		count > 0 ? realloc(bo_gem->aub_annotations, size) : NULL;
332920131375Smrg	if (new_annotations == NULL) {
333020131375Smrg		free(bo_gem->aub_annotations);
333120131375Smrg		bo_gem->aub_annotations = NULL;
333220131375Smrg		bo_gem->aub_annotation_count = 0;
333320131375Smrg		return;
333420131375Smrg	}
333520131375Smrg	memcpy(new_annotations, annotations, size);
333620131375Smrg	bo_gem->aub_annotations = new_annotations;
333720131375Smrg	bo_gem->aub_annotation_count = count;
333820131375Smrg}
333920131375Smrg
/* Registry of live bufmgrs, keyed by fd, so repeated init calls on the same
 * fd share one manager.  Guarded by bufmgr_list_mutex.
 */
static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER;
static drmMMListHead bufmgr_list = { &bufmgr_list, &bufmgr_list };
3342a884aba1Smrg
3343a884aba1Smrgstatic drm_intel_bufmgr_gem *
3344a884aba1Smrgdrm_intel_bufmgr_gem_find(int fd)
3345a884aba1Smrg{
3346a884aba1Smrg	drm_intel_bufmgr_gem *bufmgr_gem;
3347a884aba1Smrg
3348a884aba1Smrg	DRMLISTFOREACHENTRY(bufmgr_gem, &bufmgr_list, managers) {
3349a884aba1Smrg		if (bufmgr_gem->fd == fd) {
3350a884aba1Smrg			atomic_inc(&bufmgr_gem->refcount);
3351a884aba1Smrg			return bufmgr_gem;
3352a884aba1Smrg		}
3353a884aba1Smrg	}
3354a884aba1Smrg
3355a884aba1Smrg	return NULL;
3356a884aba1Smrg}
3357a884aba1Smrg
/* Drop one reference to the bufmgr; destroy and unregister it when the last
 * reference goes away.
 */
static void
drm_intel_bufmgr_gem_unref(drm_intel_bufmgr *bufmgr)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;

	/* Fast path: decrement without taking the list mutex when this is
	 * not the last reference.  NOTE(review): this relies on
	 * atomic_add_unless(v, -1, 1) decrementing unless the count is 1
	 * and returning true only when the count WAS 1 -- confirm against
	 * xf86atomic.h.
	 */
	if (atomic_add_unless(&bufmgr_gem->refcount, -1, 1)) {
		pthread_mutex_lock(&bufmgr_list_mutex);

		/* Re-check under the lock: drm_intel_bufmgr_gem_find() may
		 * have taken a new reference in the meantime.
		 */
		if (atomic_dec_and_test(&bufmgr_gem->refcount)) {
			DRMLISTDEL(&bufmgr_gem->managers);
			drm_intel_bufmgr_gem_destroy(bufmgr);
		}

		pthread_mutex_unlock(&bufmgr_list_mutex);
	}
}
3374a884aba1Smrg
3375a884aba1Smrgstatic bool
3376a884aba1Smrghas_userptr(drm_intel_bufmgr_gem *bufmgr_gem)
3377a884aba1Smrg{
3378a884aba1Smrg	int ret;
3379a884aba1Smrg	void *ptr;
3380a884aba1Smrg	long pgsz;
3381a884aba1Smrg	struct drm_i915_gem_userptr userptr;
3382a884aba1Smrg	struct drm_gem_close close_bo;
3383a884aba1Smrg
3384a884aba1Smrg	pgsz = sysconf(_SC_PAGESIZE);
3385a884aba1Smrg	assert(pgsz > 0);
3386a884aba1Smrg
3387a884aba1Smrg	ret = posix_memalign(&ptr, pgsz, pgsz);
3388a884aba1Smrg	if (ret) {
3389a884aba1Smrg		DBG("Failed to get a page (%ld) for userptr detection!\n",
3390a884aba1Smrg			pgsz);
3391a884aba1Smrg		return false;
3392a884aba1Smrg	}
3393a884aba1Smrg
3394a884aba1Smrg	memset(&userptr, 0, sizeof(userptr));
3395a884aba1Smrg	userptr.user_ptr = (__u64)(unsigned long)ptr;
3396a884aba1Smrg	userptr.user_size = pgsz;
3397a884aba1Smrg
3398a884aba1Smrgretry:
3399a884aba1Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
3400a884aba1Smrg	if (ret) {
3401a884aba1Smrg		if (errno == ENODEV && userptr.flags == 0) {
3402a884aba1Smrg			userptr.flags = I915_USERPTR_UNSYNCHRONIZED;
3403a884aba1Smrg			goto retry;
3404a884aba1Smrg		}
3405a884aba1Smrg		free(ptr);
3406a884aba1Smrg		return false;
3407a884aba1Smrg	}
3408a884aba1Smrg
3409a884aba1Smrg	close_bo.handle = userptr.handle;
3410a884aba1Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close_bo);
3411a884aba1Smrg	free(ptr);
3412a884aba1Smrg	if (ret) {
3413a884aba1Smrg		fprintf(stderr, "Failed to release test userptr object! (%d) "
3414a884aba1Smrg				"i915 kernel driver may not be sane!\n", errno);
3415a884aba1Smrg		return false;
3416a884aba1Smrg	}
3417a884aba1Smrg
3418a884aba1Smrg	return true;
3419a884aba1Smrg}
3420a884aba1Smrg
/**
 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
 * and manage map buffer objections.
 *
 * \param fd File descriptor of the opened DRM device.
 */
drm_public drm_intel_bufmgr *
drm_intel_bufmgr_gem_init(int fd, int batch_size)
{
	drm_intel_bufmgr_gem *bufmgr_gem;
	struct drm_i915_gem_get_aperture aperture;
	drm_i915_getparam_t gp;
	int ret, tmp;
	bool exec2 = false;

	pthread_mutex_lock(&bufmgr_list_mutex);

	/* Reuse (and re-reference) an existing manager for this fd, if any. */
	bufmgr_gem = drm_intel_bufmgr_gem_find(fd);
	if (bufmgr_gem)
		goto exit;

	bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
	if (bufmgr_gem == NULL)
		goto exit;

	bufmgr_gem->fd = fd;
	atomic_set(&bufmgr_gem->refcount, 1);

	if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
		free(bufmgr_gem);
		bufmgr_gem = NULL;
		goto exit;
	}

	/* Query the aperture size; fall back to a conservative guess if the
	 * kernel can't tell us.
	 */
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_GET_APERTURE,
		       &aperture);

	if (ret == 0)
		bufmgr_gem->gtt_size = aperture.aper_available_size;
	else {
		fprintf(stderr, "DRM_IOCTL_I915_GEM_APERTURE failed: %s\n",
			strerror(errno));
		bufmgr_gem->gtt_size = 128 * 1024 * 1024;
		fprintf(stderr, "Assuming %dkB available aperture size.\n"
			"May lead to reduced performance or incorrect "
			"rendering.\n",
			(int)bufmgr_gem->gtt_size / 1024);
	}

	bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem);

	/* Map the PCI ID to a GPU generation; unknown devices fail init. */
	if (IS_GEN2(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 2;
	else if (IS_GEN3(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 3;
	else if (IS_GEN4(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 4;
	else if (IS_GEN5(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 5;
	else if (IS_GEN6(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 6;
	else if (IS_GEN7(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 7;
	else if (IS_GEN8(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 8;
	else if (IS_GEN9(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 9;
	else {
		free(bufmgr_gem);
		bufmgr_gem = NULL;
		goto exit;
	}

	if (IS_GEN3(bufmgr_gem->pci_device) &&
	    bufmgr_gem->gtt_size > 256*1024*1024) {
		/* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't
		 * be used for tiled blits. To simplify the accounting, just
		 * substract the unmappable part (fixed to 256MB on all known
		 * gen3 devices) if the kernel advertises it. */
		bufmgr_gem->gtt_size -= 256*1024*1024;
	}

	/* Probe optional kernel features via GETPARAM; for each, a zero
	 * return from the ioctl means the feature is supported.
	 */
	VG_CLEAR(gp);
	gp.value = &tmp;

	gp.param = I915_PARAM_HAS_EXECBUF2;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	if (!ret)
		exec2 = true;

	gp.param = I915_PARAM_HAS_BSD;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	bufmgr_gem->has_bsd = ret == 0;

	gp.param = I915_PARAM_HAS_BLT;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	bufmgr_gem->has_blt = ret == 0;

	gp.param = I915_PARAM_HAS_RELAXED_FENCING;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	bufmgr_gem->has_relaxed_fencing = ret == 0;

	/* Only expose the userptr entry point when the kernel supports it. */
	if (has_userptr(bufmgr_gem))
		bufmgr_gem->bufmgr.bo_alloc_userptr =
			drm_intel_gem_bo_alloc_userptr;

	gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	bufmgr_gem->has_wait_timeout = ret == 0;

	gp.param = I915_PARAM_HAS_LLC;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	if (ret != 0) {
		/* Kernel does not supports HAS_LLC query, fallback to GPU
		 * generation detection and assume that we have LLC on GEN6/7
		 */
		bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) |
				IS_GEN7(bufmgr_gem->pci_device));
	} else
		bufmgr_gem->has_llc = *gp.value;

	gp.param = I915_PARAM_HAS_VEBOX;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	bufmgr_gem->has_vebox = (ret == 0) & (*gp.value > 0);

	/* Pre-gen4 hardware needs fence registers for tiled access; count
	 * how many are available for userspace.
	 */
	if (bufmgr_gem->gen < 4) {
		gp.param = I915_PARAM_NUM_FENCES_AVAIL;
		gp.value = &bufmgr_gem->available_fences;
		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
		if (ret) {
			fprintf(stderr, "get fences failed: %d [%d]\n", ret,
				errno);
			fprintf(stderr, "param: %d, val: %d\n", gp.param,
				*gp.value);
			bufmgr_gem->available_fences = 0;
		} else {
			/* XXX The kernel reports the total number of fences,
			 * including any that may be pinned.
			 *
			 * We presume that there will be at least one pinned
			 * fence for the scanout buffer, but there may be more
			 * than one scanout and the user may be manually
			 * pinning buffers. Let's move to execbuffer2 and
			 * thereby forget the insanity of using fences...
			 */
			bufmgr_gem->available_fences -= 2;
			if (bufmgr_gem->available_fences < 0)
				bufmgr_gem->available_fences = 0;
		}
	}

	/* Let's go with one relocation per every 2 dwords (but round down a bit
	 * since a power of two will mean an extra page allocation for the reloc
	 * buffer).
	 *
	 * Every 4 was too few for the blender benchmark.
	 */
	bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;

	/* Fill in the public vtable. */
	bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
	bufmgr_gem->bufmgr.bo_alloc_for_render =
	    drm_intel_gem_bo_alloc_for_render;
	bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
	bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
	bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
	bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
	bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
	bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
	bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
	bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
	bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
	bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
	/* Use the new one if available */
	if (exec2) {
		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2;
		bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2;
	} else
		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
	bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy;
	bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise;
	bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_unref;
	bufmgr_gem->bufmgr.debug = 0;
	bufmgr_gem->bufmgr.check_aperture_space =
	    drm_intel_gem_check_aperture_space;
	bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
	bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable;
	bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
	    drm_intel_gem_get_pipe_from_crtc_id;
	bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;

	DRMINITLISTHEAD(&bufmgr_gem->named);
	init_cache_buckets(bufmgr_gem);

	DRMINITLISTHEAD(&bufmgr_gem->vma_cache);
	bufmgr_gem->vma_max = -1; /* unlimited by default */

	/* Register on the global list so later init calls on this fd share
	 * the manager (see drm_intel_bufmgr_gem_find()).
	 */
	DRMLISTADD(&bufmgr_gem->managers, &bufmgr_list);

exit:
	pthread_mutex_unlock(&bufmgr_list_mutex);

	return bufmgr_gem != NULL ? &bufmgr_gem->bufmgr : NULL;
}
3630