intel_bufmgr_gem.c revision 424e9256
122944501Smrg/**************************************************************************
222944501Smrg *
322944501Smrg * Copyright � 2007 Red Hat Inc.
420131375Smrg * Copyright � 2007-2012 Intel Corporation
522944501Smrg * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
622944501Smrg * All Rights Reserved.
722944501Smrg *
822944501Smrg * Permission is hereby granted, free of charge, to any person obtaining a
922944501Smrg * copy of this software and associated documentation files (the
1022944501Smrg * "Software"), to deal in the Software without restriction, including
1122944501Smrg * without limitation the rights to use, copy, modify, merge, publish,
1222944501Smrg * distribute, sub license, and/or sell copies of the Software, and to
1322944501Smrg * permit persons to whom the Software is furnished to do so, subject to
1422944501Smrg * the following conditions:
1522944501Smrg *
1622944501Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1722944501Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1822944501Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
1922944501Smrg * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
2022944501Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
2122944501Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
2222944501Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE.
2322944501Smrg *
2422944501Smrg * The above copyright notice and this permission notice (including the
2522944501Smrg * next paragraph) shall be included in all copies or substantial portions
2622944501Smrg * of the Software.
2722944501Smrg *
2822944501Smrg *
2922944501Smrg **************************************************************************/
3022944501Smrg/*
3122944501Smrg * Authors: Thomas Hellstr�m <thomas-at-tungstengraphics-dot-com>
3222944501Smrg *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
3322944501Smrg *	    Eric Anholt <eric@anholt.net>
3422944501Smrg *	    Dave Airlie <airlied@linux.ie>
3522944501Smrg */
3622944501Smrg
3722944501Smrg#ifdef HAVE_CONFIG_H
3822944501Smrg#include "config.h"
3922944501Smrg#endif
4022944501Smrg
4122944501Smrg#include <xf86drm.h>
4222944501Smrg#include <xf86atomic.h>
4322944501Smrg#include <fcntl.h>
4422944501Smrg#include <stdio.h>
4522944501Smrg#include <stdlib.h>
4622944501Smrg#include <string.h>
4722944501Smrg#include <unistd.h>
4822944501Smrg#include <assert.h>
4922944501Smrg#include <pthread.h>
502e6867f6Smrg#include <stddef.h>
5122944501Smrg#include <sys/ioctl.h>
5222944501Smrg#include <sys/stat.h>
5322944501Smrg#include <sys/types.h>
5420131375Smrg#include <stdbool.h>
5522944501Smrg
5622944501Smrg#include "errno.h"
5720131375Smrg#ifndef ETIME
5820131375Smrg#define ETIME ETIMEDOUT
5920131375Smrg#endif
60424e9256Smrg#include "libdrm_macros.h"
6122944501Smrg#include "libdrm_lists.h"
6222944501Smrg#include "intel_bufmgr.h"
6322944501Smrg#include "intel_bufmgr_priv.h"
6422944501Smrg#include "intel_chipset.h"
6520131375Smrg#include "intel_aub.h"
6622944501Smrg#include "string.h"
6722944501Smrg
6822944501Smrg#include "i915_drm.h"
6922944501Smrg
7020131375Smrg#ifdef HAVE_VALGRIND
7120131375Smrg#include <valgrind.h>
7220131375Smrg#include <memcheck.h>
7320131375Smrg#define VG(x) x
7420131375Smrg#else
7520131375Smrg#define VG(x)
7620131375Smrg#endif
7720131375Smrg
78424e9256Smrg#define memclear(s) memset(&s, 0, sizeof(s))
7920131375Smrg
8022944501Smrg#define DBG(...) do {					\
8122944501Smrg	if (bufmgr_gem->bufmgr.debug)			\
8222944501Smrg		fprintf(stderr, __VA_ARGS__);		\
8322944501Smrg} while (0)
8422944501Smrg
85aaba2545Smrg#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
86aaba2545Smrg
8722944501Smrgtypedef struct _drm_intel_bo_gem drm_intel_bo_gem;
8822944501Smrg
8922944501Smrgstruct drm_intel_gem_bo_bucket {
9022944501Smrg	drmMMListHead head;
9122944501Smrg	unsigned long size;
9222944501Smrg};
9322944501Smrg
9422944501Smrgtypedef struct _drm_intel_bufmgr_gem {
9522944501Smrg	drm_intel_bufmgr bufmgr;
9622944501Smrg
97a884aba1Smrg	atomic_t refcount;
98a884aba1Smrg
9922944501Smrg	int fd;
10022944501Smrg
10122944501Smrg	int max_relocs;
10222944501Smrg
10322944501Smrg	pthread_mutex_t lock;
10422944501Smrg
10522944501Smrg	struct drm_i915_gem_exec_object *exec_objects;
10622944501Smrg	struct drm_i915_gem_exec_object2 *exec2_objects;
10722944501Smrg	drm_intel_bo **exec_bos;
10822944501Smrg	int exec_size;
10922944501Smrg	int exec_count;
11022944501Smrg
11122944501Smrg	/** Array of lists of cached gem objects of power-of-two sizes */
112aaba2545Smrg	struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
113aaba2545Smrg	int num_buckets;
1146d98c517Smrg	time_t time;
11522944501Smrg
116a884aba1Smrg	drmMMListHead managers;
117a884aba1Smrg
11820131375Smrg	drmMMListHead named;
11920131375Smrg	drmMMListHead vma_cache;
12020131375Smrg	int vma_count, vma_open, vma_max;
12120131375Smrg
12222944501Smrg	uint64_t gtt_size;
12322944501Smrg	int available_fences;
12422944501Smrg	int pci_device;
12522944501Smrg	int gen;
1269ce4edccSmrg	unsigned int has_bsd : 1;
1279ce4edccSmrg	unsigned int has_blt : 1;
1289ce4edccSmrg	unsigned int has_relaxed_fencing : 1;
12920131375Smrg	unsigned int has_llc : 1;
13020131375Smrg	unsigned int has_wait_timeout : 1;
1319ce4edccSmrg	unsigned int bo_reuse : 1;
13220131375Smrg	unsigned int no_exec : 1;
13320131375Smrg	unsigned int has_vebox : 1;
13420131375Smrg	bool fenced_relocs;
13520131375Smrg
136424e9256Smrg	struct {
137424e9256Smrg		void *ptr;
138424e9256Smrg		uint32_t handle;
139424e9256Smrg	} userptr_active;
140424e9256Smrg
14120131375Smrg	char *aub_filename;
14220131375Smrg	FILE *aub_file;
14320131375Smrg	uint32_t aub_offset;
14422944501Smrg} drm_intel_bufmgr_gem;
14522944501Smrg
14622944501Smrg#define DRM_INTEL_RELOC_FENCE (1<<0)
14722944501Smrg
14822944501Smrgtypedef struct _drm_intel_reloc_target_info {
14922944501Smrg	drm_intel_bo *bo;
15022944501Smrg	int flags;
15122944501Smrg} drm_intel_reloc_target;
15222944501Smrg
15322944501Smrgstruct _drm_intel_bo_gem {
15422944501Smrg	drm_intel_bo bo;
15522944501Smrg
15622944501Smrg	atomic_t refcount;
15722944501Smrg	uint32_t gem_handle;
15822944501Smrg	const char *name;
15922944501Smrg
16022944501Smrg	/**
16122944501Smrg	 * Kenel-assigned global name for this object
16220131375Smrg         *
16320131375Smrg         * List contains both flink named and prime fd'd objects
16422944501Smrg	 */
16522944501Smrg	unsigned int global_name;
16620131375Smrg	drmMMListHead name_list;
16722944501Smrg
16822944501Smrg	/**
16922944501Smrg	 * Index of the buffer within the validation list while preparing a
17022944501Smrg	 * batchbuffer execution.
17122944501Smrg	 */
17222944501Smrg	int validate_index;
17322944501Smrg
17422944501Smrg	/**
17522944501Smrg	 * Current tiling mode
17622944501Smrg	 */
17722944501Smrg	uint32_t tiling_mode;
17822944501Smrg	uint32_t swizzle_mode;
1796d98c517Smrg	unsigned long stride;
18022944501Smrg
18122944501Smrg	time_t free_time;
18222944501Smrg
18322944501Smrg	/** Array passed to the DRM containing relocation information. */
18422944501Smrg	struct drm_i915_gem_relocation_entry *relocs;
18522944501Smrg	/**
18622944501Smrg	 * Array of info structs corresponding to relocs[i].target_handle etc
18722944501Smrg	 */
18822944501Smrg	drm_intel_reloc_target *reloc_target_info;
18922944501Smrg	/** Number of entries in relocs */
19022944501Smrg	int reloc_count;
19122944501Smrg	/** Mapped address for the buffer, saved across map/unmap cycles */
19222944501Smrg	void *mem_virtual;
19322944501Smrg	/** GTT virtual address for the buffer, saved across map/unmap cycles */
19422944501Smrg	void *gtt_virtual;
195a884aba1Smrg	/**
196a884aba1Smrg	 * Virtual address of the buffer allocated by user, used for userptr
197a884aba1Smrg	 * objects only.
198a884aba1Smrg	 */
199a884aba1Smrg	void *user_virtual;
20020131375Smrg	int map_count;
20120131375Smrg	drmMMListHead vma_list;
20222944501Smrg
20322944501Smrg	/** BO cache list */
20422944501Smrg	drmMMListHead head;
20522944501Smrg
20622944501Smrg	/**
20722944501Smrg	 * Boolean of whether this BO and its children have been included in
20822944501Smrg	 * the current drm_intel_bufmgr_check_aperture_space() total.
20922944501Smrg	 */
21020131375Smrg	bool included_in_check_aperture;
21122944501Smrg
21222944501Smrg	/**
21322944501Smrg	 * Boolean of whether this buffer has been used as a relocation
21422944501Smrg	 * target and had its size accounted for, and thus can't have any
21522944501Smrg	 * further relocations added to it.
21622944501Smrg	 */
21720131375Smrg	bool used_as_reloc_target;
21822944501Smrg
21922944501Smrg	/**
22022944501Smrg	 * Boolean of whether we have encountered an error whilst building the relocation tree.
22122944501Smrg	 */
22220131375Smrg	bool has_error;
22322944501Smrg
22422944501Smrg	/**
22522944501Smrg	 * Boolean of whether this buffer can be re-used
22622944501Smrg	 */
22720131375Smrg	bool reusable;
22820131375Smrg
22920131375Smrg	/**
23020131375Smrg	 * Boolean of whether the GPU is definitely not accessing the buffer.
23120131375Smrg	 *
23220131375Smrg	 * This is only valid when reusable, since non-reusable
23320131375Smrg	 * buffers are those that have been shared wth other
23420131375Smrg	 * processes, so we don't know their state.
23520131375Smrg	 */
23620131375Smrg	bool idle;
23722944501Smrg
238a884aba1Smrg	/**
239a884aba1Smrg	 * Boolean of whether this buffer was allocated with userptr
240a884aba1Smrg	 */
241a884aba1Smrg	bool is_userptr;
242a884aba1Smrg
24322944501Smrg	/**
24422944501Smrg	 * Size in bytes of this buffer and its relocation descendents.
24522944501Smrg	 *
24622944501Smrg	 * Used to avoid costly tree walking in
24722944501Smrg	 * drm_intel_bufmgr_check_aperture in the common case.
24822944501Smrg	 */
24922944501Smrg	int reloc_tree_size;
25022944501Smrg
25122944501Smrg	/**
25222944501Smrg	 * Number of potential fence registers required by this buffer and its
25322944501Smrg	 * relocations.
25422944501Smrg	 */
25522944501Smrg	int reloc_tree_fences;
25620131375Smrg
25720131375Smrg	/** Flags that we may need to do the SW_FINSIH ioctl on unmap. */
25820131375Smrg	bool mapped_cpu_write;
25920131375Smrg
26020131375Smrg	uint32_t aub_offset;
26120131375Smrg
26220131375Smrg	drm_intel_aub_annotation *aub_annotations;
26320131375Smrg	unsigned aub_annotation_count;
26422944501Smrg};
26522944501Smrg
26622944501Smrgstatic unsigned int
26722944501Smrgdrm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);
26822944501Smrg
26922944501Smrgstatic unsigned int
27022944501Smrgdrm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count);
27122944501Smrg
27222944501Smrgstatic int
27322944501Smrgdrm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
27422944501Smrg			    uint32_t * swizzle_mode);
27522944501Smrg
27622944501Smrgstatic int
2776d98c517Smrgdrm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
2786d98c517Smrg				     uint32_t tiling_mode,
2796d98c517Smrg				     uint32_t stride);
28022944501Smrg
28122944501Smrgstatic void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
28222944501Smrg						      time_t time);
28322944501Smrg
28422944501Smrgstatic void drm_intel_gem_bo_unreference(drm_intel_bo *bo);
28522944501Smrg
28622944501Smrgstatic void drm_intel_gem_bo_free(drm_intel_bo *bo);
28722944501Smrg
28822944501Smrgstatic unsigned long
28922944501Smrgdrm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
29022944501Smrg			   uint32_t *tiling_mode)
29122944501Smrg{
29222944501Smrg	unsigned long min_size, max_size;
29322944501Smrg	unsigned long i;
29422944501Smrg
29522944501Smrg	if (*tiling_mode == I915_TILING_NONE)
29622944501Smrg		return size;
29722944501Smrg
29822944501Smrg	/* 965+ just need multiples of page size for tiling */
29922944501Smrg	if (bufmgr_gem->gen >= 4)
30022944501Smrg		return ROUND_UP_TO(size, 4096);
30122944501Smrg
30222944501Smrg	/* Older chips need powers of two, of at least 512k or 1M */
30322944501Smrg	if (bufmgr_gem->gen == 3) {
30422944501Smrg		min_size = 1024*1024;
30522944501Smrg		max_size = 128*1024*1024;
30622944501Smrg	} else {
30722944501Smrg		min_size = 512*1024;
30822944501Smrg		max_size = 64*1024*1024;
30922944501Smrg	}
31022944501Smrg
31122944501Smrg	if (size > max_size) {
31222944501Smrg		*tiling_mode = I915_TILING_NONE;
31322944501Smrg		return size;
31422944501Smrg	}
31522944501Smrg
3169ce4edccSmrg	/* Do we need to allocate every page for the fence? */
3179ce4edccSmrg	if (bufmgr_gem->has_relaxed_fencing)
3189ce4edccSmrg		return ROUND_UP_TO(size, 4096);
3199ce4edccSmrg
32022944501Smrg	for (i = min_size; i < size; i <<= 1)
32122944501Smrg		;
32222944501Smrg
32322944501Smrg	return i;
32422944501Smrg}
32522944501Smrg
32622944501Smrg/*
32722944501Smrg * Round a given pitch up to the minimum required for X tiling on a
32822944501Smrg * given chip.  We use 512 as the minimum to allow for a later tiling
32922944501Smrg * change.
33022944501Smrg */
33122944501Smrgstatic unsigned long
33222944501Smrgdrm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
3336d98c517Smrg			    unsigned long pitch, uint32_t *tiling_mode)
33422944501Smrg{
33522944501Smrg	unsigned long tile_width;
33622944501Smrg	unsigned long i;
33722944501Smrg
33822944501Smrg	/* If untiled, then just align it so that we can do rendering
33922944501Smrg	 * to it with the 3D engine.
34022944501Smrg	 */
3416d98c517Smrg	if (*tiling_mode == I915_TILING_NONE)
34222944501Smrg		return ALIGN(pitch, 64);
34322944501Smrg
34420131375Smrg	if (*tiling_mode == I915_TILING_X
34520131375Smrg			|| (IS_915(bufmgr_gem->pci_device)
34620131375Smrg			    && *tiling_mode == I915_TILING_Y))
34722944501Smrg		tile_width = 512;
34822944501Smrg	else
34922944501Smrg		tile_width = 128;
35022944501Smrg
35122944501Smrg	/* 965 is flexible */
35222944501Smrg	if (bufmgr_gem->gen >= 4)
35322944501Smrg		return ROUND_UP_TO(pitch, tile_width);
35422944501Smrg
3556d98c517Smrg	/* The older hardware has a maximum pitch of 8192 with tiled
3566d98c517Smrg	 * surfaces, so fallback to untiled if it's too large.
3576d98c517Smrg	 */
3586d98c517Smrg	if (pitch > 8192) {
3596d98c517Smrg		*tiling_mode = I915_TILING_NONE;
3606d98c517Smrg		return ALIGN(pitch, 64);
3616d98c517Smrg	}
3626d98c517Smrg
36322944501Smrg	/* Pre-965 needs power of two tile width */
36422944501Smrg	for (i = tile_width; i < pitch; i <<= 1)
36522944501Smrg		;
36622944501Smrg
36722944501Smrg	return i;
36822944501Smrg}
36922944501Smrg
37022944501Smrgstatic struct drm_intel_gem_bo_bucket *
37122944501Smrgdrm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
37222944501Smrg				 unsigned long size)
37322944501Smrg{
37422944501Smrg	int i;
37522944501Smrg
376aaba2545Smrg	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
37722944501Smrg		struct drm_intel_gem_bo_bucket *bucket =
37822944501Smrg		    &bufmgr_gem->cache_bucket[i];
37922944501Smrg		if (bucket->size >= size) {
38022944501Smrg			return bucket;
38122944501Smrg		}
38222944501Smrg	}
38322944501Smrg
38422944501Smrg	return NULL;
38522944501Smrg}
38622944501Smrg
38722944501Smrgstatic void
38822944501Smrgdrm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
38922944501Smrg{
39022944501Smrg	int i, j;
39122944501Smrg
39222944501Smrg	for (i = 0; i < bufmgr_gem->exec_count; i++) {
39322944501Smrg		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
39422944501Smrg		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
39522944501Smrg
39622944501Smrg		if (bo_gem->relocs == NULL) {
39722944501Smrg			DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle,
39822944501Smrg			    bo_gem->name);
39922944501Smrg			continue;
40022944501Smrg		}
40122944501Smrg
40222944501Smrg		for (j = 0; j < bo_gem->reloc_count; j++) {
40322944501Smrg			drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo;
40422944501Smrg			drm_intel_bo_gem *target_gem =
40522944501Smrg			    (drm_intel_bo_gem *) target_bo;
40622944501Smrg
40722944501Smrg			DBG("%2d: %d (%s)@0x%08llx -> "
408d82d45b3Sjoerg			    "%d (%s)@0x%08llx + 0x%08x\n",
40922944501Smrg			    i,
41022944501Smrg			    bo_gem->gem_handle, bo_gem->name,
41122944501Smrg			    (unsigned long long)bo_gem->relocs[j].offset,
41222944501Smrg			    target_gem->gem_handle,
41322944501Smrg			    target_gem->name,
414d82d45b3Sjoerg			    (unsigned long long)target_bo->offset64,
41522944501Smrg			    bo_gem->relocs[j].delta);
41622944501Smrg		}
41722944501Smrg	}
41822944501Smrg}
41922944501Smrg
42022944501Smrgstatic inline void
42122944501Smrgdrm_intel_gem_bo_reference(drm_intel_bo *bo)
42222944501Smrg{
42322944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
42422944501Smrg
42522944501Smrg	atomic_inc(&bo_gem->refcount);
42622944501Smrg}
42722944501Smrg
42822944501Smrg/**
42922944501Smrg * Adds the given buffer to the list of buffers to be validated (moved into the
43022944501Smrg * appropriate memory type) with the next batch submission.
43122944501Smrg *
43222944501Smrg * If a buffer is validated multiple times in a batch submission, it ends up
43322944501Smrg * with the intersection of the memory type flags and the union of the
43422944501Smrg * access flags.
43522944501Smrg */
43622944501Smrgstatic void
43722944501Smrgdrm_intel_add_validate_buffer(drm_intel_bo *bo)
43822944501Smrg{
43922944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
44022944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
44122944501Smrg	int index;
44222944501Smrg
44322944501Smrg	if (bo_gem->validate_index != -1)
44422944501Smrg		return;
44522944501Smrg
44622944501Smrg	/* Extend the array of validation entries as necessary. */
44722944501Smrg	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
44822944501Smrg		int new_size = bufmgr_gem->exec_size * 2;
44922944501Smrg
45022944501Smrg		if (new_size == 0)
45122944501Smrg			new_size = 5;
45222944501Smrg
45322944501Smrg		bufmgr_gem->exec_objects =
45422944501Smrg		    realloc(bufmgr_gem->exec_objects,
45522944501Smrg			    sizeof(*bufmgr_gem->exec_objects) * new_size);
45622944501Smrg		bufmgr_gem->exec_bos =
45722944501Smrg		    realloc(bufmgr_gem->exec_bos,
45822944501Smrg			    sizeof(*bufmgr_gem->exec_bos) * new_size);
45922944501Smrg		bufmgr_gem->exec_size = new_size;
46022944501Smrg	}
46122944501Smrg
46222944501Smrg	index = bufmgr_gem->exec_count;
46322944501Smrg	bo_gem->validate_index = index;
46422944501Smrg	/* Fill in array entry */
46522944501Smrg	bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
46622944501Smrg	bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
46722944501Smrg	bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs;
46822944501Smrg	bufmgr_gem->exec_objects[index].alignment = 0;
46922944501Smrg	bufmgr_gem->exec_objects[index].offset = 0;
47022944501Smrg	bufmgr_gem->exec_bos[index] = bo;
47122944501Smrg	bufmgr_gem->exec_count++;
47222944501Smrg}
47322944501Smrg
47422944501Smrgstatic void
47522944501Smrgdrm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
47622944501Smrg{
47722944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
47822944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
47922944501Smrg	int index;
48022944501Smrg
48122944501Smrg	if (bo_gem->validate_index != -1) {
48222944501Smrg		if (need_fence)
48322944501Smrg			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
48422944501Smrg				EXEC_OBJECT_NEEDS_FENCE;
48522944501Smrg		return;
48622944501Smrg	}
48722944501Smrg
48822944501Smrg	/* Extend the array of validation entries as necessary. */
48922944501Smrg	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
49022944501Smrg		int new_size = bufmgr_gem->exec_size * 2;
49122944501Smrg
49222944501Smrg		if (new_size == 0)
49322944501Smrg			new_size = 5;
49422944501Smrg
49522944501Smrg		bufmgr_gem->exec2_objects =
49622944501Smrg			realloc(bufmgr_gem->exec2_objects,
49722944501Smrg				sizeof(*bufmgr_gem->exec2_objects) * new_size);
49822944501Smrg		bufmgr_gem->exec_bos =
49922944501Smrg			realloc(bufmgr_gem->exec_bos,
50022944501Smrg				sizeof(*bufmgr_gem->exec_bos) * new_size);
50122944501Smrg		bufmgr_gem->exec_size = new_size;
50222944501Smrg	}
50322944501Smrg
50422944501Smrg	index = bufmgr_gem->exec_count;
50522944501Smrg	bo_gem->validate_index = index;
50622944501Smrg	/* Fill in array entry */
50722944501Smrg	bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
50822944501Smrg	bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
50922944501Smrg	bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
51022944501Smrg	bufmgr_gem->exec2_objects[index].alignment = 0;
51122944501Smrg	bufmgr_gem->exec2_objects[index].offset = 0;
51222944501Smrg	bufmgr_gem->exec_bos[index] = bo;
51322944501Smrg	bufmgr_gem->exec2_objects[index].flags = 0;
51422944501Smrg	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
51522944501Smrg	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
51622944501Smrg	if (need_fence) {
51722944501Smrg		bufmgr_gem->exec2_objects[index].flags |=
51822944501Smrg			EXEC_OBJECT_NEEDS_FENCE;
51922944501Smrg	}
52022944501Smrg	bufmgr_gem->exec_count++;
52122944501Smrg}
52222944501Smrg
52322944501Smrg#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
52422944501Smrg	sizeof(uint32_t))
52522944501Smrg
52622944501Smrgstatic void
52722944501Smrgdrm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem,
52822944501Smrg				      drm_intel_bo_gem *bo_gem)
52922944501Smrg{
53022944501Smrg	int size;
53122944501Smrg
53222944501Smrg	assert(!bo_gem->used_as_reloc_target);
53322944501Smrg
53422944501Smrg	/* The older chipsets are far-less flexible in terms of tiling,
53522944501Smrg	 * and require tiled buffer to be size aligned in the aperture.
53622944501Smrg	 * This means that in the worst possible case we will need a hole
53722944501Smrg	 * twice as large as the object in order for it to fit into the
53822944501Smrg	 * aperture. Optimal packing is for wimps.
53922944501Smrg	 */
54022944501Smrg	size = bo_gem->bo.size;
5419ce4edccSmrg	if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) {
5429ce4edccSmrg		int min_size;
5439ce4edccSmrg
5449ce4edccSmrg		if (bufmgr_gem->has_relaxed_fencing) {
5459ce4edccSmrg			if (bufmgr_gem->gen == 3)
5469ce4edccSmrg				min_size = 1024*1024;
5479ce4edccSmrg			else
5489ce4edccSmrg				min_size = 512*1024;
5499ce4edccSmrg
5509ce4edccSmrg			while (min_size < size)
5519ce4edccSmrg				min_size *= 2;
5529ce4edccSmrg		} else
5539ce4edccSmrg			min_size = size;
5549ce4edccSmrg
5559ce4edccSmrg		/* Account for worst-case alignment. */
5569ce4edccSmrg		size = 2 * min_size;
5579ce4edccSmrg	}
55822944501Smrg
55922944501Smrg	bo_gem->reloc_tree_size = size;
56022944501Smrg}
56122944501Smrg
56222944501Smrgstatic int
56322944501Smrgdrm_intel_setup_reloc_list(drm_intel_bo *bo)
56422944501Smrg{
56522944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
56622944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
56722944501Smrg	unsigned int max_relocs = bufmgr_gem->max_relocs;
56822944501Smrg
56922944501Smrg	if (bo->size / 4 < max_relocs)
57022944501Smrg		max_relocs = bo->size / 4;
57122944501Smrg
57222944501Smrg	bo_gem->relocs = malloc(max_relocs *
57322944501Smrg				sizeof(struct drm_i915_gem_relocation_entry));
57422944501Smrg	bo_gem->reloc_target_info = malloc(max_relocs *
575aaba2545Smrg					   sizeof(drm_intel_reloc_target));
57622944501Smrg	if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) {
57720131375Smrg		bo_gem->has_error = true;
57822944501Smrg
57922944501Smrg		free (bo_gem->relocs);
58022944501Smrg		bo_gem->relocs = NULL;
58122944501Smrg
58222944501Smrg		free (bo_gem->reloc_target_info);
58322944501Smrg		bo_gem->reloc_target_info = NULL;
58422944501Smrg
58522944501Smrg		return 1;
58622944501Smrg	}
58722944501Smrg
58822944501Smrg	return 0;
58922944501Smrg}
59022944501Smrg
59122944501Smrgstatic int
59222944501Smrgdrm_intel_gem_bo_busy(drm_intel_bo *bo)
59322944501Smrg{
59422944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
59522944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
59622944501Smrg	struct drm_i915_gem_busy busy;
59722944501Smrg	int ret;
59822944501Smrg
59920131375Smrg	if (bo_gem->reusable && bo_gem->idle)
60020131375Smrg		return false;
60120131375Smrg
602424e9256Smrg	memclear(busy);
60322944501Smrg	busy.handle = bo_gem->gem_handle;
60422944501Smrg
6056d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
60620131375Smrg	if (ret == 0) {
60720131375Smrg		bo_gem->idle = !busy.busy;
60820131375Smrg		return busy.busy;
60920131375Smrg	} else {
61020131375Smrg		return false;
61120131375Smrg	}
61222944501Smrg	return (ret == 0 && busy.busy);
61322944501Smrg}
61422944501Smrg
61522944501Smrgstatic int
61622944501Smrgdrm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem,
61722944501Smrg				  drm_intel_bo_gem *bo_gem, int state)
61822944501Smrg{
61922944501Smrg	struct drm_i915_gem_madvise madv;
62022944501Smrg
621424e9256Smrg	memclear(madv);
62222944501Smrg	madv.handle = bo_gem->gem_handle;
62322944501Smrg	madv.madv = state;
62422944501Smrg	madv.retained = 1;
6256d98c517Smrg	drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
62622944501Smrg
62722944501Smrg	return madv.retained;
62822944501Smrg}
62922944501Smrg
63022944501Smrgstatic int
63122944501Smrgdrm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv)
63222944501Smrg{
63322944501Smrg	return drm_intel_gem_bo_madvise_internal
63422944501Smrg		((drm_intel_bufmgr_gem *) bo->bufmgr,
63522944501Smrg		 (drm_intel_bo_gem *) bo,
63622944501Smrg		 madv);
63722944501Smrg}
63822944501Smrg
63922944501Smrg/* drop the oldest entries that have been purged by the kernel */
64022944501Smrgstatic void
64122944501Smrgdrm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
64222944501Smrg				    struct drm_intel_gem_bo_bucket *bucket)
64322944501Smrg{
64422944501Smrg	while (!DRMLISTEMPTY(&bucket->head)) {
64522944501Smrg		drm_intel_bo_gem *bo_gem;
64622944501Smrg
64722944501Smrg		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
64822944501Smrg				      bucket->head.next, head);
64922944501Smrg		if (drm_intel_gem_bo_madvise_internal
65022944501Smrg		    (bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
65122944501Smrg			break;
65222944501Smrg
65322944501Smrg		DRMLISTDEL(&bo_gem->head);
65422944501Smrg		drm_intel_gem_bo_free(&bo_gem->bo);
65522944501Smrg	}
65622944501Smrg}
65722944501Smrg
65822944501Smrgstatic drm_intel_bo *
65922944501Smrgdrm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
66022944501Smrg				const char *name,
66122944501Smrg				unsigned long size,
6626d98c517Smrg				unsigned long flags,
6636d98c517Smrg				uint32_t tiling_mode,
6646d98c517Smrg				unsigned long stride)
66522944501Smrg{
66622944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
66722944501Smrg	drm_intel_bo_gem *bo_gem;
66822944501Smrg	unsigned int page_size = getpagesize();
66922944501Smrg	int ret;
67022944501Smrg	struct drm_intel_gem_bo_bucket *bucket;
67120131375Smrg	bool alloc_from_cache;
67222944501Smrg	unsigned long bo_size;
67320131375Smrg	bool for_render = false;
67422944501Smrg
67522944501Smrg	if (flags & BO_ALLOC_FOR_RENDER)
67620131375Smrg		for_render = true;
67722944501Smrg
67822944501Smrg	/* Round the allocated size up to a power of two number of pages. */
67922944501Smrg	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
68022944501Smrg
68122944501Smrg	/* If we don't have caching at this size, don't actually round the
68222944501Smrg	 * allocation up.
68322944501Smrg	 */
68422944501Smrg	if (bucket == NULL) {
68522944501Smrg		bo_size = size;
68622944501Smrg		if (bo_size < page_size)
68722944501Smrg			bo_size = page_size;
68822944501Smrg	} else {
68922944501Smrg		bo_size = bucket->size;
69022944501Smrg	}
69122944501Smrg
69222944501Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
69322944501Smrg	/* Get a buffer out of the cache if available */
69422944501Smrgretry:
69520131375Smrg	alloc_from_cache = false;
69622944501Smrg	if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
69722944501Smrg		if (for_render) {
69822944501Smrg			/* Allocate new render-target BOs from the tail (MRU)
69922944501Smrg			 * of the list, as it will likely be hot in the GPU
70022944501Smrg			 * cache and in the aperture for us.
70122944501Smrg			 */
70222944501Smrg			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
70322944501Smrg					      bucket->head.prev, head);
70422944501Smrg			DRMLISTDEL(&bo_gem->head);
70520131375Smrg			alloc_from_cache = true;
70622944501Smrg		} else {
70722944501Smrg			/* For non-render-target BOs (where we're probably
70822944501Smrg			 * going to map it first thing in order to fill it
70922944501Smrg			 * with data), check if the last BO in the cache is
71022944501Smrg			 * unbusy, and only reuse in that case. Otherwise,
71122944501Smrg			 * allocating a new buffer is probably faster than
71222944501Smrg			 * waiting for the GPU to finish.
71322944501Smrg			 */
71422944501Smrg			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
71522944501Smrg					      bucket->head.next, head);
71622944501Smrg			if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
71720131375Smrg				alloc_from_cache = true;
71822944501Smrg				DRMLISTDEL(&bo_gem->head);
71922944501Smrg			}
72022944501Smrg		}
72122944501Smrg
72222944501Smrg		if (alloc_from_cache) {
72322944501Smrg			if (!drm_intel_gem_bo_madvise_internal
72422944501Smrg			    (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
72522944501Smrg				drm_intel_gem_bo_free(&bo_gem->bo);
72622944501Smrg				drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
72722944501Smrg								    bucket);
72822944501Smrg				goto retry;
72922944501Smrg			}
7306d98c517Smrg
7316d98c517Smrg			if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
7326d98c517Smrg								 tiling_mode,
7336d98c517Smrg								 stride)) {
7346d98c517Smrg				drm_intel_gem_bo_free(&bo_gem->bo);
7356d98c517Smrg				goto retry;
7366d98c517Smrg			}
73722944501Smrg		}
73822944501Smrg	}
73922944501Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
74022944501Smrg
74122944501Smrg	if (!alloc_from_cache) {
74222944501Smrg		struct drm_i915_gem_create create;
74322944501Smrg
74422944501Smrg		bo_gem = calloc(1, sizeof(*bo_gem));
74522944501Smrg		if (!bo_gem)
74622944501Smrg			return NULL;
74722944501Smrg
74822944501Smrg		bo_gem->bo.size = bo_size;
74920131375Smrg
750424e9256Smrg		memclear(create);
75122944501Smrg		create.size = bo_size;
75222944501Smrg
7536d98c517Smrg		ret = drmIoctl(bufmgr_gem->fd,
7546d98c517Smrg			       DRM_IOCTL_I915_GEM_CREATE,
7556d98c517Smrg			       &create);
75622944501Smrg		bo_gem->gem_handle = create.handle;
75722944501Smrg		bo_gem->bo.handle = bo_gem->gem_handle;
75822944501Smrg		if (ret != 0) {
75922944501Smrg			free(bo_gem);
76022944501Smrg			return NULL;
76122944501Smrg		}
76222944501Smrg		bo_gem->bo.bufmgr = bufmgr;
7636d98c517Smrg
7646d98c517Smrg		bo_gem->tiling_mode = I915_TILING_NONE;
7656d98c517Smrg		bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
7666d98c517Smrg		bo_gem->stride = 0;
7676d98c517Smrg
7683c748557Ssnj		/* drm_intel_gem_bo_free calls DRMLISTDEL() for an uninitialized
7693c748557Ssnj		   list (vma_list), so better set the list head here */
7703c748557Ssnj		DRMINITLISTHEAD(&bo_gem->name_list);
7713c748557Ssnj		DRMINITLISTHEAD(&bo_gem->vma_list);
7726d98c517Smrg		if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
7736d98c517Smrg							 tiling_mode,
7746d98c517Smrg							 stride)) {
7756d98c517Smrg		    drm_intel_gem_bo_free(&bo_gem->bo);
7766d98c517Smrg		    return NULL;
7776d98c517Smrg		}
77822944501Smrg	}
77922944501Smrg
78022944501Smrg	bo_gem->name = name;
78122944501Smrg	atomic_set(&bo_gem->refcount, 1);
78222944501Smrg	bo_gem->validate_index = -1;
78322944501Smrg	bo_gem->reloc_tree_fences = 0;
78420131375Smrg	bo_gem->used_as_reloc_target = false;
78520131375Smrg	bo_gem->has_error = false;
78620131375Smrg	bo_gem->reusable = true;
78720131375Smrg	bo_gem->aub_annotations = NULL;
78820131375Smrg	bo_gem->aub_annotation_count = 0;
78922944501Smrg
79022944501Smrg	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
79122944501Smrg
79222944501Smrg	DBG("bo_create: buf %d (%s) %ldb\n",
79322944501Smrg	    bo_gem->gem_handle, bo_gem->name, size);
79422944501Smrg
79522944501Smrg	return &bo_gem->bo;
79622944501Smrg}
79722944501Smrg
79822944501Smrgstatic drm_intel_bo *
79922944501Smrgdrm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
80022944501Smrg				  const char *name,
80122944501Smrg				  unsigned long size,
80222944501Smrg				  unsigned int alignment)
80322944501Smrg{
80422944501Smrg	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
8056d98c517Smrg					       BO_ALLOC_FOR_RENDER,
8066d98c517Smrg					       I915_TILING_NONE, 0);
80722944501Smrg}
80822944501Smrg
80922944501Smrgstatic drm_intel_bo *
81022944501Smrgdrm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr,
81122944501Smrg		       const char *name,
81222944501Smrg		       unsigned long size,
81322944501Smrg		       unsigned int alignment)
81422944501Smrg{
8156d98c517Smrg	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0,
8166d98c517Smrg					       I915_TILING_NONE, 0);
81722944501Smrg}
81822944501Smrg
81922944501Smrgstatic drm_intel_bo *
82022944501Smrgdrm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
82122944501Smrg			     int x, int y, int cpp, uint32_t *tiling_mode,
82222944501Smrg			     unsigned long *pitch, unsigned long flags)
82322944501Smrg{
82422944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
825aaba2545Smrg	unsigned long size, stride;
826aaba2545Smrg	uint32_t tiling;
82722944501Smrg
828aaba2545Smrg	do {
82920131375Smrg		unsigned long aligned_y, height_alignment;
830aaba2545Smrg
831aaba2545Smrg		tiling = *tiling_mode;
832aaba2545Smrg
833aaba2545Smrg		/* If we're tiled, our allocations are in 8 or 32-row blocks,
834aaba2545Smrg		 * so failure to align our height means that we won't allocate
835aaba2545Smrg		 * enough pages.
836aaba2545Smrg		 *
837aaba2545Smrg		 * If we're untiled, we still have to align to 2 rows high
838aaba2545Smrg		 * because the data port accesses 2x2 blocks even if the
839aaba2545Smrg		 * bottom row isn't to be rendered, so failure to align means
840aaba2545Smrg		 * we could walk off the end of the GTT and fault.  This is
841aaba2545Smrg		 * documented on 965, and may be the case on older chipsets
842aaba2545Smrg		 * too so we try to be careful.
843aaba2545Smrg		 */
844aaba2545Smrg		aligned_y = y;
84520131375Smrg		height_alignment = 2;
84620131375Smrg
84720131375Smrg		if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE)
84820131375Smrg			height_alignment = 16;
84920131375Smrg		else if (tiling == I915_TILING_X
85020131375Smrg			|| (IS_915(bufmgr_gem->pci_device)
85120131375Smrg			    && tiling == I915_TILING_Y))
85220131375Smrg			height_alignment = 8;
853aaba2545Smrg		else if (tiling == I915_TILING_Y)
85420131375Smrg			height_alignment = 32;
85520131375Smrg		aligned_y = ALIGN(y, height_alignment);
856aaba2545Smrg
857aaba2545Smrg		stride = x * cpp;
8586d98c517Smrg		stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode);
859aaba2545Smrg		size = stride * aligned_y;
860aaba2545Smrg		size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
861aaba2545Smrg	} while (*tiling_mode != tiling);
86222944501Smrg	*pitch = stride;
86322944501Smrg
8646d98c517Smrg	if (tiling == I915_TILING_NONE)
8656d98c517Smrg		stride = 0;
8666d98c517Smrg
8676d98c517Smrg	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags,
8686d98c517Smrg					       tiling, stride);
86922944501Smrg}
87022944501Smrg
871a884aba1Smrgstatic drm_intel_bo *
872a884aba1Smrgdrm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
873a884aba1Smrg				const char *name,
874a884aba1Smrg				void *addr,
875a884aba1Smrg				uint32_t tiling_mode,
876a884aba1Smrg				uint32_t stride,
877a884aba1Smrg				unsigned long size,
878a884aba1Smrg				unsigned long flags)
879a884aba1Smrg{
880a884aba1Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
881a884aba1Smrg	drm_intel_bo_gem *bo_gem;
882a884aba1Smrg	int ret;
883a884aba1Smrg	struct drm_i915_gem_userptr userptr;
884a884aba1Smrg
885a884aba1Smrg	/* Tiling with userptr surfaces is not supported
886a884aba1Smrg	 * on all hardware so refuse it for time being.
887a884aba1Smrg	 */
888a884aba1Smrg	if (tiling_mode != I915_TILING_NONE)
889a884aba1Smrg		return NULL;
890a884aba1Smrg
891a884aba1Smrg	bo_gem = calloc(1, sizeof(*bo_gem));
892a884aba1Smrg	if (!bo_gem)
893a884aba1Smrg		return NULL;
894a884aba1Smrg
895a884aba1Smrg	bo_gem->bo.size = size;
896a884aba1Smrg
897424e9256Smrg	memclear(userptr);
898a884aba1Smrg	userptr.user_ptr = (__u64)((unsigned long)addr);
899a884aba1Smrg	userptr.user_size = size;
900a884aba1Smrg	userptr.flags = flags;
901a884aba1Smrg
902a884aba1Smrg	ret = drmIoctl(bufmgr_gem->fd,
903a884aba1Smrg			DRM_IOCTL_I915_GEM_USERPTR,
904a884aba1Smrg			&userptr);
905a884aba1Smrg	if (ret != 0) {
906a884aba1Smrg		DBG("bo_create_userptr: "
907a884aba1Smrg		    "ioctl failed with user ptr %p size 0x%lx, "
908a884aba1Smrg		    "user flags 0x%lx\n", addr, size, flags);
909a884aba1Smrg		free(bo_gem);
910a884aba1Smrg		return NULL;
911a884aba1Smrg	}
912a884aba1Smrg
913a884aba1Smrg	bo_gem->gem_handle = userptr.handle;
914a884aba1Smrg	bo_gem->bo.handle = bo_gem->gem_handle;
915a884aba1Smrg	bo_gem->bo.bufmgr    = bufmgr;
916a884aba1Smrg	bo_gem->is_userptr   = true;
917a884aba1Smrg	bo_gem->bo.virtual   = addr;
918a884aba1Smrg	/* Save the address provided by user */
919a884aba1Smrg	bo_gem->user_virtual = addr;
920a884aba1Smrg	bo_gem->tiling_mode  = I915_TILING_NONE;
921a884aba1Smrg	bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
922a884aba1Smrg	bo_gem->stride       = 0;
923a884aba1Smrg
924a884aba1Smrg	DRMINITLISTHEAD(&bo_gem->name_list);
925a884aba1Smrg	DRMINITLISTHEAD(&bo_gem->vma_list);
926a884aba1Smrg
927a884aba1Smrg	bo_gem->name = name;
928a884aba1Smrg	atomic_set(&bo_gem->refcount, 1);
929a884aba1Smrg	bo_gem->validate_index = -1;
930a884aba1Smrg	bo_gem->reloc_tree_fences = 0;
931a884aba1Smrg	bo_gem->used_as_reloc_target = false;
932a884aba1Smrg	bo_gem->has_error = false;
933a884aba1Smrg	bo_gem->reusable = false;
934a884aba1Smrg
935a884aba1Smrg	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
936a884aba1Smrg
937a884aba1Smrg	DBG("bo_create_userptr: "
938a884aba1Smrg	    "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n",
939a884aba1Smrg		addr, bo_gem->gem_handle, bo_gem->name,
940a884aba1Smrg		size, stride, tiling_mode);
941a884aba1Smrg
942a884aba1Smrg	return &bo_gem->bo;
943a884aba1Smrg}
944a884aba1Smrg
945424e9256Smrgstatic bool
946424e9256Smrghas_userptr(drm_intel_bufmgr_gem *bufmgr_gem)
947424e9256Smrg{
948424e9256Smrg	int ret;
949424e9256Smrg	void *ptr;
950424e9256Smrg	long pgsz;
951424e9256Smrg	struct drm_i915_gem_userptr userptr;
952424e9256Smrg
953424e9256Smrg	pgsz = sysconf(_SC_PAGESIZE);
954424e9256Smrg	assert(pgsz > 0);
955424e9256Smrg
956424e9256Smrg	ret = posix_memalign(&ptr, pgsz, pgsz);
957424e9256Smrg	if (ret) {
958424e9256Smrg		DBG("Failed to get a page (%ld) for userptr detection!\n",
959424e9256Smrg			pgsz);
960424e9256Smrg		return false;
961424e9256Smrg	}
962424e9256Smrg
963424e9256Smrg	memclear(userptr);
964424e9256Smrg	userptr.user_ptr = (__u64)(unsigned long)ptr;
965424e9256Smrg	userptr.user_size = pgsz;
966424e9256Smrg
967424e9256Smrgretry:
968424e9256Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
969424e9256Smrg	if (ret) {
970424e9256Smrg		if (errno == ENODEV && userptr.flags == 0) {
971424e9256Smrg			userptr.flags = I915_USERPTR_UNSYNCHRONIZED;
972424e9256Smrg			goto retry;
973424e9256Smrg		}
974424e9256Smrg		free(ptr);
975424e9256Smrg		return false;
976424e9256Smrg	}
977424e9256Smrg
978424e9256Smrg	/* We don't release the userptr bo here as we want to keep the
979424e9256Smrg	 * kernel mm tracking alive for our lifetime. The first time we
980424e9256Smrg	 * create a userptr object the kernel has to install a mmu_notifer
981424e9256Smrg	 * which is a heavyweight operation (e.g. it requires taking all
982424e9256Smrg	 * mm_locks and stop_machine()).
983424e9256Smrg	 */
984424e9256Smrg
985424e9256Smrg	bufmgr_gem->userptr_active.ptr = ptr;
986424e9256Smrg	bufmgr_gem->userptr_active.handle = userptr.handle;
987424e9256Smrg
988424e9256Smrg	return true;
989424e9256Smrg}
990424e9256Smrg
991424e9256Smrgstatic drm_intel_bo *
992424e9256Smrgcheck_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
993424e9256Smrg		       const char *name,
994424e9256Smrg		       void *addr,
995424e9256Smrg		       uint32_t tiling_mode,
996424e9256Smrg		       uint32_t stride,
997424e9256Smrg		       unsigned long size,
998424e9256Smrg		       unsigned long flags)
999424e9256Smrg{
1000424e9256Smrg	if (has_userptr((drm_intel_bufmgr_gem *)bufmgr))
1001424e9256Smrg		bufmgr->bo_alloc_userptr = drm_intel_gem_bo_alloc_userptr;
1002424e9256Smrg	else
1003424e9256Smrg		bufmgr->bo_alloc_userptr = NULL;
1004424e9256Smrg
1005424e9256Smrg	return drm_intel_bo_alloc_userptr(bufmgr, name, addr,
1006424e9256Smrg					  tiling_mode, stride, size, flags);
1007424e9256Smrg}
1008424e9256Smrg
100922944501Smrg/**
101022944501Smrg * Returns a drm_intel_bo wrapping the given buffer object handle.
101122944501Smrg *
101222944501Smrg * This can be used when one application needs to pass a buffer object
101322944501Smrg * to another.
101422944501Smrg */
1015424e9256Smrgdrm_intel_bo *
101622944501Smrgdrm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr,
101722944501Smrg				  const char *name,
101822944501Smrg				  unsigned int handle)
101922944501Smrg{
102022944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
102122944501Smrg	drm_intel_bo_gem *bo_gem;
102222944501Smrg	int ret;
102322944501Smrg	struct drm_gem_open open_arg;
102422944501Smrg	struct drm_i915_gem_get_tiling get_tiling;
102520131375Smrg	drmMMListHead *list;
102622944501Smrg
102720131375Smrg	/* At the moment most applications only have a few named bo.
102820131375Smrg	 * For instance, in a DRI client only the render buffers passed
102920131375Smrg	 * between X and the client are named. And since X returns the
103020131375Smrg	 * alternating names for the front/back buffer a linear search
103120131375Smrg	 * provides a sufficiently fast match.
103220131375Smrg	 */
1033a884aba1Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
103420131375Smrg	for (list = bufmgr_gem->named.next;
103520131375Smrg	     list != &bufmgr_gem->named;
103620131375Smrg	     list = list->next) {
103720131375Smrg		bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list);
103820131375Smrg		if (bo_gem->global_name == handle) {
103920131375Smrg			drm_intel_gem_bo_reference(&bo_gem->bo);
1040a884aba1Smrg			pthread_mutex_unlock(&bufmgr_gem->lock);
104120131375Smrg			return &bo_gem->bo;
104220131375Smrg		}
104320131375Smrg	}
104422944501Smrg
1045424e9256Smrg	memclear(open_arg);
104622944501Smrg	open_arg.name = handle;
10476d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
10486d98c517Smrg		       DRM_IOCTL_GEM_OPEN,
10496d98c517Smrg		       &open_arg);
105022944501Smrg	if (ret != 0) {
10519ce4edccSmrg		DBG("Couldn't reference %s handle 0x%08x: %s\n",
10529ce4edccSmrg		    name, handle, strerror(errno));
1053a884aba1Smrg		pthread_mutex_unlock(&bufmgr_gem->lock);
105422944501Smrg		return NULL;
105522944501Smrg	}
105620131375Smrg        /* Now see if someone has used a prime handle to get this
105720131375Smrg         * object from the kernel before by looking through the list
105820131375Smrg         * again for a matching gem_handle
105920131375Smrg         */
106020131375Smrg	for (list = bufmgr_gem->named.next;
106120131375Smrg	     list != &bufmgr_gem->named;
106220131375Smrg	     list = list->next) {
106320131375Smrg		bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list);
106420131375Smrg		if (bo_gem->gem_handle == open_arg.handle) {
106520131375Smrg			drm_intel_gem_bo_reference(&bo_gem->bo);
1066a884aba1Smrg			pthread_mutex_unlock(&bufmgr_gem->lock);
106720131375Smrg			return &bo_gem->bo;
106820131375Smrg		}
106920131375Smrg	}
107020131375Smrg
107120131375Smrg	bo_gem = calloc(1, sizeof(*bo_gem));
1072a884aba1Smrg	if (!bo_gem) {
1073a884aba1Smrg		pthread_mutex_unlock(&bufmgr_gem->lock);
107420131375Smrg		return NULL;
1075a884aba1Smrg	}
107620131375Smrg
107722944501Smrg	bo_gem->bo.size = open_arg.size;
107822944501Smrg	bo_gem->bo.offset = 0;
107920131375Smrg	bo_gem->bo.offset64 = 0;
108022944501Smrg	bo_gem->bo.virtual = NULL;
108122944501Smrg	bo_gem->bo.bufmgr = bufmgr;
108222944501Smrg	bo_gem->name = name;
108322944501Smrg	atomic_set(&bo_gem->refcount, 1);
108422944501Smrg	bo_gem->validate_index = -1;
108522944501Smrg	bo_gem->gem_handle = open_arg.handle;
108620131375Smrg	bo_gem->bo.handle = open_arg.handle;
108722944501Smrg	bo_gem->global_name = handle;
108820131375Smrg	bo_gem->reusable = false;
108922944501Smrg
1090424e9256Smrg	memclear(get_tiling);
109122944501Smrg	get_tiling.handle = bo_gem->gem_handle;
10926d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
10936d98c517Smrg		       DRM_IOCTL_I915_GEM_GET_TILING,
10946d98c517Smrg		       &get_tiling);
109522944501Smrg	if (ret != 0) {
109622944501Smrg		drm_intel_gem_bo_unreference(&bo_gem->bo);
1097a884aba1Smrg		pthread_mutex_unlock(&bufmgr_gem->lock);
109822944501Smrg		return NULL;
109922944501Smrg	}
110022944501Smrg	bo_gem->tiling_mode = get_tiling.tiling_mode;
110122944501Smrg	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
11026d98c517Smrg	/* XXX stride is unknown */
110322944501Smrg	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
110422944501Smrg
110520131375Smrg	DRMINITLISTHEAD(&bo_gem->vma_list);
110620131375Smrg	DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
1107a884aba1Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
110822944501Smrg	DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
110922944501Smrg
111022944501Smrg	return &bo_gem->bo;
111122944501Smrg}
111222944501Smrg
111322944501Smrgstatic void
111422944501Smrgdrm_intel_gem_bo_free(drm_intel_bo *bo)
111522944501Smrg{
111622944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
111722944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
111822944501Smrg	struct drm_gem_close close;
111922944501Smrg	int ret;
112022944501Smrg
112120131375Smrg	DRMLISTDEL(&bo_gem->vma_list);
112220131375Smrg	if (bo_gem->mem_virtual) {
112320131375Smrg		VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0));
1124a884aba1Smrg		drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
112520131375Smrg		bufmgr_gem->vma_count--;
112620131375Smrg	}
112720131375Smrg	if (bo_gem->gtt_virtual) {
1128a884aba1Smrg		drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
112920131375Smrg		bufmgr_gem->vma_count--;
113020131375Smrg	}
113122944501Smrg
113222944501Smrg	/* Close this object */
1133424e9256Smrg	memclear(close);
113422944501Smrg	close.handle = bo_gem->gem_handle;
11356d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close);
113622944501Smrg	if (ret != 0) {
11379ce4edccSmrg		DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
11389ce4edccSmrg		    bo_gem->gem_handle, bo_gem->name, strerror(errno));
113922944501Smrg	}
114020131375Smrg	free(bo_gem->aub_annotations);
114122944501Smrg	free(bo);
114222944501Smrg}
114322944501Smrg
114420131375Smrgstatic void
114520131375Smrgdrm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo)
114620131375Smrg{
114720131375Smrg#if HAVE_VALGRIND
114820131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
114920131375Smrg
115020131375Smrg	if (bo_gem->mem_virtual)
115120131375Smrg		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size);
115220131375Smrg
115320131375Smrg	if (bo_gem->gtt_virtual)
115420131375Smrg		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size);
115520131375Smrg#endif
115620131375Smrg}
115720131375Smrg
115822944501Smrg/** Frees all cached buffers significantly older than @time. */
115922944501Smrgstatic void
116022944501Smrgdrm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
116122944501Smrg{
116222944501Smrg	int i;
116322944501Smrg
11646d98c517Smrg	if (bufmgr_gem->time == time)
11656d98c517Smrg		return;
11666d98c517Smrg
1167aaba2545Smrg	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
116822944501Smrg		struct drm_intel_gem_bo_bucket *bucket =
116922944501Smrg		    &bufmgr_gem->cache_bucket[i];
117022944501Smrg
117122944501Smrg		while (!DRMLISTEMPTY(&bucket->head)) {
117222944501Smrg			drm_intel_bo_gem *bo_gem;
117322944501Smrg
117422944501Smrg			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
117522944501Smrg					      bucket->head.next, head);
117622944501Smrg			if (time - bo_gem->free_time <= 1)
117722944501Smrg				break;
117822944501Smrg
117922944501Smrg			DRMLISTDEL(&bo_gem->head);
118022944501Smrg
118122944501Smrg			drm_intel_gem_bo_free(&bo_gem->bo);
118222944501Smrg		}
118322944501Smrg	}
11846d98c517Smrg
11856d98c517Smrg	bufmgr_gem->time = time;
118622944501Smrg}
118722944501Smrg
118820131375Smrgstatic void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem)
118920131375Smrg{
119020131375Smrg	int limit;
119120131375Smrg
119220131375Smrg	DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__,
119320131375Smrg	    bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max);
119420131375Smrg
119520131375Smrg	if (bufmgr_gem->vma_max < 0)
119620131375Smrg		return;
119720131375Smrg
119820131375Smrg	/* We may need to evict a few entries in order to create new mmaps */
119920131375Smrg	limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open;
120020131375Smrg	if (limit < 0)
120120131375Smrg		limit = 0;
120220131375Smrg
120320131375Smrg	while (bufmgr_gem->vma_count > limit) {
120420131375Smrg		drm_intel_bo_gem *bo_gem;
120520131375Smrg
120620131375Smrg		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
120720131375Smrg				      bufmgr_gem->vma_cache.next,
120820131375Smrg				      vma_list);
120920131375Smrg		assert(bo_gem->map_count == 0);
121020131375Smrg		DRMLISTDELINIT(&bo_gem->vma_list);
121120131375Smrg
121220131375Smrg		if (bo_gem->mem_virtual) {
1213a884aba1Smrg			drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
121420131375Smrg			bo_gem->mem_virtual = NULL;
121520131375Smrg			bufmgr_gem->vma_count--;
121620131375Smrg		}
121720131375Smrg		if (bo_gem->gtt_virtual) {
1218a884aba1Smrg			drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
121920131375Smrg			bo_gem->gtt_virtual = NULL;
122020131375Smrg			bufmgr_gem->vma_count--;
122120131375Smrg		}
122220131375Smrg	}
122320131375Smrg}
122420131375Smrg
122520131375Smrgstatic void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem,
122620131375Smrg				       drm_intel_bo_gem *bo_gem)
122720131375Smrg{
122820131375Smrg	bufmgr_gem->vma_open--;
122920131375Smrg	DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache);
123020131375Smrg	if (bo_gem->mem_virtual)
123120131375Smrg		bufmgr_gem->vma_count++;
123220131375Smrg	if (bo_gem->gtt_virtual)
123320131375Smrg		bufmgr_gem->vma_count++;
123420131375Smrg	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
123520131375Smrg}
123620131375Smrg
123720131375Smrgstatic void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem,
123820131375Smrg				      drm_intel_bo_gem *bo_gem)
123920131375Smrg{
124020131375Smrg	bufmgr_gem->vma_open++;
124120131375Smrg	DRMLISTDEL(&bo_gem->vma_list);
124220131375Smrg	if (bo_gem->mem_virtual)
124320131375Smrg		bufmgr_gem->vma_count--;
124420131375Smrg	if (bo_gem->gtt_virtual)
124520131375Smrg		bufmgr_gem->vma_count--;
124620131375Smrg	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
124720131375Smrg}
124820131375Smrg
124922944501Smrgstatic void
125022944501Smrgdrm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
125122944501Smrg{
125222944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
125322944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
125422944501Smrg	struct drm_intel_gem_bo_bucket *bucket;
125522944501Smrg	int i;
125622944501Smrg
125722944501Smrg	/* Unreference all the target buffers */
125822944501Smrg	for (i = 0; i < bo_gem->reloc_count; i++) {
1259aaba2545Smrg		if (bo_gem->reloc_target_info[i].bo != bo) {
1260aaba2545Smrg			drm_intel_gem_bo_unreference_locked_timed(bo_gem->
1261aaba2545Smrg								  reloc_target_info[i].bo,
1262aaba2545Smrg								  time);
1263aaba2545Smrg		}
126422944501Smrg	}
126522944501Smrg	bo_gem->reloc_count = 0;
126620131375Smrg	bo_gem->used_as_reloc_target = false;
126722944501Smrg
126822944501Smrg	DBG("bo_unreference final: %d (%s)\n",
126922944501Smrg	    bo_gem->gem_handle, bo_gem->name);
127022944501Smrg
127122944501Smrg	/* release memory associated with this object */
127222944501Smrg	if (bo_gem->reloc_target_info) {
127322944501Smrg		free(bo_gem->reloc_target_info);
127422944501Smrg		bo_gem->reloc_target_info = NULL;
127522944501Smrg	}
127622944501Smrg	if (bo_gem->relocs) {
127722944501Smrg		free(bo_gem->relocs);
127822944501Smrg		bo_gem->relocs = NULL;
127922944501Smrg	}
128022944501Smrg
128120131375Smrg	/* Clear any left-over mappings */
128220131375Smrg	if (bo_gem->map_count) {
128320131375Smrg		DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count);
128420131375Smrg		bo_gem->map_count = 0;
128520131375Smrg		drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
128620131375Smrg		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
128720131375Smrg	}
128820131375Smrg
128920131375Smrg	DRMLISTDEL(&bo_gem->name_list);
129020131375Smrg
129122944501Smrg	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
129222944501Smrg	/* Put the buffer into our internal cache for reuse if we can. */
129322944501Smrg	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
129422944501Smrg	    drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
129522944501Smrg					      I915_MADV_DONTNEED)) {
129622944501Smrg		bo_gem->free_time = time;
129722944501Smrg
129822944501Smrg		bo_gem->name = NULL;
129922944501Smrg		bo_gem->validate_index = -1;
130022944501Smrg
130122944501Smrg		DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
130222944501Smrg	} else {
130322944501Smrg		drm_intel_gem_bo_free(bo);
130422944501Smrg	}
130522944501Smrg}
130622944501Smrg
130722944501Smrgstatic void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
130822944501Smrg						      time_t time)
130922944501Smrg{
131022944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
131122944501Smrg
131222944501Smrg	assert(atomic_read(&bo_gem->refcount) > 0);
131322944501Smrg	if (atomic_dec_and_test(&bo_gem->refcount))
131422944501Smrg		drm_intel_gem_bo_unreference_final(bo, time);
131522944501Smrg}
131622944501Smrg
131722944501Smrgstatic void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
131822944501Smrg{
131922944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
132022944501Smrg
132122944501Smrg	assert(atomic_read(&bo_gem->refcount) > 0);
1322a884aba1Smrg
1323a884aba1Smrg	if (atomic_add_unless(&bo_gem->refcount, -1, 1)) {
132422944501Smrg		drm_intel_bufmgr_gem *bufmgr_gem =
132522944501Smrg		    (drm_intel_bufmgr_gem *) bo->bufmgr;
132622944501Smrg		struct timespec time;
132722944501Smrg
132822944501Smrg		clock_gettime(CLOCK_MONOTONIC, &time);
132922944501Smrg
133022944501Smrg		pthread_mutex_lock(&bufmgr_gem->lock);
1331a884aba1Smrg
1332a884aba1Smrg		if (atomic_dec_and_test(&bo_gem->refcount)) {
1333a884aba1Smrg			drm_intel_gem_bo_unreference_final(bo, time.tv_sec);
1334a884aba1Smrg			drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec);
1335a884aba1Smrg		}
1336a884aba1Smrg
133722944501Smrg		pthread_mutex_unlock(&bufmgr_gem->lock);
133822944501Smrg	}
133922944501Smrg}
134022944501Smrg
134122944501Smrgstatic int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
134222944501Smrg{
134322944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
134422944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
134522944501Smrg	struct drm_i915_gem_set_domain set_domain;
134622944501Smrg	int ret;
134722944501Smrg
1348a884aba1Smrg	if (bo_gem->is_userptr) {
1349a884aba1Smrg		/* Return the same user ptr */
1350a884aba1Smrg		bo->virtual = bo_gem->user_virtual;
1351a884aba1Smrg		return 0;
1352a884aba1Smrg	}
1353a884aba1Smrg
135422944501Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
135522944501Smrg
135620131375Smrg	if (bo_gem->map_count++ == 0)
135720131375Smrg		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
135820131375Smrg
135922944501Smrg	if (!bo_gem->mem_virtual) {
136022944501Smrg		struct drm_i915_gem_mmap mmap_arg;
136122944501Smrg
136220131375Smrg		DBG("bo_map: %d (%s), map_count=%d\n",
136320131375Smrg		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
136422944501Smrg
1365424e9256Smrg		memclear(mmap_arg);
136622944501Smrg		mmap_arg.handle = bo_gem->gem_handle;
136722944501Smrg		mmap_arg.size = bo->size;
13686d98c517Smrg		ret = drmIoctl(bufmgr_gem->fd,
13696d98c517Smrg			       DRM_IOCTL_I915_GEM_MMAP,
13706d98c517Smrg			       &mmap_arg);
137122944501Smrg		if (ret != 0) {
137222944501Smrg			ret = -errno;
13739ce4edccSmrg			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
13749ce4edccSmrg			    __FILE__, __LINE__, bo_gem->gem_handle,
13759ce4edccSmrg			    bo_gem->name, strerror(errno));
137620131375Smrg			if (--bo_gem->map_count == 0)
137720131375Smrg				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
137822944501Smrg			pthread_mutex_unlock(&bufmgr_gem->lock);
137922944501Smrg			return ret;
138022944501Smrg		}
138120131375Smrg		VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
138222944501Smrg		bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
138322944501Smrg	}
138422944501Smrg	DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
138522944501Smrg	    bo_gem->mem_virtual);
138622944501Smrg	bo->virtual = bo_gem->mem_virtual;
138722944501Smrg
1388424e9256Smrg	memclear(set_domain);
138922944501Smrg	set_domain.handle = bo_gem->gem_handle;
139022944501Smrg	set_domain.read_domains = I915_GEM_DOMAIN_CPU;
139122944501Smrg	if (write_enable)
139222944501Smrg		set_domain.write_domain = I915_GEM_DOMAIN_CPU;
139322944501Smrg	else
139422944501Smrg		set_domain.write_domain = 0;
13956d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
13966d98c517Smrg		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
13976d98c517Smrg		       &set_domain);
139822944501Smrg	if (ret != 0) {
13999ce4edccSmrg		DBG("%s:%d: Error setting to CPU domain %d: %s\n",
14009ce4edccSmrg		    __FILE__, __LINE__, bo_gem->gem_handle,
14019ce4edccSmrg		    strerror(errno));
140222944501Smrg	}
140322944501Smrg
140420131375Smrg	if (write_enable)
140520131375Smrg		bo_gem->mapped_cpu_write = true;
140620131375Smrg
140720131375Smrg	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
140820131375Smrg	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size));
140922944501Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
141022944501Smrg
141122944501Smrg	return 0;
141222944501Smrg}
141322944501Smrg
141420131375Smrgstatic int
141520131375Smrgmap_gtt(drm_intel_bo *bo)
141622944501Smrg{
141722944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
141822944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
141922944501Smrg	int ret;
142022944501Smrg
1421a884aba1Smrg	if (bo_gem->is_userptr)
1422a884aba1Smrg		return -EINVAL;
1423a884aba1Smrg
142420131375Smrg	if (bo_gem->map_count++ == 0)
142520131375Smrg		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
142622944501Smrg
142722944501Smrg	/* Get a mapping of the buffer if we haven't before. */
142822944501Smrg	if (bo_gem->gtt_virtual == NULL) {
142922944501Smrg		struct drm_i915_gem_mmap_gtt mmap_arg;
143022944501Smrg
143120131375Smrg		DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
143220131375Smrg		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
143322944501Smrg
1434424e9256Smrg		memclear(mmap_arg);
143522944501Smrg		mmap_arg.handle = bo_gem->gem_handle;
143622944501Smrg
143722944501Smrg		/* Get the fake offset back... */
14386d98c517Smrg		ret = drmIoctl(bufmgr_gem->fd,
14396d98c517Smrg			       DRM_IOCTL_I915_GEM_MMAP_GTT,
14406d98c517Smrg			       &mmap_arg);
144122944501Smrg		if (ret != 0) {
144222944501Smrg			ret = -errno;
14439ce4edccSmrg			DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
14449ce4edccSmrg			    __FILE__, __LINE__,
14459ce4edccSmrg			    bo_gem->gem_handle, bo_gem->name,
14469ce4edccSmrg			    strerror(errno));
144720131375Smrg			if (--bo_gem->map_count == 0)
144820131375Smrg				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
144922944501Smrg			return ret;
145022944501Smrg		}
145122944501Smrg
145222944501Smrg		/* and mmap it */
1453aec75c42Sriastradh		ret = drmMap(bufmgr_gem->fd, mmap_arg.offset, bo->size,
1454aec75c42Sriastradh		    &bo_gem->gtt_virtual);
1455aec75c42Sriastradh		if (ret) {
145622944501Smrg			bo_gem->gtt_virtual = NULL;
14579ce4edccSmrg			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
14589ce4edccSmrg			    __FILE__, __LINE__,
14599ce4edccSmrg			    bo_gem->gem_handle, bo_gem->name,
14609ce4edccSmrg			    strerror(errno));
146120131375Smrg			if (--bo_gem->map_count == 0)
146220131375Smrg				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
146322944501Smrg			return ret;
146422944501Smrg		}
146522944501Smrg	}
146622944501Smrg
146722944501Smrg	bo->virtual = bo_gem->gtt_virtual;
146822944501Smrg
146922944501Smrg	DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
147022944501Smrg	    bo_gem->gtt_virtual);
147122944501Smrg
147220131375Smrg	return 0;
147320131375Smrg}
147420131375Smrg
1475424e9256Smrgint
1476a884aba1Smrgdrm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
147720131375Smrg{
147820131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
147920131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
148020131375Smrg	struct drm_i915_gem_set_domain set_domain;
148120131375Smrg	int ret;
148220131375Smrg
148320131375Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
148420131375Smrg
148520131375Smrg	ret = map_gtt(bo);
148620131375Smrg	if (ret) {
148720131375Smrg		pthread_mutex_unlock(&bufmgr_gem->lock);
148820131375Smrg		return ret;
148920131375Smrg	}
149020131375Smrg
149120131375Smrg	/* Now move it to the GTT domain so that the GPU and CPU
149220131375Smrg	 * caches are flushed and the GPU isn't actively using the
149320131375Smrg	 * buffer.
149420131375Smrg	 *
149520131375Smrg	 * The pagefault handler does this domain change for us when
149620131375Smrg	 * it has unbound the BO from the GTT, but it's up to us to
149720131375Smrg	 * tell it when we're about to use things if we had done
149820131375Smrg	 * rendering and it still happens to be bound to the GTT.
149920131375Smrg	 */
1500424e9256Smrg	memclear(set_domain);
150122944501Smrg	set_domain.handle = bo_gem->gem_handle;
150222944501Smrg	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
150322944501Smrg	set_domain.write_domain = I915_GEM_DOMAIN_GTT;
15046d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
15056d98c517Smrg		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
15066d98c517Smrg		       &set_domain);
150722944501Smrg	if (ret != 0) {
15089ce4edccSmrg		DBG("%s:%d: Error setting domain %d: %s\n",
15099ce4edccSmrg		    __FILE__, __LINE__, bo_gem->gem_handle,
15109ce4edccSmrg		    strerror(errno));
151122944501Smrg	}
151222944501Smrg
151320131375Smrg	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
151420131375Smrg	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
151522944501Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
151622944501Smrg
15176d98c517Smrg	return 0;
151822944501Smrg}
151922944501Smrg
152020131375Smrg/**
152120131375Smrg * Performs a mapping of the buffer object like the normal GTT
152220131375Smrg * mapping, but avoids waiting for the GPU to be done reading from or
152320131375Smrg * rendering to the buffer.
152420131375Smrg *
152520131375Smrg * This is used in the implementation of GL_ARB_map_buffer_range: The
152620131375Smrg * user asks to create a buffer, then does a mapping, fills some
152720131375Smrg * space, runs a drawing command, then asks to map it again without
152820131375Smrg * synchronizing because it guarantees that it won't write over the
152920131375Smrg * data that the GPU is busy using (or, more specifically, that if it
153020131375Smrg * does write over the data, it acknowledges that rendering is
153120131375Smrg * undefined).
153220131375Smrg */
153320131375Smrg
1534424e9256Smrgint
1535a884aba1Smrgdrm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
153622944501Smrg{
153722944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
153820131375Smrg#ifdef HAVE_VALGRIND
153920131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
154020131375Smrg#endif
154120131375Smrg	int ret;
154222944501Smrg
154320131375Smrg	/* If the CPU cache isn't coherent with the GTT, then use a
154420131375Smrg	 * regular synchronized mapping.  The problem is that we don't
154520131375Smrg	 * track where the buffer was last used on the CPU side in
154620131375Smrg	 * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so
154720131375Smrg	 * we would potentially corrupt the buffer even when the user
154820131375Smrg	 * does reasonable things.
154920131375Smrg	 */
155020131375Smrg	if (!bufmgr_gem->has_llc)
155120131375Smrg		return drm_intel_gem_bo_map_gtt(bo);
155222944501Smrg
155322944501Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
155420131375Smrg
155520131375Smrg	ret = map_gtt(bo);
155620131375Smrg	if (ret == 0) {
155720131375Smrg		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
155820131375Smrg		VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
155920131375Smrg	}
156020131375Smrg
156122944501Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
156222944501Smrg
156322944501Smrg	return ret;
156422944501Smrg}
156522944501Smrg
156622944501Smrgstatic int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
156722944501Smrg{
1568a884aba1Smrg	drm_intel_bufmgr_gem *bufmgr_gem;
156922944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
157020131375Smrg	int ret = 0;
157122944501Smrg
157222944501Smrg	if (bo == NULL)
157322944501Smrg		return 0;
157422944501Smrg
1575a884aba1Smrg	if (bo_gem->is_userptr)
1576a884aba1Smrg		return 0;
1577a884aba1Smrg
1578a884aba1Smrg	bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1579a884aba1Smrg
158022944501Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
158122944501Smrg
158220131375Smrg	if (bo_gem->map_count <= 0) {
158320131375Smrg		DBG("attempted to unmap an unmapped bo\n");
158420131375Smrg		pthread_mutex_unlock(&bufmgr_gem->lock);
158520131375Smrg		/* Preserve the old behaviour of just treating this as a
158620131375Smrg		 * no-op rather than reporting the error.
158720131375Smrg		 */
158820131375Smrg		return 0;
158920131375Smrg	}
159020131375Smrg
159120131375Smrg	if (bo_gem->mapped_cpu_write) {
159220131375Smrg		struct drm_i915_gem_sw_finish sw_finish;
159320131375Smrg
159420131375Smrg		/* Cause a flush to happen if the buffer's pinned for
159520131375Smrg		 * scanout, so the results show up in a timely manner.
159620131375Smrg		 * Unlike GTT set domains, this only does work if the
159720131375Smrg		 * buffer should be scanout-related.
159820131375Smrg		 */
1599424e9256Smrg		memclear(sw_finish);
160020131375Smrg		sw_finish.handle = bo_gem->gem_handle;
160120131375Smrg		ret = drmIoctl(bufmgr_gem->fd,
160220131375Smrg			       DRM_IOCTL_I915_GEM_SW_FINISH,
160320131375Smrg			       &sw_finish);
160420131375Smrg		ret = ret == -1 ? -errno : 0;
160520131375Smrg
160620131375Smrg		bo_gem->mapped_cpu_write = false;
160720131375Smrg	}
160822944501Smrg
160920131375Smrg	/* We need to unmap after every innovation as we cannot track
161020131375Smrg	 * an open vma for every bo as that will exhaasut the system
161120131375Smrg	 * limits and cause later failures.
161220131375Smrg	 */
161320131375Smrg	if (--bo_gem->map_count == 0) {
161420131375Smrg		drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
161520131375Smrg		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
161620131375Smrg		bo->virtual = NULL;
161720131375Smrg	}
161822944501Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
161922944501Smrg
162022944501Smrg	return ret;
162122944501Smrg}
162222944501Smrg
1623424e9256Smrgint
1624a884aba1Smrgdrm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
162520131375Smrg{
162620131375Smrg	return drm_intel_gem_bo_unmap(bo);
162720131375Smrg}
162820131375Smrg
162922944501Smrgstatic int
163022944501Smrgdrm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
163122944501Smrg			 unsigned long size, const void *data)
163222944501Smrg{
163322944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
163422944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
163522944501Smrg	struct drm_i915_gem_pwrite pwrite;
163622944501Smrg	int ret;
163722944501Smrg
1638a884aba1Smrg	if (bo_gem->is_userptr)
1639a884aba1Smrg		return -EINVAL;
1640a884aba1Smrg
1641424e9256Smrg	memclear(pwrite);
164222944501Smrg	pwrite.handle = bo_gem->gem_handle;
164322944501Smrg	pwrite.offset = offset;
164422944501Smrg	pwrite.size = size;
164522944501Smrg	pwrite.data_ptr = (uint64_t) (uintptr_t) data;
16466d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
16476d98c517Smrg		       DRM_IOCTL_I915_GEM_PWRITE,
16486d98c517Smrg		       &pwrite);
164922944501Smrg	if (ret != 0) {
165022944501Smrg		ret = -errno;
16519ce4edccSmrg		DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
16529ce4edccSmrg		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
16539ce4edccSmrg		    (int)size, strerror(errno));
165422944501Smrg	}
165522944501Smrg
165622944501Smrg	return ret;
165722944501Smrg}
165822944501Smrg
165922944501Smrgstatic int
166022944501Smrgdrm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id)
166122944501Smrg{
166222944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
166322944501Smrg	struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id;
166422944501Smrg	int ret;
166522944501Smrg
1666424e9256Smrg	memclear(get_pipe_from_crtc_id);
166722944501Smrg	get_pipe_from_crtc_id.crtc_id = crtc_id;
16686d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
16696d98c517Smrg		       DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
16706d98c517Smrg		       &get_pipe_from_crtc_id);
167122944501Smrg	if (ret != 0) {
167222944501Smrg		/* We return -1 here to signal that we don't
167322944501Smrg		 * know which pipe is associated with this crtc.
167422944501Smrg		 * This lets the caller know that this information
167522944501Smrg		 * isn't available; using the wrong pipe for
167622944501Smrg		 * vblank waiting can cause the chipset to lock up
167722944501Smrg		 */
167822944501Smrg		return -1;
167922944501Smrg	}
168022944501Smrg
168122944501Smrg	return get_pipe_from_crtc_id.pipe;
168222944501Smrg}
168322944501Smrg
168422944501Smrgstatic int
168522944501Smrgdrm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
168622944501Smrg			     unsigned long size, void *data)
168722944501Smrg{
168822944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
168922944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
169022944501Smrg	struct drm_i915_gem_pread pread;
169122944501Smrg	int ret;
169222944501Smrg
1693a884aba1Smrg	if (bo_gem->is_userptr)
1694a884aba1Smrg		return -EINVAL;
1695a884aba1Smrg
1696424e9256Smrg	memclear(pread);
169722944501Smrg	pread.handle = bo_gem->gem_handle;
169822944501Smrg	pread.offset = offset;
169922944501Smrg	pread.size = size;
170022944501Smrg	pread.data_ptr = (uint64_t) (uintptr_t) data;
17016d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
17026d98c517Smrg		       DRM_IOCTL_I915_GEM_PREAD,
17036d98c517Smrg		       &pread);
170422944501Smrg	if (ret != 0) {
170522944501Smrg		ret = -errno;
17069ce4edccSmrg		DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
17079ce4edccSmrg		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
17089ce4edccSmrg		    (int)size, strerror(errno));
170922944501Smrg	}
171022944501Smrg
171122944501Smrg	return ret;
171222944501Smrg}
171322944501Smrg
17149ce4edccSmrg/** Waits for all GPU rendering with the object to have completed. */
171522944501Smrgstatic void
171622944501Smrgdrm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
171722944501Smrg{
17189ce4edccSmrg	drm_intel_gem_bo_start_gtt_access(bo, 1);
171922944501Smrg}
172022944501Smrg
172120131375Smrg/**
172220131375Smrg * Waits on a BO for the given amount of time.
172320131375Smrg *
172420131375Smrg * @bo: buffer object to wait for
172520131375Smrg * @timeout_ns: amount of time to wait in nanoseconds.
172620131375Smrg *   If value is less than 0, an infinite wait will occur.
172720131375Smrg *
172820131375Smrg * Returns 0 if the wait was successful ie. the last batch referencing the
172920131375Smrg * object has completed within the allotted time. Otherwise some negative return
173020131375Smrg * value describes the error. Of particular interest is -ETIME when the wait has
173120131375Smrg * failed to yield the desired result.
173220131375Smrg *
173320131375Smrg * Similar to drm_intel_gem_bo_wait_rendering except a timeout parameter allows
173420131375Smrg * the operation to give up after a certain amount of time. Another subtle
173520131375Smrg * difference is the internal locking semantics are different (this variant does
173620131375Smrg * not hold the lock for the duration of the wait). This makes the wait subject
173720131375Smrg * to a larger userspace race window.
173820131375Smrg *
173920131375Smrg * The implementation shall wait until the object is no longer actively
174020131375Smrg * referenced within a batch buffer at the time of the call. The wait will
174120131375Smrg * not guarantee that the buffer is re-issued via another thread, or an flinked
174220131375Smrg * handle. Userspace must make sure this race does not occur if such precision
174320131375Smrg * is important.
1744424e9256Smrg *
1745424e9256Smrg * Note that some kernels have broken the inifite wait for negative values
1746424e9256Smrg * promise, upgrade to latest stable kernels if this is the case.
174720131375Smrg */
1748424e9256Smrgint
1749a884aba1Smrgdrm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns)
175020131375Smrg{
175120131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
175220131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
175320131375Smrg	struct drm_i915_gem_wait wait;
175420131375Smrg	int ret;
175520131375Smrg
175620131375Smrg	if (!bufmgr_gem->has_wait_timeout) {
175720131375Smrg		DBG("%s:%d: Timed wait is not supported. Falling back to "
175820131375Smrg		    "infinite wait\n", __FILE__, __LINE__);
175920131375Smrg		if (timeout_ns) {
176020131375Smrg			drm_intel_gem_bo_wait_rendering(bo);
176120131375Smrg			return 0;
176220131375Smrg		} else {
176320131375Smrg			return drm_intel_gem_bo_busy(bo) ? -ETIME : 0;
176420131375Smrg		}
176520131375Smrg	}
176620131375Smrg
1767424e9256Smrg	memclear(wait);
176820131375Smrg	wait.bo_handle = bo_gem->gem_handle;
176920131375Smrg	wait.timeout_ns = timeout_ns;
177020131375Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
177120131375Smrg	if (ret == -1)
177220131375Smrg		return -errno;
177320131375Smrg
177420131375Smrg	return ret;
177520131375Smrg}
177620131375Smrg
177722944501Smrg/**
177822944501Smrg * Sets the object to the GTT read and possibly write domain, used by the X
177922944501Smrg * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
178022944501Smrg *
178122944501Smrg * In combination with drm_intel_gem_bo_pin() and manual fence management, we
178222944501Smrg * can do tiled pixmaps this way.
178322944501Smrg */
1784424e9256Smrgvoid
178522944501Smrgdrm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
178622944501Smrg{
178722944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
178822944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
178922944501Smrg	struct drm_i915_gem_set_domain set_domain;
179022944501Smrg	int ret;
179122944501Smrg
1792424e9256Smrg	memclear(set_domain);
179322944501Smrg	set_domain.handle = bo_gem->gem_handle;
179422944501Smrg	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
179522944501Smrg	set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
17966d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
17976d98c517Smrg		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
17986d98c517Smrg		       &set_domain);
179922944501Smrg	if (ret != 0) {
18009ce4edccSmrg		DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
18019ce4edccSmrg		    __FILE__, __LINE__, bo_gem->gem_handle,
18029ce4edccSmrg		    set_domain.read_domains, set_domain.write_domain,
18039ce4edccSmrg		    strerror(errno));
180422944501Smrg	}
180522944501Smrg}
180622944501Smrg
180722944501Smrgstatic void
180822944501Smrgdrm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
180922944501Smrg{
181022944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1811424e9256Smrg	struct drm_gem_close close_bo;
1812424e9256Smrg	int i, ret;
181322944501Smrg
181422944501Smrg	free(bufmgr_gem->exec2_objects);
181522944501Smrg	free(bufmgr_gem->exec_objects);
181622944501Smrg	free(bufmgr_gem->exec_bos);
181720131375Smrg	free(bufmgr_gem->aub_filename);
181822944501Smrg
181922944501Smrg	pthread_mutex_destroy(&bufmgr_gem->lock);
182022944501Smrg
182122944501Smrg	/* Free any cached buffer objects we were going to reuse */
1822aaba2545Smrg	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
182322944501Smrg		struct drm_intel_gem_bo_bucket *bucket =
182422944501Smrg		    &bufmgr_gem->cache_bucket[i];
182522944501Smrg		drm_intel_bo_gem *bo_gem;
182622944501Smrg
182722944501Smrg		while (!DRMLISTEMPTY(&bucket->head)) {
182822944501Smrg			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
182922944501Smrg					      bucket->head.next, head);
183022944501Smrg			DRMLISTDEL(&bo_gem->head);
183122944501Smrg
183222944501Smrg			drm_intel_gem_bo_free(&bo_gem->bo);
183322944501Smrg		}
183422944501Smrg	}
183522944501Smrg
1836424e9256Smrg	/* Release userptr bo kept hanging around for optimisation. */
1837424e9256Smrg	if (bufmgr_gem->userptr_active.ptr) {
1838424e9256Smrg		memclear(close_bo);
1839424e9256Smrg		close_bo.handle = bufmgr_gem->userptr_active.handle;
1840424e9256Smrg		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close_bo);
1841424e9256Smrg		free(bufmgr_gem->userptr_active.ptr);
1842424e9256Smrg		if (ret)
1843424e9256Smrg			fprintf(stderr,
1844424e9256Smrg				"Failed to release test userptr object! (%d) "
1845424e9256Smrg				"i915 kernel driver may not be sane!\n", errno);
1846424e9256Smrg	}
1847424e9256Smrg
184822944501Smrg	free(bufmgr);
184922944501Smrg}
185022944501Smrg
185122944501Smrg/**
185222944501Smrg * Adds the target buffer to the validation list and adds the relocation
185322944501Smrg * to the reloc_buffer's relocation list.
185422944501Smrg *
185522944501Smrg * The relocation entry at the given offset must already contain the
185622944501Smrg * precomputed relocation value, because the kernel will optimize out
185722944501Smrg * the relocation entry write when the buffer hasn't moved from the
185822944501Smrg * last known offset in target_bo.
185922944501Smrg */
186022944501Smrgstatic int
186122944501Smrgdo_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
186222944501Smrg		 drm_intel_bo *target_bo, uint32_t target_offset,
186322944501Smrg		 uint32_t read_domains, uint32_t write_domain,
186420131375Smrg		 bool need_fence)
186522944501Smrg{
186622944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
186722944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
186822944501Smrg	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
186920131375Smrg	bool fenced_command;
187022944501Smrg
187122944501Smrg	if (bo_gem->has_error)
187222944501Smrg		return -ENOMEM;
187322944501Smrg
187422944501Smrg	if (target_bo_gem->has_error) {
187520131375Smrg		bo_gem->has_error = true;
187622944501Smrg		return -ENOMEM;
187722944501Smrg	}
187822944501Smrg
187922944501Smrg	/* We never use HW fences for rendering on 965+ */
188022944501Smrg	if (bufmgr_gem->gen >= 4)
188120131375Smrg		need_fence = false;
188222944501Smrg
18839ce4edccSmrg	fenced_command = need_fence;
18849ce4edccSmrg	if (target_bo_gem->tiling_mode == I915_TILING_NONE)
188520131375Smrg		need_fence = false;
18869ce4edccSmrg
188722944501Smrg	/* Create a new relocation list if needed */
188822944501Smrg	if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo))
188922944501Smrg		return -ENOMEM;
189022944501Smrg
189122944501Smrg	/* Check overflow */
189222944501Smrg	assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);
189322944501Smrg
189422944501Smrg	/* Check args */
189522944501Smrg	assert(offset <= bo->size - 4);
189622944501Smrg	assert((write_domain & (write_domain - 1)) == 0);
189722944501Smrg
18983c748557Ssnj	/* An object needing a fence is a tiled buffer, so it won't have
18993c748557Ssnj	 * relocs to other buffers.
19003c748557Ssnj	 */
19013c748557Ssnj	if (need_fence) {
19023c748557Ssnj		assert(target_bo_gem->reloc_count == 0);
19033c748557Ssnj		target_bo_gem->reloc_tree_fences = 1;
19043c748557Ssnj	}
19053c748557Ssnj
190622944501Smrg	/* Make sure that we're not adding a reloc to something whose size has
190722944501Smrg	 * already been accounted for.
190822944501Smrg	 */
190922944501Smrg	assert(!bo_gem->used_as_reloc_target);
1910aaba2545Smrg	if (target_bo_gem != bo_gem) {
191120131375Smrg		target_bo_gem->used_as_reloc_target = true;
1912aaba2545Smrg		bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
19133c748557Ssnj		bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
1914aaba2545Smrg	}
191522944501Smrg
191622944501Smrg	bo_gem->relocs[bo_gem->reloc_count].offset = offset;
191722944501Smrg	bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
191822944501Smrg	bo_gem->relocs[bo_gem->reloc_count].target_handle =
191922944501Smrg	    target_bo_gem->gem_handle;
192022944501Smrg	bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
192122944501Smrg	bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
192220131375Smrg	bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64;
192322944501Smrg
192422944501Smrg	bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
1925aaba2545Smrg	if (target_bo != bo)
1926aaba2545Smrg		drm_intel_gem_bo_reference(target_bo);
19279ce4edccSmrg	if (fenced_command)
192822944501Smrg		bo_gem->reloc_target_info[bo_gem->reloc_count].flags =
192922944501Smrg			DRM_INTEL_RELOC_FENCE;
193022944501Smrg	else
193122944501Smrg		bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
193222944501Smrg
193322944501Smrg	bo_gem->reloc_count++;
193422944501Smrg
193522944501Smrg	return 0;
193622944501Smrg}
193722944501Smrg
193822944501Smrgstatic int
193922944501Smrgdrm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
194022944501Smrg			    drm_intel_bo *target_bo, uint32_t target_offset,
194122944501Smrg			    uint32_t read_domains, uint32_t write_domain)
194222944501Smrg{
194322944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
194422944501Smrg
194522944501Smrg	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
194622944501Smrg				read_domains, write_domain,
194722944501Smrg				!bufmgr_gem->fenced_relocs);
194822944501Smrg}
194922944501Smrg
195022944501Smrgstatic int
195122944501Smrgdrm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
195222944501Smrg				  drm_intel_bo *target_bo,
195322944501Smrg				  uint32_t target_offset,
195422944501Smrg				  uint32_t read_domains, uint32_t write_domain)
195522944501Smrg{
195622944501Smrg	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
195720131375Smrg				read_domains, write_domain, true);
195820131375Smrg}
195920131375Smrg
1960424e9256Smrgint
196120131375Smrgdrm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo)
196220131375Smrg{
196320131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
196420131375Smrg
196520131375Smrg	return bo_gem->reloc_count;
196620131375Smrg}
196720131375Smrg
196820131375Smrg/**
196920131375Smrg * Removes existing relocation entries in the BO after "start".
197020131375Smrg *
197120131375Smrg * This allows a user to avoid a two-step process for state setup with
197220131375Smrg * counting up all the buffer objects and doing a
197320131375Smrg * drm_intel_bufmgr_check_aperture_space() before emitting any of the
197420131375Smrg * relocations for the state setup.  Instead, save the state of the
197520131375Smrg * batchbuffer including drm_intel_gem_get_reloc_count(), emit all the
197620131375Smrg * state, and then check if it still fits in the aperture.
197720131375Smrg *
197820131375Smrg * Any further drm_intel_bufmgr_check_aperture_space() queries
197920131375Smrg * involving this buffer in the tree are undefined after this call.
198020131375Smrg */
1981424e9256Smrgvoid
198220131375Smrgdrm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start)
198320131375Smrg{
1984a884aba1Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
198520131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
198620131375Smrg	int i;
198720131375Smrg	struct timespec time;
198820131375Smrg
198920131375Smrg	clock_gettime(CLOCK_MONOTONIC, &time);
199020131375Smrg
199120131375Smrg	assert(bo_gem->reloc_count >= start);
1992a884aba1Smrg
199320131375Smrg	/* Unreference the cleared target buffers */
1994a884aba1Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
1995a884aba1Smrg
199620131375Smrg	for (i = start; i < bo_gem->reloc_count; i++) {
199720131375Smrg		drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo;
199820131375Smrg		if (&target_bo_gem->bo != bo) {
199920131375Smrg			bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences;
200020131375Smrg			drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo,
200120131375Smrg								  time.tv_sec);
200220131375Smrg		}
200320131375Smrg	}
200420131375Smrg	bo_gem->reloc_count = start;
2005a884aba1Smrg
2006a884aba1Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
2007a884aba1Smrg
200822944501Smrg}
200922944501Smrg
201022944501Smrg/**
201122944501Smrg * Walk the tree of relocations rooted at BO and accumulate the list of
201222944501Smrg * validations to be performed and update the relocation buffers with
201322944501Smrg * index values into the validation list.
201422944501Smrg */
201522944501Smrgstatic void
201622944501Smrgdrm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
201722944501Smrg{
201822944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
201922944501Smrg	int i;
202022944501Smrg
202122944501Smrg	if (bo_gem->relocs == NULL)
202222944501Smrg		return;
202322944501Smrg
202422944501Smrg	for (i = 0; i < bo_gem->reloc_count; i++) {
202522944501Smrg		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
202622944501Smrg
2027aaba2545Smrg		if (target_bo == bo)
2028aaba2545Smrg			continue;
2029aaba2545Smrg
203020131375Smrg		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
203120131375Smrg
203222944501Smrg		/* Continue walking the tree depth-first. */
203322944501Smrg		drm_intel_gem_bo_process_reloc(target_bo);
203422944501Smrg
203522944501Smrg		/* Add the target to the validate list */
203622944501Smrg		drm_intel_add_validate_buffer(target_bo);
203722944501Smrg	}
203822944501Smrg}
203922944501Smrg
204022944501Smrgstatic void
204122944501Smrgdrm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
204222944501Smrg{
204322944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
204422944501Smrg	int i;
204522944501Smrg
204622944501Smrg	if (bo_gem->relocs == NULL)
204722944501Smrg		return;
204822944501Smrg
204922944501Smrg	for (i = 0; i < bo_gem->reloc_count; i++) {
205022944501Smrg		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
205122944501Smrg		int need_fence;
205222944501Smrg
2053aaba2545Smrg		if (target_bo == bo)
2054aaba2545Smrg			continue;
2055aaba2545Smrg
205620131375Smrg		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
205720131375Smrg
205822944501Smrg		/* Continue walking the tree depth-first. */
205922944501Smrg		drm_intel_gem_bo_process_reloc2(target_bo);
206022944501Smrg
206122944501Smrg		need_fence = (bo_gem->reloc_target_info[i].flags &
206222944501Smrg			      DRM_INTEL_RELOC_FENCE);
206322944501Smrg
206422944501Smrg		/* Add the target to the validate list */
206522944501Smrg		drm_intel_add_validate_buffer2(target_bo, need_fence);
206622944501Smrg	}
206722944501Smrg}
206822944501Smrg
206922944501Smrg
207022944501Smrgstatic void
207122944501Smrgdrm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
207222944501Smrg{
207322944501Smrg	int i;
207422944501Smrg
207522944501Smrg	for (i = 0; i < bufmgr_gem->exec_count; i++) {
207622944501Smrg		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
207722944501Smrg		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
207822944501Smrg
207922944501Smrg		/* Update the buffer offset */
208020131375Smrg		if (bufmgr_gem->exec_objects[i].offset != bo->offset64) {
2081d82d45b3Sjoerg			DBG("BO %d (%s) migrated: 0x%08llx -> 0x%08llx\n",
2082d82d45b3Sjoerg			    bo_gem->gem_handle, bo_gem->name,
2083d82d45b3Sjoerg			    (unsigned long long)bo->offset64,
208422944501Smrg			    (unsigned long long)bufmgr_gem->exec_objects[i].
208522944501Smrg			    offset);
208620131375Smrg			bo->offset64 = bufmgr_gem->exec_objects[i].offset;
208722944501Smrg			bo->offset = bufmgr_gem->exec_objects[i].offset;
208822944501Smrg		}
208922944501Smrg	}
209022944501Smrg}
209122944501Smrg
209222944501Smrgstatic void
209322944501Smrgdrm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
209422944501Smrg{
209522944501Smrg	int i;
209622944501Smrg
209722944501Smrg	for (i = 0; i < bufmgr_gem->exec_count; i++) {
209822944501Smrg		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
209922944501Smrg		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
210022944501Smrg
210122944501Smrg		/* Update the buffer offset */
210220131375Smrg		if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) {
2103d82d45b3Sjoerg			DBG("BO %d (%s) migrated: 0x%08llx -> 0x%08llx\n",
2104d82d45b3Sjoerg			    bo_gem->gem_handle, bo_gem->name,
2105d82d45b3Sjoerg			    (unsigned long long)bo->offset64,
210622944501Smrg			    (unsigned long long)bufmgr_gem->exec2_objects[i].offset);
210720131375Smrg			bo->offset64 = bufmgr_gem->exec2_objects[i].offset;
210822944501Smrg			bo->offset = bufmgr_gem->exec2_objects[i].offset;
210922944501Smrg		}
211022944501Smrg	}
211122944501Smrg}
211222944501Smrg
211320131375Smrgstatic void
211420131375Smrgaub_out(drm_intel_bufmgr_gem *bufmgr_gem, uint32_t data)
211520131375Smrg{
211620131375Smrg	fwrite(&data, 1, 4, bufmgr_gem->aub_file);
211720131375Smrg}
211820131375Smrg
211920131375Smrgstatic void
212020131375Smrgaub_out_data(drm_intel_bufmgr_gem *bufmgr_gem, void *data, size_t size)
212120131375Smrg{
212220131375Smrg	fwrite(data, 1, size, bufmgr_gem->aub_file);
212320131375Smrg}
212420131375Smrg
212520131375Smrgstatic void
212620131375Smrgaub_write_bo_data(drm_intel_bo *bo, uint32_t offset, uint32_t size)
212722944501Smrg{
212822944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
212922944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
213020131375Smrg	uint32_t *data;
213120131375Smrg	unsigned int i;
213222944501Smrg
213320131375Smrg	data = malloc(bo->size);
213420131375Smrg	drm_intel_bo_get_subdata(bo, offset, size, data);
213522944501Smrg
213620131375Smrg	/* Easy mode: write out bo with no relocations */
213720131375Smrg	if (!bo_gem->reloc_count) {
213820131375Smrg		aub_out_data(bufmgr_gem, data, size);
213920131375Smrg		free(data);
214020131375Smrg		return;
214120131375Smrg	}
214222944501Smrg
214320131375Smrg	/* Otherwise, handle the relocations while writing. */
214420131375Smrg	for (i = 0; i < size / 4; i++) {
214520131375Smrg		int r;
214620131375Smrg		for (r = 0; r < bo_gem->reloc_count; r++) {
214720131375Smrg			struct drm_i915_gem_relocation_entry *reloc;
214820131375Smrg			drm_intel_reloc_target *info;
214922944501Smrg
215020131375Smrg			reloc = &bo_gem->relocs[r];
215120131375Smrg			info = &bo_gem->reloc_target_info[r];
215222944501Smrg
215320131375Smrg			if (reloc->offset == offset + i * 4) {
215420131375Smrg				drm_intel_bo_gem *target_gem;
215520131375Smrg				uint32_t val;
215622944501Smrg
215720131375Smrg				target_gem = (drm_intel_bo_gem *)info->bo;
215822944501Smrg
215920131375Smrg				val = reloc->delta;
216020131375Smrg				val += target_gem->aub_offset;
216122944501Smrg
216220131375Smrg				aub_out(bufmgr_gem, val);
216320131375Smrg				data[i] = val;
216420131375Smrg				break;
216520131375Smrg			}
216620131375Smrg		}
216720131375Smrg		if (r == bo_gem->reloc_count) {
216820131375Smrg			/* no relocation, just the data */
216920131375Smrg			aub_out(bufmgr_gem, data[i]);
217020131375Smrg		}
217122944501Smrg	}
217222944501Smrg
217320131375Smrg	free(data);
217422944501Smrg}
217522944501Smrg
217620131375Smrgstatic void
217720131375Smrgaub_bo_get_address(drm_intel_bo *bo)
217822944501Smrg{
217920131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
218020131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
218122944501Smrg
218220131375Smrg	/* Give the object a graphics address in the AUB file.  We
218320131375Smrg	 * don't just use the GEM object address because we do AUB
218420131375Smrg	 * dumping before execution -- we want to successfully log
218520131375Smrg	 * when the hardware might hang, and we might even want to aub
218620131375Smrg	 * capture for a driver trying to execute on a different
218720131375Smrg	 * generation of hardware by disabling the actual kernel exec
218820131375Smrg	 * call.
218920131375Smrg	 */
219020131375Smrg	bo_gem->aub_offset = bufmgr_gem->aub_offset;
219120131375Smrg	bufmgr_gem->aub_offset += bo->size;
219220131375Smrg	/* XXX: Handle aperture overflow. */
219320131375Smrg	assert(bufmgr_gem->aub_offset < 256 * 1024 * 1024);
219420131375Smrg}
219520131375Smrg
219620131375Smrgstatic void
219720131375Smrgaub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype,
219820131375Smrg		      uint32_t offset, uint32_t size)
219920131375Smrg{
220020131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
220120131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
220220131375Smrg
220320131375Smrg	aub_out(bufmgr_gem,
220420131375Smrg		CMD_AUB_TRACE_HEADER_BLOCK |
220520131375Smrg		((bufmgr_gem->gen >= 8 ? 6 : 5) - 2));
220620131375Smrg	aub_out(bufmgr_gem,
220720131375Smrg		AUB_TRACE_MEMTYPE_GTT | type | AUB_TRACE_OP_DATA_WRITE);
220820131375Smrg	aub_out(bufmgr_gem, subtype);
220920131375Smrg	aub_out(bufmgr_gem, bo_gem->aub_offset + offset);
221020131375Smrg	aub_out(bufmgr_gem, size);
221120131375Smrg	if (bufmgr_gem->gen >= 8)
221220131375Smrg		aub_out(bufmgr_gem, 0);
221320131375Smrg	aub_write_bo_data(bo, offset, size);
221420131375Smrg}
221520131375Smrg
221620131375Smrg/**
221720131375Smrg * Break up large objects into multiple writes.  Otherwise a 128kb VBO
221820131375Smrg * would overflow the 16 bits of size field in the packet header and
221920131375Smrg * everything goes badly after that.
222020131375Smrg */
222120131375Smrgstatic void
222220131375Smrgaub_write_large_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype,
222320131375Smrg			    uint32_t offset, uint32_t size)
222420131375Smrg{
222520131375Smrg	uint32_t block_size;
222620131375Smrg	uint32_t sub_offset;
222720131375Smrg
222820131375Smrg	for (sub_offset = 0; sub_offset < size; sub_offset += block_size) {
222920131375Smrg		block_size = size - sub_offset;
223020131375Smrg
223120131375Smrg		if (block_size > 8 * 4096)
223220131375Smrg			block_size = 8 * 4096;
223320131375Smrg
223420131375Smrg		aub_write_trace_block(bo, type, subtype, offset + sub_offset,
223520131375Smrg				      block_size);
223620131375Smrg	}
223720131375Smrg}
223820131375Smrg
223920131375Smrgstatic void
224020131375Smrgaub_write_bo(drm_intel_bo *bo)
224120131375Smrg{
224220131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
224320131375Smrg	uint32_t offset = 0;
224420131375Smrg	unsigned i;
224520131375Smrg
224620131375Smrg	aub_bo_get_address(bo);
224720131375Smrg
224820131375Smrg	/* Write out each annotated section separately. */
224920131375Smrg	for (i = 0; i < bo_gem->aub_annotation_count; ++i) {
225020131375Smrg		drm_intel_aub_annotation *annotation =
225120131375Smrg			&bo_gem->aub_annotations[i];
225220131375Smrg		uint32_t ending_offset = annotation->ending_offset;
225320131375Smrg		if (ending_offset > bo->size)
225420131375Smrg			ending_offset = bo->size;
225520131375Smrg		if (ending_offset > offset) {
225620131375Smrg			aub_write_large_trace_block(bo, annotation->type,
225720131375Smrg						    annotation->subtype,
225820131375Smrg						    offset,
225920131375Smrg						    ending_offset - offset);
226020131375Smrg			offset = ending_offset;
226120131375Smrg		}
226220131375Smrg	}
226320131375Smrg
226420131375Smrg	/* Write out any remaining unannotated data */
226520131375Smrg	if (offset < bo->size) {
226620131375Smrg		aub_write_large_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0,
226720131375Smrg					    offset, bo->size - offset);
226820131375Smrg	}
226920131375Smrg}
227020131375Smrg
227120131375Smrg/*
227220131375Smrg * Make a ringbuffer on fly and dump it
227320131375Smrg */
227420131375Smrgstatic void
227520131375Smrgaub_build_dump_ringbuffer(drm_intel_bufmgr_gem *bufmgr_gem,
227620131375Smrg			  uint32_t batch_buffer, int ring_flag)
227720131375Smrg{
227820131375Smrg	uint32_t ringbuffer[4096];
227920131375Smrg	int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */
228020131375Smrg	int ring_count = 0;
228120131375Smrg
228220131375Smrg	if (ring_flag == I915_EXEC_BSD)
228320131375Smrg		ring = AUB_TRACE_TYPE_RING_PRB1;
228420131375Smrg	else if (ring_flag == I915_EXEC_BLT)
228520131375Smrg		ring = AUB_TRACE_TYPE_RING_PRB2;
228620131375Smrg
228720131375Smrg	/* Make a ring buffer to execute our batchbuffer. */
228820131375Smrg	memset(ringbuffer, 0, sizeof(ringbuffer));
228920131375Smrg	if (bufmgr_gem->gen >= 8) {
229020131375Smrg		ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START | (3 - 2);
229120131375Smrg		ringbuffer[ring_count++] = batch_buffer;
229220131375Smrg		ringbuffer[ring_count++] = 0;
229320131375Smrg	} else {
229420131375Smrg		ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START;
229520131375Smrg		ringbuffer[ring_count++] = batch_buffer;
229620131375Smrg	}
229720131375Smrg
229820131375Smrg	/* Write out the ring.  This appears to trigger execution of
229920131375Smrg	 * the ring in the simulator.
230020131375Smrg	 */
230120131375Smrg	aub_out(bufmgr_gem,
230220131375Smrg		CMD_AUB_TRACE_HEADER_BLOCK |
230320131375Smrg		((bufmgr_gem->gen >= 8 ? 6 : 5) - 2));
230420131375Smrg	aub_out(bufmgr_gem,
230520131375Smrg		AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE);
230620131375Smrg	aub_out(bufmgr_gem, 0); /* general/surface subtype */
230720131375Smrg	aub_out(bufmgr_gem, bufmgr_gem->aub_offset);
230820131375Smrg	aub_out(bufmgr_gem, ring_count * 4);
230920131375Smrg	if (bufmgr_gem->gen >= 8)
231020131375Smrg		aub_out(bufmgr_gem, 0);
231120131375Smrg
231220131375Smrg	/* FIXME: Need some flush operations here? */
231320131375Smrg	aub_out_data(bufmgr_gem, ringbuffer, ring_count * 4);
231420131375Smrg
231520131375Smrg	/* Update offset pointer */
231620131375Smrg	bufmgr_gem->aub_offset += 4096;
231720131375Smrg}
231820131375Smrg
2319424e9256Smrgvoid
232020131375Smrgdrm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo,
232120131375Smrg			      int x1, int y1, int width, int height,
232220131375Smrg			      enum aub_dump_bmp_format format,
232320131375Smrg			      int pitch, int offset)
232420131375Smrg{
232520131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
232620131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
232720131375Smrg	uint32_t cpp;
232820131375Smrg
232920131375Smrg	switch (format) {
233020131375Smrg	case AUB_DUMP_BMP_FORMAT_8BIT:
233120131375Smrg		cpp = 1;
233220131375Smrg		break;
233320131375Smrg	case AUB_DUMP_BMP_FORMAT_ARGB_4444:
233420131375Smrg		cpp = 2;
233520131375Smrg		break;
233620131375Smrg	case AUB_DUMP_BMP_FORMAT_ARGB_0888:
233720131375Smrg	case AUB_DUMP_BMP_FORMAT_ARGB_8888:
233820131375Smrg		cpp = 4;
233920131375Smrg		break;
234020131375Smrg	default:
234120131375Smrg		printf("Unknown AUB dump format %d\n", format);
234220131375Smrg		return;
234320131375Smrg	}
234420131375Smrg
234520131375Smrg	if (!bufmgr_gem->aub_file)
234620131375Smrg		return;
234720131375Smrg
234820131375Smrg	aub_out(bufmgr_gem, CMD_AUB_DUMP_BMP | 4);
234920131375Smrg	aub_out(bufmgr_gem, (y1 << 16) | x1);
235020131375Smrg	aub_out(bufmgr_gem,
235120131375Smrg		(format << 24) |
235220131375Smrg		(cpp << 19) |
235320131375Smrg		pitch / 4);
235420131375Smrg	aub_out(bufmgr_gem, (height << 16) | width);
235520131375Smrg	aub_out(bufmgr_gem, bo_gem->aub_offset + offset);
235620131375Smrg	aub_out(bufmgr_gem,
235720131375Smrg		((bo_gem->tiling_mode != I915_TILING_NONE) ? (1 << 2) : 0) |
235820131375Smrg		((bo_gem->tiling_mode == I915_TILING_Y) ? (1 << 3) : 0));
235920131375Smrg}
236020131375Smrg
236120131375Smrgstatic void
236220131375Smrgaub_exec(drm_intel_bo *bo, int ring_flag, int used)
236320131375Smrg{
236420131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
236520131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
236620131375Smrg	int i;
236720131375Smrg	bool batch_buffer_needs_annotations;
236820131375Smrg
236920131375Smrg	if (!bufmgr_gem->aub_file)
237020131375Smrg		return;
237120131375Smrg
237220131375Smrg	/* If batch buffer is not annotated, annotate it the best we
237320131375Smrg	 * can.
237420131375Smrg	 */
237520131375Smrg	batch_buffer_needs_annotations = bo_gem->aub_annotation_count == 0;
237620131375Smrg	if (batch_buffer_needs_annotations) {
237720131375Smrg		drm_intel_aub_annotation annotations[2] = {
237820131375Smrg			{ AUB_TRACE_TYPE_BATCH, 0, used },
237920131375Smrg			{ AUB_TRACE_TYPE_NOTYPE, 0, bo->size }
238020131375Smrg		};
238120131375Smrg		drm_intel_bufmgr_gem_set_aub_annotations(bo, annotations, 2);
238220131375Smrg	}
238320131375Smrg
238420131375Smrg	/* Write out all buffers to AUB memory */
238520131375Smrg	for (i = 0; i < bufmgr_gem->exec_count; i++) {
238620131375Smrg		aub_write_bo(bufmgr_gem->exec_bos[i]);
238720131375Smrg	}
238820131375Smrg
238920131375Smrg	/* Remove any annotations we added */
239020131375Smrg	if (batch_buffer_needs_annotations)
239120131375Smrg		drm_intel_bufmgr_gem_set_aub_annotations(bo, NULL, 0);
239220131375Smrg
239320131375Smrg	/* Dump ring buffer */
239420131375Smrg	aub_build_dump_ringbuffer(bufmgr_gem, bo_gem->aub_offset, ring_flag);
239520131375Smrg
239620131375Smrg	fflush(bufmgr_gem->aub_file);
239720131375Smrg
239820131375Smrg	/*
239920131375Smrg	 * One frame has been dumped. So reset the aub_offset for the next frame.
240020131375Smrg	 *
240120131375Smrg	 * FIXME: Can we do this?
240220131375Smrg	 */
240320131375Smrg	bufmgr_gem->aub_offset = 0x10000;
240420131375Smrg}
240520131375Smrg
240620131375Smrgstatic int
240720131375Smrgdrm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
240820131375Smrg		      drm_clip_rect_t * cliprects, int num_cliprects, int DR4)
240920131375Smrg{
241020131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
241120131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
241220131375Smrg	struct drm_i915_gem_execbuffer execbuf;
241320131375Smrg	int ret, i;
241420131375Smrg
241520131375Smrg	if (bo_gem->has_error)
241620131375Smrg		return -ENOMEM;
241720131375Smrg
241820131375Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
241920131375Smrg	/* Update indices and set up the validate list. */
242020131375Smrg	drm_intel_gem_bo_process_reloc(bo);
242120131375Smrg
242220131375Smrg	/* Add the batch buffer to the validation list.  There are no
242320131375Smrg	 * relocations pointing to it.
242420131375Smrg	 */
242520131375Smrg	drm_intel_add_validate_buffer(bo);
242620131375Smrg
2427424e9256Smrg	memclear(execbuf);
242820131375Smrg	execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects;
242920131375Smrg	execbuf.buffer_count = bufmgr_gem->exec_count;
243020131375Smrg	execbuf.batch_start_offset = 0;
243120131375Smrg	execbuf.batch_len = used;
243220131375Smrg	execbuf.cliprects_ptr = (uintptr_t) cliprects;
243320131375Smrg	execbuf.num_cliprects = num_cliprects;
243420131375Smrg	execbuf.DR1 = 0;
243520131375Smrg	execbuf.DR4 = DR4;
243620131375Smrg
243720131375Smrg	ret = drmIoctl(bufmgr_gem->fd,
243820131375Smrg		       DRM_IOCTL_I915_GEM_EXECBUFFER,
243920131375Smrg		       &execbuf);
244020131375Smrg	if (ret != 0) {
244120131375Smrg		ret = -errno;
244220131375Smrg		if (errno == ENOSPC) {
244320131375Smrg			DBG("Execbuffer fails to pin. "
244420131375Smrg			    "Estimate: %u. Actual: %u. Available: %u\n",
244520131375Smrg			    drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
244620131375Smrg							       bufmgr_gem->
244720131375Smrg							       exec_count),
244820131375Smrg			    drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
244920131375Smrg							      bufmgr_gem->
245020131375Smrg							      exec_count),
245120131375Smrg			    (unsigned int)bufmgr_gem->gtt_size);
245220131375Smrg		}
245320131375Smrg	}
245420131375Smrg	drm_intel_update_buffer_offsets(bufmgr_gem);
245520131375Smrg
245620131375Smrg	if (bufmgr_gem->bufmgr.debug)
245720131375Smrg		drm_intel_gem_dump_validation_list(bufmgr_gem);
245820131375Smrg
245920131375Smrg	for (i = 0; i < bufmgr_gem->exec_count; i++) {
246020131375Smrg		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
246120131375Smrg		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
246220131375Smrg
246320131375Smrg		bo_gem->idle = false;
246420131375Smrg
246520131375Smrg		/* Disconnect the buffer from the validate list */
246620131375Smrg		bo_gem->validate_index = -1;
246720131375Smrg		bufmgr_gem->exec_bos[i] = NULL;
246820131375Smrg	}
246920131375Smrg	bufmgr_gem->exec_count = 0;
247020131375Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
247120131375Smrg
247220131375Smrg	return ret;
247320131375Smrg}
247420131375Smrg
247520131375Smrgstatic int
247620131375Smrgdo_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx,
247720131375Smrg	 drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
247820131375Smrg	 unsigned int flags)
247920131375Smrg{
248020131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
248120131375Smrg	struct drm_i915_gem_execbuffer2 execbuf;
248220131375Smrg	int ret = 0;
248320131375Smrg	int i;
248420131375Smrg
248520131375Smrg	switch (flags & 0x7) {
248620131375Smrg	default:
248720131375Smrg		return -EINVAL;
248820131375Smrg	case I915_EXEC_BLT:
24899ce4edccSmrg		if (!bufmgr_gem->has_blt)
24909ce4edccSmrg			return -EINVAL;
24919ce4edccSmrg		break;
24929ce4edccSmrg	case I915_EXEC_BSD:
24939ce4edccSmrg		if (!bufmgr_gem->has_bsd)
24949ce4edccSmrg			return -EINVAL;
24959ce4edccSmrg		break;
249620131375Smrg	case I915_EXEC_VEBOX:
249720131375Smrg		if (!bufmgr_gem->has_vebox)
249820131375Smrg			return -EINVAL;
249920131375Smrg		break;
25009ce4edccSmrg	case I915_EXEC_RENDER:
25019ce4edccSmrg	case I915_EXEC_DEFAULT:
25029ce4edccSmrg		break;
25039ce4edccSmrg	}
2504aaba2545Smrg
250522944501Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
250622944501Smrg	/* Update indices and set up the validate list. */
250722944501Smrg	drm_intel_gem_bo_process_reloc2(bo);
250822944501Smrg
250922944501Smrg	/* Add the batch buffer to the validation list.  There are no relocations
251022944501Smrg	 * pointing to it.
251122944501Smrg	 */
251222944501Smrg	drm_intel_add_validate_buffer2(bo, 0);
251322944501Smrg
2514424e9256Smrg	memclear(execbuf);
251522944501Smrg	execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
251622944501Smrg	execbuf.buffer_count = bufmgr_gem->exec_count;
251722944501Smrg	execbuf.batch_start_offset = 0;
251822944501Smrg	execbuf.batch_len = used;
251922944501Smrg	execbuf.cliprects_ptr = (uintptr_t)cliprects;
252022944501Smrg	execbuf.num_cliprects = num_cliprects;
252122944501Smrg	execbuf.DR1 = 0;
252222944501Smrg	execbuf.DR4 = DR4;
252320131375Smrg	execbuf.flags = flags;
252420131375Smrg	if (ctx == NULL)
252520131375Smrg		i915_execbuffer2_set_context_id(execbuf, 0);
252620131375Smrg	else
252720131375Smrg		i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id);
252822944501Smrg	execbuf.rsvd2 = 0;
252922944501Smrg
253020131375Smrg	aub_exec(bo, flags, used);
253120131375Smrg
253220131375Smrg	if (bufmgr_gem->no_exec)
253320131375Smrg		goto skip_execution;
253420131375Smrg
25356d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
25366d98c517Smrg		       DRM_IOCTL_I915_GEM_EXECBUFFER2,
25376d98c517Smrg		       &execbuf);
253822944501Smrg	if (ret != 0) {
253922944501Smrg		ret = -errno;
25406d98c517Smrg		if (ret == -ENOSPC) {
25419ce4edccSmrg			DBG("Execbuffer fails to pin. "
25429ce4edccSmrg			    "Estimate: %u. Actual: %u. Available: %u\n",
25439ce4edccSmrg			    drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
25449ce4edccSmrg							       bufmgr_gem->exec_count),
25459ce4edccSmrg			    drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
25469ce4edccSmrg							      bufmgr_gem->exec_count),
25479ce4edccSmrg			    (unsigned int) bufmgr_gem->gtt_size);
254822944501Smrg		}
254922944501Smrg	}
255022944501Smrg	drm_intel_update_buffer_offsets2(bufmgr_gem);
255122944501Smrg
255220131375Smrgskip_execution:
255322944501Smrg	if (bufmgr_gem->bufmgr.debug)
255422944501Smrg		drm_intel_gem_dump_validation_list(bufmgr_gem);
255522944501Smrg
255622944501Smrg	for (i = 0; i < bufmgr_gem->exec_count; i++) {
255722944501Smrg		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
255822944501Smrg		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
255922944501Smrg
256020131375Smrg		bo_gem->idle = false;
256120131375Smrg
256222944501Smrg		/* Disconnect the buffer from the validate list */
256322944501Smrg		bo_gem->validate_index = -1;
256422944501Smrg		bufmgr_gem->exec_bos[i] = NULL;
256522944501Smrg	}
256622944501Smrg	bufmgr_gem->exec_count = 0;
256722944501Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
256822944501Smrg
256922944501Smrg	return ret;
257022944501Smrg}
257122944501Smrg
2572aaba2545Smrgstatic int
2573aaba2545Smrgdrm_intel_gem_bo_exec2(drm_intel_bo *bo, int used,
2574aaba2545Smrg		       drm_clip_rect_t *cliprects, int num_cliprects,
2575aaba2545Smrg		       int DR4)
2576aaba2545Smrg{
257720131375Smrg	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
257820131375Smrg			I915_EXEC_RENDER);
257920131375Smrg}
258020131375Smrg
258120131375Smrgstatic int
258220131375Smrgdrm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
258320131375Smrg			drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
258420131375Smrg			unsigned int flags)
258520131375Smrg{
258620131375Smrg	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
258720131375Smrg			flags);
258820131375Smrg}
258920131375Smrg
2590424e9256Smrgint
259120131375Smrgdrm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx,
259220131375Smrg			      int used, unsigned int flags)
259320131375Smrg{
259420131375Smrg	return do_exec2(bo, used, ctx, NULL, 0, 0, flags);
2595aaba2545Smrg}
2596aaba2545Smrg
259722944501Smrgstatic int
259822944501Smrgdrm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
259922944501Smrg{
260022944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
260122944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
260222944501Smrg	struct drm_i915_gem_pin pin;
260322944501Smrg	int ret;
260422944501Smrg
2605424e9256Smrg	memclear(pin);
260622944501Smrg	pin.handle = bo_gem->gem_handle;
260722944501Smrg	pin.alignment = alignment;
260822944501Smrg
26096d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
26106d98c517Smrg		       DRM_IOCTL_I915_GEM_PIN,
26116d98c517Smrg		       &pin);
261222944501Smrg	if (ret != 0)
261322944501Smrg		return -errno;
261422944501Smrg
261520131375Smrg	bo->offset64 = pin.offset;
261622944501Smrg	bo->offset = pin.offset;
261722944501Smrg	return 0;
261822944501Smrg}
261922944501Smrg
262022944501Smrgstatic int
262122944501Smrgdrm_intel_gem_bo_unpin(drm_intel_bo *bo)
262222944501Smrg{
262322944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
262422944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
262522944501Smrg	struct drm_i915_gem_unpin unpin;
262622944501Smrg	int ret;
262722944501Smrg
2628424e9256Smrg	memclear(unpin);
262922944501Smrg	unpin.handle = bo_gem->gem_handle;
263022944501Smrg
26316d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
263222944501Smrg	if (ret != 0)
263322944501Smrg		return -errno;
263422944501Smrg
263522944501Smrg	return 0;
263622944501Smrg}
263722944501Smrg
263822944501Smrgstatic int
26396d98c517Smrgdrm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
26406d98c517Smrg				     uint32_t tiling_mode,
26416d98c517Smrg				     uint32_t stride)
264222944501Smrg{
264322944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
264422944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
264522944501Smrg	struct drm_i915_gem_set_tiling set_tiling;
264622944501Smrg	int ret;
264722944501Smrg
26486d98c517Smrg	if (bo_gem->global_name == 0 &&
26496d98c517Smrg	    tiling_mode == bo_gem->tiling_mode &&
26506d98c517Smrg	    stride == bo_gem->stride)
265122944501Smrg		return 0;
265222944501Smrg
265322944501Smrg	memset(&set_tiling, 0, sizeof(set_tiling));
265422944501Smrg	do {
26556d98c517Smrg		/* set_tiling is slightly broken and overwrites the
26566d98c517Smrg		 * input on the error path, so we have to open code
26576d98c517Smrg		 * rmIoctl.
26586d98c517Smrg		 */
26596d98c517Smrg		set_tiling.handle = bo_gem->gem_handle;
26606d98c517Smrg		set_tiling.tiling_mode = tiling_mode;
266122944501Smrg		set_tiling.stride = stride;
266222944501Smrg
266322944501Smrg		ret = ioctl(bufmgr_gem->fd,
266422944501Smrg			    DRM_IOCTL_I915_GEM_SET_TILING,
266522944501Smrg			    &set_tiling);
26666d98c517Smrg	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
26676d98c517Smrg	if (ret == -1)
26686d98c517Smrg		return -errno;
26696d98c517Smrg
26706d98c517Smrg	bo_gem->tiling_mode = set_tiling.tiling_mode;
26716d98c517Smrg	bo_gem->swizzle_mode = set_tiling.swizzle_mode;
26726d98c517Smrg	bo_gem->stride = set_tiling.stride;
26736d98c517Smrg	return 0;
26746d98c517Smrg}
26756d98c517Smrg
26766d98c517Smrgstatic int
26776d98c517Smrgdrm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
26786d98c517Smrg			    uint32_t stride)
26796d98c517Smrg{
26806d98c517Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
26816d98c517Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
26826d98c517Smrg	int ret;
26836d98c517Smrg
2684a884aba1Smrg	/* Tiling with userptr surfaces is not supported
2685a884aba1Smrg	 * on all hardware so refuse it for time being.
2686a884aba1Smrg	 */
2687a884aba1Smrg	if (bo_gem->is_userptr)
2688a884aba1Smrg		return -EINVAL;
2689a884aba1Smrg
26906d98c517Smrg	/* Linear buffers have no stride. By ensuring that we only ever use
26916d98c517Smrg	 * stride 0 with linear buffers, we simplify our code.
26926d98c517Smrg	 */
26936d98c517Smrg	if (*tiling_mode == I915_TILING_NONE)
26946d98c517Smrg		stride = 0;
26956d98c517Smrg
26966d98c517Smrg	ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride);
26976d98c517Smrg	if (ret == 0)
2698aaba2545Smrg		drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
269922944501Smrg
270022944501Smrg	*tiling_mode = bo_gem->tiling_mode;
2701aaba2545Smrg	return ret;
270222944501Smrg}
270322944501Smrg
270422944501Smrgstatic int
270522944501Smrgdrm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
270622944501Smrg			    uint32_t * swizzle_mode)
270722944501Smrg{
270822944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
270922944501Smrg
271022944501Smrg	*tiling_mode = bo_gem->tiling_mode;
271122944501Smrg	*swizzle_mode = bo_gem->swizzle_mode;
271222944501Smrg	return 0;
271322944501Smrg}
271422944501Smrg
2715424e9256Smrgdrm_intel_bo *
271620131375Smrgdrm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size)
271720131375Smrg{
271820131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
271920131375Smrg	int ret;
272020131375Smrg	uint32_t handle;
272120131375Smrg	drm_intel_bo_gem *bo_gem;
272220131375Smrg	struct drm_i915_gem_get_tiling get_tiling;
272320131375Smrg	drmMMListHead *list;
272420131375Smrg
272520131375Smrg	ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle);
272620131375Smrg
272720131375Smrg	/*
272820131375Smrg	 * See if the kernel has already returned this buffer to us. Just as
272920131375Smrg	 * for named buffers, we must not create two bo's pointing at the same
273020131375Smrg	 * kernel object
273120131375Smrg	 */
2732a884aba1Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
273320131375Smrg	for (list = bufmgr_gem->named.next;
273420131375Smrg	     list != &bufmgr_gem->named;
273520131375Smrg	     list = list->next) {
273620131375Smrg		bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list);
273720131375Smrg		if (bo_gem->gem_handle == handle) {
273820131375Smrg			drm_intel_gem_bo_reference(&bo_gem->bo);
2739a884aba1Smrg			pthread_mutex_unlock(&bufmgr_gem->lock);
274020131375Smrg			return &bo_gem->bo;
274120131375Smrg		}
274220131375Smrg	}
274320131375Smrg
274420131375Smrg	if (ret) {
274520131375Smrg	  fprintf(stderr,"ret is %d %d\n", ret, errno);
2746a884aba1Smrg	  pthread_mutex_unlock(&bufmgr_gem->lock);
274720131375Smrg		return NULL;
274820131375Smrg	}
274920131375Smrg
275020131375Smrg	bo_gem = calloc(1, sizeof(*bo_gem));
2751a884aba1Smrg	if (!bo_gem) {
2752a884aba1Smrg		pthread_mutex_unlock(&bufmgr_gem->lock);
275320131375Smrg		return NULL;
2754a884aba1Smrg	}
275520131375Smrg	/* Determine size of bo.  The fd-to-handle ioctl really should
275620131375Smrg	 * return the size, but it doesn't.  If we have kernel 3.12 or
275720131375Smrg	 * later, we can lseek on the prime fd to get the size.  Older
275820131375Smrg	 * kernels will just fail, in which case we fall back to the
275920131375Smrg	 * provided (estimated or guess size). */
276020131375Smrg	ret = lseek(prime_fd, 0, SEEK_END);
276120131375Smrg	if (ret != -1)
276220131375Smrg		bo_gem->bo.size = ret;
276320131375Smrg	else
276420131375Smrg		bo_gem->bo.size = size;
276520131375Smrg
276620131375Smrg	bo_gem->bo.handle = handle;
276720131375Smrg	bo_gem->bo.bufmgr = bufmgr;
276820131375Smrg
276920131375Smrg	bo_gem->gem_handle = handle;
277020131375Smrg
277120131375Smrg	atomic_set(&bo_gem->refcount, 1);
277220131375Smrg
277320131375Smrg	bo_gem->name = "prime";
277420131375Smrg	bo_gem->validate_index = -1;
277520131375Smrg	bo_gem->reloc_tree_fences = 0;
277620131375Smrg	bo_gem->used_as_reloc_target = false;
277720131375Smrg	bo_gem->has_error = false;
277820131375Smrg	bo_gem->reusable = false;
277920131375Smrg
278020131375Smrg	DRMINITLISTHEAD(&bo_gem->vma_list);
278120131375Smrg	DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
2782a884aba1Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
278320131375Smrg
2784424e9256Smrg	memclear(get_tiling);
278520131375Smrg	get_tiling.handle = bo_gem->gem_handle;
278620131375Smrg	ret = drmIoctl(bufmgr_gem->fd,
278720131375Smrg		       DRM_IOCTL_I915_GEM_GET_TILING,
278820131375Smrg		       &get_tiling);
278920131375Smrg	if (ret != 0) {
279020131375Smrg		drm_intel_gem_bo_unreference(&bo_gem->bo);
279120131375Smrg		return NULL;
279220131375Smrg	}
279320131375Smrg	bo_gem->tiling_mode = get_tiling.tiling_mode;
279420131375Smrg	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
279520131375Smrg	/* XXX stride is unknown */
279620131375Smrg	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
279720131375Smrg
279820131375Smrg	return &bo_gem->bo;
279920131375Smrg}
280020131375Smrg
2801424e9256Smrgint
280220131375Smrgdrm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd)
280320131375Smrg{
280420131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
280520131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
280620131375Smrg
2807a884aba1Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
280820131375Smrg        if (DRMLISTEMPTY(&bo_gem->name_list))
280920131375Smrg                DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
2810a884aba1Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
281120131375Smrg
281220131375Smrg	if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle,
281320131375Smrg			       DRM_CLOEXEC, prime_fd) != 0)
281420131375Smrg		return -errno;
281520131375Smrg
281620131375Smrg	bo_gem->reusable = false;
281720131375Smrg
281820131375Smrg	return 0;
281920131375Smrg}
282020131375Smrg
282122944501Smrgstatic int
282222944501Smrgdrm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name)
282322944501Smrg{
282422944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
282522944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
282622944501Smrg	int ret;
282722944501Smrg
282822944501Smrg	if (!bo_gem->global_name) {
282920131375Smrg		struct drm_gem_flink flink;
283020131375Smrg
2831424e9256Smrg		memclear(flink);
283222944501Smrg		flink.handle = bo_gem->gem_handle;
283322944501Smrg
2834a884aba1Smrg		pthread_mutex_lock(&bufmgr_gem->lock);
2835a884aba1Smrg
28366d98c517Smrg		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink);
2837a884aba1Smrg		if (ret != 0) {
2838a884aba1Smrg			pthread_mutex_unlock(&bufmgr_gem->lock);
283922944501Smrg			return -errno;
2840a884aba1Smrg		}
284120131375Smrg
284222944501Smrg		bo_gem->global_name = flink.name;
284320131375Smrg		bo_gem->reusable = false;
284420131375Smrg
284520131375Smrg                if (DRMLISTEMPTY(&bo_gem->name_list))
284620131375Smrg                        DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
2847a884aba1Smrg		pthread_mutex_unlock(&bufmgr_gem->lock);
284822944501Smrg	}
284922944501Smrg
285022944501Smrg	*name = bo_gem->global_name;
285122944501Smrg	return 0;
285222944501Smrg}
285322944501Smrg
285422944501Smrg/**
285522944501Smrg * Enables unlimited caching of buffer objects for reuse.
285622944501Smrg *
285722944501Smrg * This is potentially very memory expensive, as the cache at each bucket
285822944501Smrg * size is only bounded by how many buffers of that size we've managed to have
285922944501Smrg * in flight at once.
286022944501Smrg */
2861424e9256Smrgvoid
286222944501Smrgdrm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr)
286322944501Smrg{
286422944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
286522944501Smrg
286620131375Smrg	bufmgr_gem->bo_reuse = true;
286722944501Smrg}
286822944501Smrg
286922944501Smrg/**
287022944501Smrg * Enable use of fenced reloc type.
287122944501Smrg *
287222944501Smrg * New code should enable this to avoid unnecessary fence register
287322944501Smrg * allocation.  If this option is not enabled, all relocs will have fence
287422944501Smrg * register allocated.
287522944501Smrg */
2876424e9256Smrgvoid
287722944501Smrgdrm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr)
287822944501Smrg{
287922944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
288022944501Smrg
288122944501Smrg	if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2)
288220131375Smrg		bufmgr_gem->fenced_relocs = true;
288322944501Smrg}
288422944501Smrg
288522944501Smrg/**
288622944501Smrg * Return the additional aperture space required by the tree of buffer objects
288722944501Smrg * rooted at bo.
288822944501Smrg */
288922944501Smrgstatic int
289022944501Smrgdrm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
289122944501Smrg{
289222944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
289322944501Smrg	int i;
289422944501Smrg	int total = 0;
289522944501Smrg
289622944501Smrg	if (bo == NULL || bo_gem->included_in_check_aperture)
289722944501Smrg		return 0;
289822944501Smrg
289922944501Smrg	total += bo->size;
290020131375Smrg	bo_gem->included_in_check_aperture = true;
290122944501Smrg
290222944501Smrg	for (i = 0; i < bo_gem->reloc_count; i++)
290322944501Smrg		total +=
290422944501Smrg		    drm_intel_gem_bo_get_aperture_space(bo_gem->
290522944501Smrg							reloc_target_info[i].bo);
290622944501Smrg
290722944501Smrg	return total;
290822944501Smrg}
290922944501Smrg
291022944501Smrg/**
291122944501Smrg * Count the number of buffers in this list that need a fence reg
291222944501Smrg *
291322944501Smrg * If the count is greater than the number of available regs, we'll have
291422944501Smrg * to ask the caller to resubmit a batch with fewer tiled buffers.
291522944501Smrg *
291622944501Smrg * This function over-counts if the same buffer is used multiple times.
291722944501Smrg */
291822944501Smrgstatic unsigned int
291922944501Smrgdrm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count)
292022944501Smrg{
292122944501Smrg	int i;
292222944501Smrg	unsigned int total = 0;
292322944501Smrg
292422944501Smrg	for (i = 0; i < count; i++) {
292522944501Smrg		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
292622944501Smrg
292722944501Smrg		if (bo_gem == NULL)
292822944501Smrg			continue;
292922944501Smrg
293022944501Smrg		total += bo_gem->reloc_tree_fences;
293122944501Smrg	}
293222944501Smrg	return total;
293322944501Smrg}
293422944501Smrg
293522944501Smrg/**
293622944501Smrg * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready
293722944501Smrg * for the next drm_intel_bufmgr_check_aperture_space() call.
293822944501Smrg */
293922944501Smrgstatic void
294022944501Smrgdrm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
294122944501Smrg{
294222944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
294322944501Smrg	int i;
294422944501Smrg
294522944501Smrg	if (bo == NULL || !bo_gem->included_in_check_aperture)
294622944501Smrg		return;
294722944501Smrg
294820131375Smrg	bo_gem->included_in_check_aperture = false;
294922944501Smrg
295022944501Smrg	for (i = 0; i < bo_gem->reloc_count; i++)
295122944501Smrg		drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->
295222944501Smrg							   reloc_target_info[i].bo);
295322944501Smrg}
295422944501Smrg
295522944501Smrg/**
295622944501Smrg * Return a conservative estimate for the amount of aperture required
295722944501Smrg * for a collection of buffers. This may double-count some buffers.
295822944501Smrg */
295922944501Smrgstatic unsigned int
296022944501Smrgdrm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count)
296122944501Smrg{
296222944501Smrg	int i;
296322944501Smrg	unsigned int total = 0;
296422944501Smrg
296522944501Smrg	for (i = 0; i < count; i++) {
296622944501Smrg		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
296722944501Smrg		if (bo_gem != NULL)
296822944501Smrg			total += bo_gem->reloc_tree_size;
296922944501Smrg	}
297022944501Smrg	return total;
297122944501Smrg}
297222944501Smrg
297322944501Smrg/**
297422944501Smrg * Return the amount of aperture needed for a collection of buffers.
297522944501Smrg * This avoids double counting any buffers, at the cost of looking
297622944501Smrg * at every buffer in the set.
297722944501Smrg */
297822944501Smrgstatic unsigned int
297922944501Smrgdrm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count)
298022944501Smrg{
298122944501Smrg	int i;
298222944501Smrg	unsigned int total = 0;
298322944501Smrg
298422944501Smrg	for (i = 0; i < count; i++) {
298522944501Smrg		total += drm_intel_gem_bo_get_aperture_space(bo_array[i]);
298622944501Smrg		/* For the first buffer object in the array, we get an
298722944501Smrg		 * accurate count back for its reloc_tree size (since nothing
298822944501Smrg		 * had been flagged as being counted yet).  We can save that
298922944501Smrg		 * value out as a more conservative reloc_tree_size that
299022944501Smrg		 * avoids double-counting target buffers.  Since the first
299122944501Smrg		 * buffer happens to usually be the batch buffer in our
299222944501Smrg		 * callers, this can pull us back from doing the tree
299322944501Smrg		 * walk on every new batch emit.
299422944501Smrg		 */
299522944501Smrg		if (i == 0) {
299622944501Smrg			drm_intel_bo_gem *bo_gem =
299722944501Smrg			    (drm_intel_bo_gem *) bo_array[i];
299822944501Smrg			bo_gem->reloc_tree_size = total;
299922944501Smrg		}
300022944501Smrg	}
300122944501Smrg
300222944501Smrg	for (i = 0; i < count; i++)
300322944501Smrg		drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]);
300422944501Smrg	return total;
300522944501Smrg}
300622944501Smrg
300722944501Smrg/**
300822944501Smrg * Return -1 if the batchbuffer should be flushed before attempting to
300922944501Smrg * emit rendering referencing the buffers pointed to by bo_array.
301022944501Smrg *
301122944501Smrg * This is required because if we try to emit a batchbuffer with relocations
301222944501Smrg * to a tree of buffers that won't simultaneously fit in the aperture,
301322944501Smrg * the rendering will return an error at a point where the software is not
301422944501Smrg * prepared to recover from it.
301522944501Smrg *
301622944501Smrg * However, we also want to emit the batchbuffer significantly before we reach
301722944501Smrg * the limit, as a series of batchbuffers each of which references buffers
301822944501Smrg * covering almost all of the aperture means that at each emit we end up
301922944501Smrg * waiting to evict a buffer from the last rendering, and we get synchronous
302022944501Smrg * performance.  By emitting smaller batchbuffers, we eat some CPU overhead to
302122944501Smrg * get better parallelism.
302222944501Smrg */
302322944501Smrgstatic int
302422944501Smrgdrm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
302522944501Smrg{
302622944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem =
302722944501Smrg	    (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr;
302822944501Smrg	unsigned int total = 0;
302922944501Smrg	unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
303022944501Smrg	int total_fences;
303122944501Smrg
303222944501Smrg	/* Check for fence reg constraints if necessary */
303322944501Smrg	if (bufmgr_gem->available_fences) {
303422944501Smrg		total_fences = drm_intel_gem_total_fences(bo_array, count);
303522944501Smrg		if (total_fences > bufmgr_gem->available_fences)
303622944501Smrg			return -ENOSPC;
303722944501Smrg	}
303822944501Smrg
303922944501Smrg	total = drm_intel_gem_estimate_batch_space(bo_array, count);
304022944501Smrg
304122944501Smrg	if (total > threshold)
304222944501Smrg		total = drm_intel_gem_compute_batch_space(bo_array, count);
304322944501Smrg
304422944501Smrg	if (total > threshold) {
304522944501Smrg		DBG("check_space: overflowed available aperture, "
304622944501Smrg		    "%dkb vs %dkb\n",
304722944501Smrg		    total / 1024, (int)bufmgr_gem->gtt_size / 1024);
304822944501Smrg		return -ENOSPC;
304922944501Smrg	} else {
305022944501Smrg		DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024,
305122944501Smrg		    (int)bufmgr_gem->gtt_size / 1024);
305222944501Smrg		return 0;
305322944501Smrg	}
305422944501Smrg}
305522944501Smrg
305622944501Smrg/*
305722944501Smrg * Disable buffer reuse for objects which are shared with the kernel
305822944501Smrg * as scanout buffers
305922944501Smrg */
306022944501Smrgstatic int
306122944501Smrgdrm_intel_gem_bo_disable_reuse(drm_intel_bo *bo)
306222944501Smrg{
306322944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
306422944501Smrg
306520131375Smrg	bo_gem->reusable = false;
306622944501Smrg	return 0;
306722944501Smrg}
306822944501Smrg
3069aaba2545Smrgstatic int
3070aaba2545Smrgdrm_intel_gem_bo_is_reusable(drm_intel_bo *bo)
3071aaba2545Smrg{
3072aaba2545Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3073aaba2545Smrg
3074aaba2545Smrg	return bo_gem->reusable;
3075aaba2545Smrg}
3076aaba2545Smrg
307722944501Smrgstatic int
307822944501Smrg_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
307922944501Smrg{
308022944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
308122944501Smrg	int i;
308222944501Smrg
308322944501Smrg	for (i = 0; i < bo_gem->reloc_count; i++) {
308422944501Smrg		if (bo_gem->reloc_target_info[i].bo == target_bo)
308522944501Smrg			return 1;
3086aaba2545Smrg		if (bo == bo_gem->reloc_target_info[i].bo)
3087aaba2545Smrg			continue;
308822944501Smrg		if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo,
308922944501Smrg						target_bo))
309022944501Smrg			return 1;
309122944501Smrg	}
309222944501Smrg
309322944501Smrg	return 0;
309422944501Smrg}
309522944501Smrg
309622944501Smrg/** Return true if target_bo is referenced by bo's relocation tree. */
309722944501Smrgstatic int
309822944501Smrgdrm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
309922944501Smrg{
310022944501Smrg	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
310122944501Smrg
310222944501Smrg	if (bo == NULL || target_bo == NULL)
310322944501Smrg		return 0;
310422944501Smrg	if (target_bo_gem->used_as_reloc_target)
310522944501Smrg		return _drm_intel_gem_bo_references(bo, target_bo);
310622944501Smrg	return 0;
310722944501Smrg}
310822944501Smrg
3109aaba2545Smrgstatic void
3110aaba2545Smrgadd_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
3111aaba2545Smrg{
3112aaba2545Smrg	unsigned int i = bufmgr_gem->num_buckets;
3113aaba2545Smrg
3114aaba2545Smrg	assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));
3115aaba2545Smrg
3116aaba2545Smrg	DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
3117aaba2545Smrg	bufmgr_gem->cache_bucket[i].size = size;
3118aaba2545Smrg	bufmgr_gem->num_buckets++;
3119aaba2545Smrg}
3120aaba2545Smrg
3121aaba2545Smrgstatic void
3122aaba2545Smrginit_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
3123aaba2545Smrg{
3124aaba2545Smrg	unsigned long size, cache_max_size = 64 * 1024 * 1024;
3125aaba2545Smrg
3126aaba2545Smrg	/* OK, so power of two buckets was too wasteful of memory.
3127aaba2545Smrg	 * Give 3 other sizes between each power of two, to hopefully
3128aaba2545Smrg	 * cover things accurately enough.  (The alternative is
3129aaba2545Smrg	 * probably to just go for exact matching of sizes, and assume
3130aaba2545Smrg	 * that for things like composited window resize the tiled
3131aaba2545Smrg	 * width/height alignment and rounding of sizes to pages will
3132aaba2545Smrg	 * get us useful cache hit rates anyway)
3133aaba2545Smrg	 */
3134aaba2545Smrg	add_bucket(bufmgr_gem, 4096);
3135aaba2545Smrg	add_bucket(bufmgr_gem, 4096 * 2);
3136aaba2545Smrg	add_bucket(bufmgr_gem, 4096 * 3);
3137aaba2545Smrg
3138aaba2545Smrg	/* Initialize the linked lists for BO reuse cache. */
3139aaba2545Smrg	for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
3140aaba2545Smrg		add_bucket(bufmgr_gem, size);
3141aaba2545Smrg
3142aaba2545Smrg		add_bucket(bufmgr_gem, size + size * 1 / 4);
3143aaba2545Smrg		add_bucket(bufmgr_gem, size + size * 2 / 4);
3144aaba2545Smrg		add_bucket(bufmgr_gem, size + size * 3 / 4);
3145aaba2545Smrg	}
3146aaba2545Smrg}
3147aaba2545Smrg
3148424e9256Smrgvoid
314920131375Smrgdrm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit)
315020131375Smrg{
315120131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
315220131375Smrg
315320131375Smrg	bufmgr_gem->vma_max = limit;
315420131375Smrg
315520131375Smrg	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
315620131375Smrg}
315720131375Smrg
315820131375Smrg/**
315920131375Smrg * Get the PCI ID for the device.  This can be overridden by setting the
316020131375Smrg * INTEL_DEVID_OVERRIDE environment variable to the desired ID.
316120131375Smrg */
316220131375Smrgstatic int
316320131375Smrgget_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem)
316420131375Smrg{
316520131375Smrg	char *devid_override;
3166424e9256Smrg	int devid = 0;
316720131375Smrg	int ret;
316820131375Smrg	drm_i915_getparam_t gp;
316920131375Smrg
317020131375Smrg	if (geteuid() == getuid()) {
317120131375Smrg		devid_override = getenv("INTEL_DEVID_OVERRIDE");
317220131375Smrg		if (devid_override) {
317320131375Smrg			bufmgr_gem->no_exec = true;
317420131375Smrg			return strtod(devid_override, NULL);
317520131375Smrg		}
317620131375Smrg	}
317720131375Smrg
3178424e9256Smrg	memclear(gp);
317920131375Smrg	gp.param = I915_PARAM_CHIPSET_ID;
318020131375Smrg	gp.value = &devid;
318120131375Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
318220131375Smrg	if (ret) {
318320131375Smrg		fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
318420131375Smrg		fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
318520131375Smrg	}
318620131375Smrg	return devid;
318720131375Smrg}
318820131375Smrg
3189424e9256Smrgint
319020131375Smrgdrm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr)
319120131375Smrg{
319220131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
319320131375Smrg
319420131375Smrg	return bufmgr_gem->pci_device;
319520131375Smrg}
319620131375Smrg
319720131375Smrg/**
319820131375Smrg * Sets the AUB filename.
319920131375Smrg *
320020131375Smrg * This function has to be called before drm_intel_bufmgr_gem_set_aub_dump()
320120131375Smrg * for it to have any effect.
320220131375Smrg */
3203424e9256Smrgvoid
320420131375Smrgdrm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr,
320520131375Smrg				      const char *filename)
320620131375Smrg{
320720131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
320820131375Smrg
320920131375Smrg	free(bufmgr_gem->aub_filename);
321020131375Smrg	if (filename)
321120131375Smrg		bufmgr_gem->aub_filename = strdup(filename);
321220131375Smrg}
321320131375Smrg
321420131375Smrg/**
321520131375Smrg * Sets up AUB dumping.
321620131375Smrg *
321720131375Smrg * This is a trace file format that can be used with the simulator.
321820131375Smrg * Packets are emitted in a format somewhat like GPU command packets.
321920131375Smrg * You can set up a GTT and upload your objects into the referenced
322020131375Smrg * space, then send off batchbuffers and get BMPs out the other end.
322120131375Smrg */
3222424e9256Smrgvoid
322320131375Smrgdrm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable)
322420131375Smrg{
322520131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
322620131375Smrg	int entry = 0x200003;
322720131375Smrg	int i;
322820131375Smrg	int gtt_size = 0x10000;
322920131375Smrg	const char *filename;
323020131375Smrg
323120131375Smrg	if (!enable) {
323220131375Smrg		if (bufmgr_gem->aub_file) {
323320131375Smrg			fclose(bufmgr_gem->aub_file);
323420131375Smrg			bufmgr_gem->aub_file = NULL;
323520131375Smrg		}
323620131375Smrg		return;
323720131375Smrg	}
323820131375Smrg
323920131375Smrg	if (geteuid() != getuid())
324020131375Smrg		return;
324120131375Smrg
324220131375Smrg	if (bufmgr_gem->aub_filename)
324320131375Smrg		filename = bufmgr_gem->aub_filename;
324420131375Smrg	else
324520131375Smrg		filename = "intel.aub";
324620131375Smrg	bufmgr_gem->aub_file = fopen(filename, "w+");
324720131375Smrg	if (!bufmgr_gem->aub_file)
324820131375Smrg		return;
324920131375Smrg
325020131375Smrg	/* Start allocating objects from just after the GTT. */
325120131375Smrg	bufmgr_gem->aub_offset = gtt_size;
325220131375Smrg
325320131375Smrg	/* Start with a (required) version packet. */
325420131375Smrg	aub_out(bufmgr_gem, CMD_AUB_HEADER | (13 - 2));
325520131375Smrg	aub_out(bufmgr_gem,
325620131375Smrg		(4 << AUB_HEADER_MAJOR_SHIFT) |
325720131375Smrg		(0 << AUB_HEADER_MINOR_SHIFT));
325820131375Smrg	for (i = 0; i < 8; i++) {
325920131375Smrg		aub_out(bufmgr_gem, 0); /* app name */
326020131375Smrg	}
326120131375Smrg	aub_out(bufmgr_gem, 0); /* timestamp */
326220131375Smrg	aub_out(bufmgr_gem, 0); /* timestamp */
326320131375Smrg	aub_out(bufmgr_gem, 0); /* comment len */
326420131375Smrg
326520131375Smrg	/* Set up the GTT. The max we can handle is 256M */
326620131375Smrg	aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | ((bufmgr_gem->gen >= 8 ? 6 : 5) - 2));
32673c748557Ssnj	/* Need to use GTT_ENTRY type for recent emulator */
32683c748557Ssnj	aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_GTT_ENTRY | 0 | AUB_TRACE_OP_DATA_WRITE);
326920131375Smrg	aub_out(bufmgr_gem, 0); /* subtype */
327020131375Smrg	aub_out(bufmgr_gem, 0); /* offset */
327120131375Smrg	aub_out(bufmgr_gem, gtt_size); /* size */
327220131375Smrg	if (bufmgr_gem->gen >= 8)
327320131375Smrg		aub_out(bufmgr_gem, 0);
327420131375Smrg	for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) {
327520131375Smrg		aub_out(bufmgr_gem, entry);
327620131375Smrg	}
327720131375Smrg}
327820131375Smrg
3279424e9256Smrgdrm_intel_context *
328020131375Smrgdrm_intel_gem_context_create(drm_intel_bufmgr *bufmgr)
328120131375Smrg{
328220131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
328320131375Smrg	struct drm_i915_gem_context_create create;
328420131375Smrg	drm_intel_context *context = NULL;
328520131375Smrg	int ret;
328620131375Smrg
328720131375Smrg	context = calloc(1, sizeof(*context));
328820131375Smrg	if (!context)
328920131375Smrg		return NULL;
329020131375Smrg
3291424e9256Smrg	memclear(create);
329220131375Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
329320131375Smrg	if (ret != 0) {
329420131375Smrg		DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n",
329520131375Smrg		    strerror(errno));
329620131375Smrg		free(context);
329720131375Smrg		return NULL;
329820131375Smrg	}
329920131375Smrg
330020131375Smrg	context->ctx_id = create.ctx_id;
330120131375Smrg	context->bufmgr = bufmgr;
330220131375Smrg
330320131375Smrg	return context;
330420131375Smrg}
330520131375Smrg
3306424e9256Smrgvoid
330720131375Smrgdrm_intel_gem_context_destroy(drm_intel_context *ctx)
330820131375Smrg{
330920131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem;
331020131375Smrg	struct drm_i915_gem_context_destroy destroy;
331120131375Smrg	int ret;
331220131375Smrg
331320131375Smrg	if (ctx == NULL)
331420131375Smrg		return;
331520131375Smrg
3316424e9256Smrg	memclear(destroy);
331720131375Smrg
331820131375Smrg	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
331920131375Smrg	destroy.ctx_id = ctx->ctx_id;
332020131375Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY,
332120131375Smrg		       &destroy);
332220131375Smrg	if (ret != 0)
332320131375Smrg		fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
332420131375Smrg			strerror(errno));
332520131375Smrg
332620131375Smrg	free(ctx);
332720131375Smrg}
332820131375Smrg
3329424e9256Smrgint
333020131375Smrgdrm_intel_get_reset_stats(drm_intel_context *ctx,
333120131375Smrg			  uint32_t *reset_count,
333220131375Smrg			  uint32_t *active,
333320131375Smrg			  uint32_t *pending)
333420131375Smrg{
333520131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem;
333620131375Smrg	struct drm_i915_reset_stats stats;
333720131375Smrg	int ret;
333820131375Smrg
333920131375Smrg	if (ctx == NULL)
334020131375Smrg		return -EINVAL;
334120131375Smrg
3342424e9256Smrg	memclear(stats);
334320131375Smrg
334420131375Smrg	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
334520131375Smrg	stats.ctx_id = ctx->ctx_id;
334620131375Smrg	ret = drmIoctl(bufmgr_gem->fd,
334720131375Smrg		       DRM_IOCTL_I915_GET_RESET_STATS,
334820131375Smrg		       &stats);
334920131375Smrg	if (ret == 0) {
335020131375Smrg		if (reset_count != NULL)
335120131375Smrg			*reset_count = stats.reset_count;
335220131375Smrg
335320131375Smrg		if (active != NULL)
335420131375Smrg			*active = stats.batch_active;
335520131375Smrg
335620131375Smrg		if (pending != NULL)
335720131375Smrg			*pending = stats.batch_pending;
335820131375Smrg	}
335920131375Smrg
336020131375Smrg	return ret;
336120131375Smrg}
336220131375Smrg
3363424e9256Smrgint
336420131375Smrgdrm_intel_reg_read(drm_intel_bufmgr *bufmgr,
336520131375Smrg		   uint32_t offset,
336620131375Smrg		   uint64_t *result)
336720131375Smrg{
336820131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
336920131375Smrg	struct drm_i915_reg_read reg_read;
337020131375Smrg	int ret;
337120131375Smrg
3372424e9256Smrg	memclear(reg_read);
337320131375Smrg	reg_read.offset = offset;
337420131375Smrg
337520131375Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
337620131375Smrg
337720131375Smrg	*result = reg_read.val;
337820131375Smrg	return ret;
337920131375Smrg}
338020131375Smrg
3381424e9256Smrgint
3382424e9256Smrgdrm_intel_get_subslice_total(int fd, unsigned int *subslice_total)
3383424e9256Smrg{
3384424e9256Smrg	drm_i915_getparam_t gp;
3385424e9256Smrg	int ret;
3386424e9256Smrg
3387424e9256Smrg	memclear(gp);
3388424e9256Smrg	gp.value = (int*)subslice_total;
3389424e9256Smrg	gp.param = I915_PARAM_SUBSLICE_TOTAL;
3390424e9256Smrg	ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
3391424e9256Smrg	if (ret)
3392424e9256Smrg		return -errno;
3393424e9256Smrg
3394424e9256Smrg	return 0;
3395424e9256Smrg}
3396424e9256Smrg
3397424e9256Smrgint
3398424e9256Smrgdrm_intel_get_eu_total(int fd, unsigned int *eu_total)
3399424e9256Smrg{
3400424e9256Smrg	drm_i915_getparam_t gp;
3401424e9256Smrg	int ret;
3402424e9256Smrg
3403424e9256Smrg	memclear(gp);
3404424e9256Smrg	gp.value = (int*)eu_total;
3405424e9256Smrg	gp.param = I915_PARAM_EU_TOTAL;
3406424e9256Smrg	ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
3407424e9256Smrg	if (ret)
3408424e9256Smrg		return -errno;
3409424e9256Smrg
3410424e9256Smrg	return 0;
3411424e9256Smrg}
341220131375Smrg
341320131375Smrg/**
341420131375Smrg * Annotate the given bo for use in aub dumping.
341520131375Smrg *
341620131375Smrg * \param annotations is an array of drm_intel_aub_annotation objects
341720131375Smrg * describing the type of data in various sections of the bo.  Each
341820131375Smrg * element of the array specifies the type and subtype of a section of
341920131375Smrg * the bo, and the past-the-end offset of that section.  The elements
342020131375Smrg * of \c annotations must be sorted so that ending_offset is
342120131375Smrg * increasing.
342220131375Smrg *
342320131375Smrg * \param count is the number of elements in the \c annotations array.
342420131375Smrg * If \c count is zero, then \c annotations will not be dereferenced.
342520131375Smrg *
342620131375Smrg * Annotations are copied into a private data structure, so caller may
342720131375Smrg * re-use the memory pointed to by \c annotations after the call
342820131375Smrg * returns.
342920131375Smrg *
343020131375Smrg * Annotations are stored for the lifetime of the bo; to reset to the
343120131375Smrg * default state (no annotations), call this function with a \c count
343220131375Smrg * of zero.
343320131375Smrg */
3434424e9256Smrgvoid
343520131375Smrgdrm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo,
343620131375Smrg					 drm_intel_aub_annotation *annotations,
343720131375Smrg					 unsigned count)
343820131375Smrg{
343920131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
344020131375Smrg	unsigned size = sizeof(*annotations) * count;
344120131375Smrg	drm_intel_aub_annotation *new_annotations =
344220131375Smrg		count > 0 ? realloc(bo_gem->aub_annotations, size) : NULL;
344320131375Smrg	if (new_annotations == NULL) {
344420131375Smrg		free(bo_gem->aub_annotations);
344520131375Smrg		bo_gem->aub_annotations = NULL;
344620131375Smrg		bo_gem->aub_annotation_count = 0;
344720131375Smrg		return;
344820131375Smrg	}
344920131375Smrg	memcpy(new_annotations, annotations, size);
345020131375Smrg	bo_gem->aub_annotations = new_annotations;
345120131375Smrg	bo_gem->aub_annotation_count = count;
345220131375Smrg}
345320131375Smrg
3454a884aba1Smrgstatic pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER;
3455a884aba1Smrgstatic drmMMListHead bufmgr_list = { &bufmgr_list, &bufmgr_list };
3456a884aba1Smrg
3457a884aba1Smrgstatic drm_intel_bufmgr_gem *
3458a884aba1Smrgdrm_intel_bufmgr_gem_find(int fd)
3459a884aba1Smrg{
3460a884aba1Smrg	drm_intel_bufmgr_gem *bufmgr_gem;
3461a884aba1Smrg
3462a884aba1Smrg	DRMLISTFOREACHENTRY(bufmgr_gem, &bufmgr_list, managers) {
3463a884aba1Smrg		if (bufmgr_gem->fd == fd) {
3464a884aba1Smrg			atomic_inc(&bufmgr_gem->refcount);
3465a884aba1Smrg			return bufmgr_gem;
3466a884aba1Smrg		}
3467a884aba1Smrg	}
3468a884aba1Smrg
3469a884aba1Smrg	return NULL;
3470a884aba1Smrg}
3471a884aba1Smrg
3472a884aba1Smrgstatic void
3473a884aba1Smrgdrm_intel_bufmgr_gem_unref(drm_intel_bufmgr *bufmgr)
3474a884aba1Smrg{
3475a884aba1Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3476a884aba1Smrg
3477a884aba1Smrg	if (atomic_add_unless(&bufmgr_gem->refcount, -1, 1)) {
3478a884aba1Smrg		pthread_mutex_lock(&bufmgr_list_mutex);
3479a884aba1Smrg
3480a884aba1Smrg		if (atomic_dec_and_test(&bufmgr_gem->refcount)) {
3481a884aba1Smrg			DRMLISTDEL(&bufmgr_gem->managers);
3482a884aba1Smrg			drm_intel_bufmgr_gem_destroy(bufmgr);
3483a884aba1Smrg		}
3484a884aba1Smrg
3485a884aba1Smrg		pthread_mutex_unlock(&bufmgr_list_mutex);
3486a884aba1Smrg	}
3487a884aba1Smrg}
3488a884aba1Smrg
348922944501Smrg/**
349022944501Smrg * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
349122944501Smrg * and manage map buffer objections.
349222944501Smrg *
349322944501Smrg * \param fd File descriptor of the opened DRM device.
349422944501Smrg */
3495424e9256Smrgdrm_intel_bufmgr *
349622944501Smrgdrm_intel_bufmgr_gem_init(int fd, int batch_size)
349722944501Smrg{
349822944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem;
349922944501Smrg	struct drm_i915_gem_get_aperture aperture;
350022944501Smrg	drm_i915_getparam_t gp;
350120131375Smrg	int ret, tmp;
350220131375Smrg	bool exec2 = false;
350322944501Smrg
3504a884aba1Smrg	pthread_mutex_lock(&bufmgr_list_mutex);
3505a884aba1Smrg
3506a884aba1Smrg	bufmgr_gem = drm_intel_bufmgr_gem_find(fd);
3507a884aba1Smrg	if (bufmgr_gem)
3508a884aba1Smrg		goto exit;
3509a884aba1Smrg
351022944501Smrg	bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
351122944501Smrg	if (bufmgr_gem == NULL)
3512a884aba1Smrg		goto exit;
351322944501Smrg
351422944501Smrg	bufmgr_gem->fd = fd;
3515a884aba1Smrg	atomic_set(&bufmgr_gem->refcount, 1);
351622944501Smrg
351722944501Smrg	if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
351822944501Smrg		free(bufmgr_gem);
3519a884aba1Smrg		bufmgr_gem = NULL;
3520a884aba1Smrg		goto exit;
352122944501Smrg	}
352222944501Smrg
3523424e9256Smrg	memclear(aperture);
35246d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
35256d98c517Smrg		       DRM_IOCTL_I915_GEM_GET_APERTURE,
35266d98c517Smrg		       &aperture);
352722944501Smrg
352822944501Smrg	if (ret == 0)
352922944501Smrg		bufmgr_gem->gtt_size = aperture.aper_available_size;
353022944501Smrg	else {
353122944501Smrg		fprintf(stderr, "DRM_IOCTL_I915_GEM_APERTURE failed: %s\n",
353222944501Smrg			strerror(errno));
353322944501Smrg		bufmgr_gem->gtt_size = 128 * 1024 * 1024;
353422944501Smrg		fprintf(stderr, "Assuming %dkB available aperture size.\n"
353522944501Smrg			"May lead to reduced performance or incorrect "
353622944501Smrg			"rendering.\n",
353722944501Smrg			(int)bufmgr_gem->gtt_size / 1024);
353822944501Smrg	}
353922944501Smrg
354020131375Smrg	bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem);
354122944501Smrg
354220131375Smrg	if (IS_GEN2(bufmgr_gem->pci_device))
354322944501Smrg		bufmgr_gem->gen = 2;
354420131375Smrg	else if (IS_GEN3(bufmgr_gem->pci_device))
354522944501Smrg		bufmgr_gem->gen = 3;
354620131375Smrg	else if (IS_GEN4(bufmgr_gem->pci_device))
354722944501Smrg		bufmgr_gem->gen = 4;
354820131375Smrg	else if (IS_GEN5(bufmgr_gem->pci_device))
354920131375Smrg		bufmgr_gem->gen = 5;
355020131375Smrg	else if (IS_GEN6(bufmgr_gem->pci_device))
355122944501Smrg		bufmgr_gem->gen = 6;
355220131375Smrg	else if (IS_GEN7(bufmgr_gem->pci_device))
355320131375Smrg		bufmgr_gem->gen = 7;
355420131375Smrg	else if (IS_GEN8(bufmgr_gem->pci_device))
355520131375Smrg		bufmgr_gem->gen = 8;
35563c748557Ssnj	else if (IS_GEN9(bufmgr_gem->pci_device))
35573c748557Ssnj		bufmgr_gem->gen = 9;
355820131375Smrg	else {
355920131375Smrg		free(bufmgr_gem);
3560a884aba1Smrg		bufmgr_gem = NULL;
3561a884aba1Smrg		goto exit;
356220131375Smrg	}
356320131375Smrg
356420131375Smrg	if (IS_GEN3(bufmgr_gem->pci_device) &&
356520131375Smrg	    bufmgr_gem->gtt_size > 256*1024*1024) {
356620131375Smrg		/* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't
356720131375Smrg		 * be used for tiled blits. To simplify the accounting, just
356820131375Smrg		 * substract the unmappable part (fixed to 256MB on all known
356920131375Smrg		 * gen3 devices) if the kernel advertises it. */
357020131375Smrg		bufmgr_gem->gtt_size -= 256*1024*1024;
357120131375Smrg	}
357220131375Smrg
3573424e9256Smrg	memclear(gp);
357420131375Smrg	gp.value = &tmp;
357522944501Smrg
357622944501Smrg	gp.param = I915_PARAM_HAS_EXECBUF2;
35776d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
357822944501Smrg	if (!ret)
357920131375Smrg		exec2 = true;
358022944501Smrg
3581aaba2545Smrg	gp.param = I915_PARAM_HAS_BSD;
35826d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
35839ce4edccSmrg	bufmgr_gem->has_bsd = ret == 0;
35849ce4edccSmrg
35859ce4edccSmrg	gp.param = I915_PARAM_HAS_BLT;
35869ce4edccSmrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
35879ce4edccSmrg	bufmgr_gem->has_blt = ret == 0;
35889ce4edccSmrg
35899ce4edccSmrg	gp.param = I915_PARAM_HAS_RELAXED_FENCING;
35909ce4edccSmrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
35919ce4edccSmrg	bufmgr_gem->has_relaxed_fencing = ret == 0;
3592aaba2545Smrg
3593424e9256Smrg	bufmgr_gem->bufmgr.bo_alloc_userptr = check_bo_alloc_userptr;
3594a884aba1Smrg
359520131375Smrg	gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
359620131375Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
359720131375Smrg	bufmgr_gem->has_wait_timeout = ret == 0;
359820131375Smrg
359920131375Smrg	gp.param = I915_PARAM_HAS_LLC;
360020131375Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
360120131375Smrg	if (ret != 0) {
360220131375Smrg		/* Kernel does not supports HAS_LLC query, fallback to GPU
360320131375Smrg		 * generation detection and assume that we have LLC on GEN6/7
360420131375Smrg		 */
360520131375Smrg		bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) |
360620131375Smrg				IS_GEN7(bufmgr_gem->pci_device));
360720131375Smrg	} else
360820131375Smrg		bufmgr_gem->has_llc = *gp.value;
360920131375Smrg
361020131375Smrg	gp.param = I915_PARAM_HAS_VEBOX;
361120131375Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
361220131375Smrg	bufmgr_gem->has_vebox = (ret == 0) & (*gp.value > 0);
361320131375Smrg
361422944501Smrg	if (bufmgr_gem->gen < 4) {
361522944501Smrg		gp.param = I915_PARAM_NUM_FENCES_AVAIL;
361622944501Smrg		gp.value = &bufmgr_gem->available_fences;
36176d98c517Smrg		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
361822944501Smrg		if (ret) {
361922944501Smrg			fprintf(stderr, "get fences failed: %d [%d]\n", ret,
362022944501Smrg				errno);
362122944501Smrg			fprintf(stderr, "param: %d, val: %d\n", gp.param,
362222944501Smrg				*gp.value);
362322944501Smrg			bufmgr_gem->available_fences = 0;
362422944501Smrg		} else {
362522944501Smrg			/* XXX The kernel reports the total number of fences,
362622944501Smrg			 * including any that may be pinned.
362722944501Smrg			 *
362822944501Smrg			 * We presume that there will be at least one pinned
362922944501Smrg			 * fence for the scanout buffer, but there may be more
363022944501Smrg			 * than one scanout and the user may be manually
363122944501Smrg			 * pinning buffers. Let's move to execbuffer2 and
363222944501Smrg			 * thereby forget the insanity of using fences...
363322944501Smrg			 */
363422944501Smrg			bufmgr_gem->available_fences -= 2;
363522944501Smrg			if (bufmgr_gem->available_fences < 0)
363622944501Smrg				bufmgr_gem->available_fences = 0;
363722944501Smrg		}
363822944501Smrg	}
363922944501Smrg
364022944501Smrg	/* Let's go with one relocation per every 2 dwords (but round down a bit
364122944501Smrg	 * since a power of two will mean an extra page allocation for the reloc
364222944501Smrg	 * buffer).
364322944501Smrg	 *
364422944501Smrg	 * Every 4 was too few for the blender benchmark.
364522944501Smrg	 */
364622944501Smrg	bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
364722944501Smrg
364822944501Smrg	bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
364922944501Smrg	bufmgr_gem->bufmgr.bo_alloc_for_render =
365022944501Smrg	    drm_intel_gem_bo_alloc_for_render;
365122944501Smrg	bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
365222944501Smrg	bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
365322944501Smrg	bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
365422944501Smrg	bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
365522944501Smrg	bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
365622944501Smrg	bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
365722944501Smrg	bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
365822944501Smrg	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
365922944501Smrg	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
366022944501Smrg	bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
366122944501Smrg	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
366222944501Smrg	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
366322944501Smrg	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
366422944501Smrg	bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
366522944501Smrg	bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
366622944501Smrg	/* Use the new one if available */
3667aaba2545Smrg	if (exec2) {
366822944501Smrg		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2;
36699ce4edccSmrg		bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2;
3670aaba2545Smrg	} else
367122944501Smrg		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
367222944501Smrg	bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy;
367322944501Smrg	bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise;
3674a884aba1Smrg	bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_unref;
367522944501Smrg	bufmgr_gem->bufmgr.debug = 0;
367622944501Smrg	bufmgr_gem->bufmgr.check_aperture_space =
367722944501Smrg	    drm_intel_gem_check_aperture_space;
367822944501Smrg	bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
3679aaba2545Smrg	bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable;
368022944501Smrg	bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
368122944501Smrg	    drm_intel_gem_get_pipe_from_crtc_id;
368222944501Smrg	bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;
368322944501Smrg
368420131375Smrg	DRMINITLISTHEAD(&bufmgr_gem->named);
3685aaba2545Smrg	init_cache_buckets(bufmgr_gem);
368622944501Smrg
368720131375Smrg	DRMINITLISTHEAD(&bufmgr_gem->vma_cache);
368820131375Smrg	bufmgr_gem->vma_max = -1; /* unlimited by default */
368920131375Smrg
3690a884aba1Smrg	DRMLISTADD(&bufmgr_gem->managers, &bufmgr_list);
3691a884aba1Smrg
3692a884aba1Smrgexit:
3693a884aba1Smrg	pthread_mutex_unlock(&bufmgr_list_mutex);
3694a884aba1Smrg
3695a884aba1Smrg	return bufmgr_gem != NULL ? &bufmgr_gem->bufmgr : NULL;
369622944501Smrg}
3697