intel_bufmgr_gem.c revision d82d45b3
122944501Smrg/**************************************************************************
222944501Smrg *
322944501Smrg * Copyright � 2007 Red Hat Inc.
420131375Smrg * Copyright � 2007-2012 Intel Corporation
522944501Smrg * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
622944501Smrg * All Rights Reserved.
722944501Smrg *
822944501Smrg * Permission is hereby granted, free of charge, to any person obtaining a
922944501Smrg * copy of this software and associated documentation files (the
1022944501Smrg * "Software"), to deal in the Software without restriction, including
1122944501Smrg * without limitation the rights to use, copy, modify, merge, publish,
1222944501Smrg * distribute, sub license, and/or sell copies of the Software, and to
1322944501Smrg * permit persons to whom the Software is furnished to do so, subject to
1422944501Smrg * the following conditions:
1522944501Smrg *
1622944501Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1722944501Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1822944501Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
1922944501Smrg * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
2022944501Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
2122944501Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
2222944501Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE.
2322944501Smrg *
2422944501Smrg * The above copyright notice and this permission notice (including the
2522944501Smrg * next paragraph) shall be included in all copies or substantial portions
2622944501Smrg * of the Software.
2722944501Smrg *
2822944501Smrg *
2922944501Smrg **************************************************************************/
3022944501Smrg/*
3122944501Smrg * Authors: Thomas Hellstr�m <thomas-at-tungstengraphics-dot-com>
3222944501Smrg *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
3322944501Smrg *	    Eric Anholt <eric@anholt.net>
3422944501Smrg *	    Dave Airlie <airlied@linux.ie>
3522944501Smrg */
3622944501Smrg
3722944501Smrg#ifdef HAVE_CONFIG_H
3822944501Smrg#include "config.h"
3922944501Smrg#endif
4022944501Smrg
4122944501Smrg#include <xf86drm.h>
4222944501Smrg#include <xf86atomic.h>
4322944501Smrg#include <fcntl.h>
4422944501Smrg#include <stdio.h>
4522944501Smrg#include <stdlib.h>
4622944501Smrg#include <string.h>
4722944501Smrg#include <unistd.h>
4822944501Smrg#include <assert.h>
4922944501Smrg#include <pthread.h>
502e6867f6Smrg#include <stddef.h>
5122944501Smrg#include <sys/ioctl.h>
5222944501Smrg#include <sys/mman.h>
5322944501Smrg#include <sys/stat.h>
5422944501Smrg#include <sys/types.h>
5520131375Smrg#include <stdbool.h>
5622944501Smrg
5722944501Smrg#include "errno.h"
5820131375Smrg#ifndef ETIME
5920131375Smrg#define ETIME ETIMEDOUT
6020131375Smrg#endif
6122944501Smrg#include "libdrm_lists.h"
6222944501Smrg#include "intel_bufmgr.h"
6322944501Smrg#include "intel_bufmgr_priv.h"
6422944501Smrg#include "intel_chipset.h"
6520131375Smrg#include "intel_aub.h"
6622944501Smrg#include "string.h"
6722944501Smrg
6822944501Smrg#include "i915_drm.h"
6922944501Smrg
7020131375Smrg#ifdef HAVE_VALGRIND
7120131375Smrg#include <valgrind.h>
7220131375Smrg#include <memcheck.h>
7320131375Smrg#define VG(x) x
7420131375Smrg#else
7520131375Smrg#define VG(x)
7620131375Smrg#endif
7720131375Smrg
7820131375Smrg#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s)))
7920131375Smrg
8022944501Smrg#define DBG(...) do {					\
8122944501Smrg	if (bufmgr_gem->bufmgr.debug)			\
8222944501Smrg		fprintf(stderr, __VA_ARGS__);		\
8322944501Smrg} while (0)
8422944501Smrg
85aaba2545Smrg#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
86aaba2545Smrg
8722944501Smrgtypedef struct _drm_intel_bo_gem drm_intel_bo_gem;
8822944501Smrg
8922944501Smrgstruct drm_intel_gem_bo_bucket {
9022944501Smrg	drmMMListHead head;
9122944501Smrg	unsigned long size;
9222944501Smrg};
9322944501Smrg
9422944501Smrgtypedef struct _drm_intel_bufmgr_gem {
9522944501Smrg	drm_intel_bufmgr bufmgr;
9622944501Smrg
9722944501Smrg	int fd;
9822944501Smrg
9922944501Smrg	int max_relocs;
10022944501Smrg
10122944501Smrg	pthread_mutex_t lock;
10222944501Smrg
10322944501Smrg	struct drm_i915_gem_exec_object *exec_objects;
10422944501Smrg	struct drm_i915_gem_exec_object2 *exec2_objects;
10522944501Smrg	drm_intel_bo **exec_bos;
10622944501Smrg	int exec_size;
10722944501Smrg	int exec_count;
10822944501Smrg
10922944501Smrg	/** Array of lists of cached gem objects of power-of-two sizes */
110aaba2545Smrg	struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
111aaba2545Smrg	int num_buckets;
1126d98c517Smrg	time_t time;
11322944501Smrg
11420131375Smrg	drmMMListHead named;
11520131375Smrg	drmMMListHead vma_cache;
11620131375Smrg	int vma_count, vma_open, vma_max;
11720131375Smrg
11822944501Smrg	uint64_t gtt_size;
11922944501Smrg	int available_fences;
12022944501Smrg	int pci_device;
12122944501Smrg	int gen;
1229ce4edccSmrg	unsigned int has_bsd : 1;
1239ce4edccSmrg	unsigned int has_blt : 1;
1249ce4edccSmrg	unsigned int has_relaxed_fencing : 1;
12520131375Smrg	unsigned int has_llc : 1;
12620131375Smrg	unsigned int has_wait_timeout : 1;
1279ce4edccSmrg	unsigned int bo_reuse : 1;
12820131375Smrg	unsigned int no_exec : 1;
12920131375Smrg	unsigned int has_vebox : 1;
13020131375Smrg	bool fenced_relocs;
13120131375Smrg
13220131375Smrg	char *aub_filename;
13320131375Smrg	FILE *aub_file;
13420131375Smrg	uint32_t aub_offset;
13522944501Smrg} drm_intel_bufmgr_gem;
13622944501Smrg
13722944501Smrg#define DRM_INTEL_RELOC_FENCE (1<<0)
13822944501Smrg
13922944501Smrgtypedef struct _drm_intel_reloc_target_info {
14022944501Smrg	drm_intel_bo *bo;
14122944501Smrg	int flags;
14222944501Smrg} drm_intel_reloc_target;
14322944501Smrg
14422944501Smrgstruct _drm_intel_bo_gem {
14522944501Smrg	drm_intel_bo bo;
14622944501Smrg
14722944501Smrg	atomic_t refcount;
14822944501Smrg	uint32_t gem_handle;
14922944501Smrg	const char *name;
15022944501Smrg
15122944501Smrg	/**
15222944501Smrg	 * Kenel-assigned global name for this object
15320131375Smrg         *
15420131375Smrg         * List contains both flink named and prime fd'd objects
15522944501Smrg	 */
15622944501Smrg	unsigned int global_name;
15720131375Smrg	drmMMListHead name_list;
15822944501Smrg
15922944501Smrg	/**
16022944501Smrg	 * Index of the buffer within the validation list while preparing a
16122944501Smrg	 * batchbuffer execution.
16222944501Smrg	 */
16322944501Smrg	int validate_index;
16422944501Smrg
16522944501Smrg	/**
16622944501Smrg	 * Current tiling mode
16722944501Smrg	 */
16822944501Smrg	uint32_t tiling_mode;
16922944501Smrg	uint32_t swizzle_mode;
1706d98c517Smrg	unsigned long stride;
17122944501Smrg
17222944501Smrg	time_t free_time;
17322944501Smrg
17422944501Smrg	/** Array passed to the DRM containing relocation information. */
17522944501Smrg	struct drm_i915_gem_relocation_entry *relocs;
17622944501Smrg	/**
17722944501Smrg	 * Array of info structs corresponding to relocs[i].target_handle etc
17822944501Smrg	 */
17922944501Smrg	drm_intel_reloc_target *reloc_target_info;
18022944501Smrg	/** Number of entries in relocs */
18122944501Smrg	int reloc_count;
18222944501Smrg	/** Mapped address for the buffer, saved across map/unmap cycles */
18322944501Smrg	void *mem_virtual;
18422944501Smrg	/** GTT virtual address for the buffer, saved across map/unmap cycles */
18522944501Smrg	void *gtt_virtual;
18620131375Smrg	int map_count;
18720131375Smrg	drmMMListHead vma_list;
18822944501Smrg
18922944501Smrg	/** BO cache list */
19022944501Smrg	drmMMListHead head;
19122944501Smrg
19222944501Smrg	/**
19322944501Smrg	 * Boolean of whether this BO and its children have been included in
19422944501Smrg	 * the current drm_intel_bufmgr_check_aperture_space() total.
19522944501Smrg	 */
19620131375Smrg	bool included_in_check_aperture;
19722944501Smrg
19822944501Smrg	/**
19922944501Smrg	 * Boolean of whether this buffer has been used as a relocation
20022944501Smrg	 * target and had its size accounted for, and thus can't have any
20122944501Smrg	 * further relocations added to it.
20222944501Smrg	 */
20320131375Smrg	bool used_as_reloc_target;
20422944501Smrg
20522944501Smrg	/**
20622944501Smrg	 * Boolean of whether we have encountered an error whilst building the relocation tree.
20722944501Smrg	 */
20820131375Smrg	bool has_error;
20922944501Smrg
21022944501Smrg	/**
21122944501Smrg	 * Boolean of whether this buffer can be re-used
21222944501Smrg	 */
21320131375Smrg	bool reusable;
21420131375Smrg
21520131375Smrg	/**
21620131375Smrg	 * Boolean of whether the GPU is definitely not accessing the buffer.
21720131375Smrg	 *
21820131375Smrg	 * This is only valid when reusable, since non-reusable
21920131375Smrg	 * buffers are those that have been shared wth other
22020131375Smrg	 * processes, so we don't know their state.
22120131375Smrg	 */
22220131375Smrg	bool idle;
22322944501Smrg
22422944501Smrg	/**
22522944501Smrg	 * Size in bytes of this buffer and its relocation descendents.
22622944501Smrg	 *
22722944501Smrg	 * Used to avoid costly tree walking in
22822944501Smrg	 * drm_intel_bufmgr_check_aperture in the common case.
22922944501Smrg	 */
23022944501Smrg	int reloc_tree_size;
23122944501Smrg
23222944501Smrg	/**
23322944501Smrg	 * Number of potential fence registers required by this buffer and its
23422944501Smrg	 * relocations.
23522944501Smrg	 */
23622944501Smrg	int reloc_tree_fences;
23720131375Smrg
23820131375Smrg	/** Flags that we may need to do the SW_FINSIH ioctl on unmap. */
23920131375Smrg	bool mapped_cpu_write;
24020131375Smrg
24120131375Smrg	uint32_t aub_offset;
24220131375Smrg
24320131375Smrg	drm_intel_aub_annotation *aub_annotations;
24420131375Smrg	unsigned aub_annotation_count;
24522944501Smrg};
24622944501Smrg
24722944501Smrgstatic unsigned int
24822944501Smrgdrm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);
24922944501Smrg
25022944501Smrgstatic unsigned int
25122944501Smrgdrm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count);
25222944501Smrg
25322944501Smrgstatic int
25422944501Smrgdrm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
25522944501Smrg			    uint32_t * swizzle_mode);
25622944501Smrg
25722944501Smrgstatic int
2586d98c517Smrgdrm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
2596d98c517Smrg				     uint32_t tiling_mode,
2606d98c517Smrg				     uint32_t stride);
26122944501Smrg
26222944501Smrgstatic void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
26322944501Smrg						      time_t time);
26422944501Smrg
26522944501Smrgstatic void drm_intel_gem_bo_unreference(drm_intel_bo *bo);
26622944501Smrg
26722944501Smrgstatic void drm_intel_gem_bo_free(drm_intel_bo *bo);
26822944501Smrg
26922944501Smrgstatic unsigned long
27022944501Smrgdrm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
27122944501Smrg			   uint32_t *tiling_mode)
27222944501Smrg{
27322944501Smrg	unsigned long min_size, max_size;
27422944501Smrg	unsigned long i;
27522944501Smrg
27622944501Smrg	if (*tiling_mode == I915_TILING_NONE)
27722944501Smrg		return size;
27822944501Smrg
27922944501Smrg	/* 965+ just need multiples of page size for tiling */
28022944501Smrg	if (bufmgr_gem->gen >= 4)
28122944501Smrg		return ROUND_UP_TO(size, 4096);
28222944501Smrg
28322944501Smrg	/* Older chips need powers of two, of at least 512k or 1M */
28422944501Smrg	if (bufmgr_gem->gen == 3) {
28522944501Smrg		min_size = 1024*1024;
28622944501Smrg		max_size = 128*1024*1024;
28722944501Smrg	} else {
28822944501Smrg		min_size = 512*1024;
28922944501Smrg		max_size = 64*1024*1024;
29022944501Smrg	}
29122944501Smrg
29222944501Smrg	if (size > max_size) {
29322944501Smrg		*tiling_mode = I915_TILING_NONE;
29422944501Smrg		return size;
29522944501Smrg	}
29622944501Smrg
2979ce4edccSmrg	/* Do we need to allocate every page for the fence? */
2989ce4edccSmrg	if (bufmgr_gem->has_relaxed_fencing)
2999ce4edccSmrg		return ROUND_UP_TO(size, 4096);
3009ce4edccSmrg
30122944501Smrg	for (i = min_size; i < size; i <<= 1)
30222944501Smrg		;
30322944501Smrg
30422944501Smrg	return i;
30522944501Smrg}
30622944501Smrg
30722944501Smrg/*
30822944501Smrg * Round a given pitch up to the minimum required for X tiling on a
30922944501Smrg * given chip.  We use 512 as the minimum to allow for a later tiling
31022944501Smrg * change.
31122944501Smrg */
31222944501Smrgstatic unsigned long
31322944501Smrgdrm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
3146d98c517Smrg			    unsigned long pitch, uint32_t *tiling_mode)
31522944501Smrg{
31622944501Smrg	unsigned long tile_width;
31722944501Smrg	unsigned long i;
31822944501Smrg
31922944501Smrg	/* If untiled, then just align it so that we can do rendering
32022944501Smrg	 * to it with the 3D engine.
32122944501Smrg	 */
3226d98c517Smrg	if (*tiling_mode == I915_TILING_NONE)
32322944501Smrg		return ALIGN(pitch, 64);
32422944501Smrg
32520131375Smrg	if (*tiling_mode == I915_TILING_X
32620131375Smrg			|| (IS_915(bufmgr_gem->pci_device)
32720131375Smrg			    && *tiling_mode == I915_TILING_Y))
32822944501Smrg		tile_width = 512;
32922944501Smrg	else
33022944501Smrg		tile_width = 128;
33122944501Smrg
33222944501Smrg	/* 965 is flexible */
33322944501Smrg	if (bufmgr_gem->gen >= 4)
33422944501Smrg		return ROUND_UP_TO(pitch, tile_width);
33522944501Smrg
3366d98c517Smrg	/* The older hardware has a maximum pitch of 8192 with tiled
3376d98c517Smrg	 * surfaces, so fallback to untiled if it's too large.
3386d98c517Smrg	 */
3396d98c517Smrg	if (pitch > 8192) {
3406d98c517Smrg		*tiling_mode = I915_TILING_NONE;
3416d98c517Smrg		return ALIGN(pitch, 64);
3426d98c517Smrg	}
3436d98c517Smrg
34422944501Smrg	/* Pre-965 needs power of two tile width */
34522944501Smrg	for (i = tile_width; i < pitch; i <<= 1)
34622944501Smrg		;
34722944501Smrg
34822944501Smrg	return i;
34922944501Smrg}
35022944501Smrg
35122944501Smrgstatic struct drm_intel_gem_bo_bucket *
35222944501Smrgdrm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
35322944501Smrg				 unsigned long size)
35422944501Smrg{
35522944501Smrg	int i;
35622944501Smrg
357aaba2545Smrg	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
35822944501Smrg		struct drm_intel_gem_bo_bucket *bucket =
35922944501Smrg		    &bufmgr_gem->cache_bucket[i];
36022944501Smrg		if (bucket->size >= size) {
36122944501Smrg			return bucket;
36222944501Smrg		}
36322944501Smrg	}
36422944501Smrg
36522944501Smrg	return NULL;
36622944501Smrg}
36722944501Smrg
36822944501Smrgstatic void
36922944501Smrgdrm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
37022944501Smrg{
37122944501Smrg	int i, j;
37222944501Smrg
37322944501Smrg	for (i = 0; i < bufmgr_gem->exec_count; i++) {
37422944501Smrg		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
37522944501Smrg		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
37622944501Smrg
37722944501Smrg		if (bo_gem->relocs == NULL) {
37822944501Smrg			DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle,
37922944501Smrg			    bo_gem->name);
38022944501Smrg			continue;
38122944501Smrg		}
38222944501Smrg
38322944501Smrg		for (j = 0; j < bo_gem->reloc_count; j++) {
38422944501Smrg			drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo;
38522944501Smrg			drm_intel_bo_gem *target_gem =
38622944501Smrg			    (drm_intel_bo_gem *) target_bo;
38722944501Smrg
38822944501Smrg			DBG("%2d: %d (%s)@0x%08llx -> "
389d82d45b3Sjoerg			    "%d (%s)@0x%08llx + 0x%08x\n",
39022944501Smrg			    i,
39122944501Smrg			    bo_gem->gem_handle, bo_gem->name,
39222944501Smrg			    (unsigned long long)bo_gem->relocs[j].offset,
39322944501Smrg			    target_gem->gem_handle,
39422944501Smrg			    target_gem->name,
395d82d45b3Sjoerg			    (unsigned long long)target_bo->offset64,
39622944501Smrg			    bo_gem->relocs[j].delta);
39722944501Smrg		}
39822944501Smrg	}
39922944501Smrg}
40022944501Smrg
40122944501Smrgstatic inline void
40222944501Smrgdrm_intel_gem_bo_reference(drm_intel_bo *bo)
40322944501Smrg{
40422944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
40522944501Smrg
40622944501Smrg	atomic_inc(&bo_gem->refcount);
40722944501Smrg}
40822944501Smrg
40922944501Smrg/**
41022944501Smrg * Adds the given buffer to the list of buffers to be validated (moved into the
41122944501Smrg * appropriate memory type) with the next batch submission.
41222944501Smrg *
41322944501Smrg * If a buffer is validated multiple times in a batch submission, it ends up
41422944501Smrg * with the intersection of the memory type flags and the union of the
41522944501Smrg * access flags.
41622944501Smrg */
41722944501Smrgstatic void
41822944501Smrgdrm_intel_add_validate_buffer(drm_intel_bo *bo)
41922944501Smrg{
42022944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
42122944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
42222944501Smrg	int index;
42322944501Smrg
42422944501Smrg	if (bo_gem->validate_index != -1)
42522944501Smrg		return;
42622944501Smrg
42722944501Smrg	/* Extend the array of validation entries as necessary. */
42822944501Smrg	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
42922944501Smrg		int new_size = bufmgr_gem->exec_size * 2;
43022944501Smrg
43122944501Smrg		if (new_size == 0)
43222944501Smrg			new_size = 5;
43322944501Smrg
43422944501Smrg		bufmgr_gem->exec_objects =
43522944501Smrg		    realloc(bufmgr_gem->exec_objects,
43622944501Smrg			    sizeof(*bufmgr_gem->exec_objects) * new_size);
43722944501Smrg		bufmgr_gem->exec_bos =
43822944501Smrg		    realloc(bufmgr_gem->exec_bos,
43922944501Smrg			    sizeof(*bufmgr_gem->exec_bos) * new_size);
44022944501Smrg		bufmgr_gem->exec_size = new_size;
44122944501Smrg	}
44222944501Smrg
44322944501Smrg	index = bufmgr_gem->exec_count;
44422944501Smrg	bo_gem->validate_index = index;
44522944501Smrg	/* Fill in array entry */
44622944501Smrg	bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
44722944501Smrg	bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
44822944501Smrg	bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs;
44922944501Smrg	bufmgr_gem->exec_objects[index].alignment = 0;
45022944501Smrg	bufmgr_gem->exec_objects[index].offset = 0;
45122944501Smrg	bufmgr_gem->exec_bos[index] = bo;
45222944501Smrg	bufmgr_gem->exec_count++;
45322944501Smrg}
45422944501Smrg
45522944501Smrgstatic void
45622944501Smrgdrm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
45722944501Smrg{
45822944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
45922944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
46022944501Smrg	int index;
46122944501Smrg
46222944501Smrg	if (bo_gem->validate_index != -1) {
46322944501Smrg		if (need_fence)
46422944501Smrg			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
46522944501Smrg				EXEC_OBJECT_NEEDS_FENCE;
46622944501Smrg		return;
46722944501Smrg	}
46822944501Smrg
46922944501Smrg	/* Extend the array of validation entries as necessary. */
47022944501Smrg	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
47122944501Smrg		int new_size = bufmgr_gem->exec_size * 2;
47222944501Smrg
47322944501Smrg		if (new_size == 0)
47422944501Smrg			new_size = 5;
47522944501Smrg
47622944501Smrg		bufmgr_gem->exec2_objects =
47722944501Smrg			realloc(bufmgr_gem->exec2_objects,
47822944501Smrg				sizeof(*bufmgr_gem->exec2_objects) * new_size);
47922944501Smrg		bufmgr_gem->exec_bos =
48022944501Smrg			realloc(bufmgr_gem->exec_bos,
48122944501Smrg				sizeof(*bufmgr_gem->exec_bos) * new_size);
48222944501Smrg		bufmgr_gem->exec_size = new_size;
48322944501Smrg	}
48422944501Smrg
48522944501Smrg	index = bufmgr_gem->exec_count;
48622944501Smrg	bo_gem->validate_index = index;
48722944501Smrg	/* Fill in array entry */
48822944501Smrg	bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
48922944501Smrg	bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
49022944501Smrg	bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
49122944501Smrg	bufmgr_gem->exec2_objects[index].alignment = 0;
49222944501Smrg	bufmgr_gem->exec2_objects[index].offset = 0;
49322944501Smrg	bufmgr_gem->exec_bos[index] = bo;
49422944501Smrg	bufmgr_gem->exec2_objects[index].flags = 0;
49522944501Smrg	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
49622944501Smrg	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
49722944501Smrg	if (need_fence) {
49822944501Smrg		bufmgr_gem->exec2_objects[index].flags |=
49922944501Smrg			EXEC_OBJECT_NEEDS_FENCE;
50022944501Smrg	}
50122944501Smrg	bufmgr_gem->exec_count++;
50222944501Smrg}
50322944501Smrg
50422944501Smrg#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
50522944501Smrg	sizeof(uint32_t))
50622944501Smrg
50722944501Smrgstatic void
50822944501Smrgdrm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem,
50922944501Smrg				      drm_intel_bo_gem *bo_gem)
51022944501Smrg{
51122944501Smrg	int size;
51222944501Smrg
51322944501Smrg	assert(!bo_gem->used_as_reloc_target);
51422944501Smrg
51522944501Smrg	/* The older chipsets are far-less flexible in terms of tiling,
51622944501Smrg	 * and require tiled buffer to be size aligned in the aperture.
51722944501Smrg	 * This means that in the worst possible case we will need a hole
51822944501Smrg	 * twice as large as the object in order for it to fit into the
51922944501Smrg	 * aperture. Optimal packing is for wimps.
52022944501Smrg	 */
52122944501Smrg	size = bo_gem->bo.size;
5229ce4edccSmrg	if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) {
5239ce4edccSmrg		int min_size;
5249ce4edccSmrg
5259ce4edccSmrg		if (bufmgr_gem->has_relaxed_fencing) {
5269ce4edccSmrg			if (bufmgr_gem->gen == 3)
5279ce4edccSmrg				min_size = 1024*1024;
5289ce4edccSmrg			else
5299ce4edccSmrg				min_size = 512*1024;
5309ce4edccSmrg
5319ce4edccSmrg			while (min_size < size)
5329ce4edccSmrg				min_size *= 2;
5339ce4edccSmrg		} else
5349ce4edccSmrg			min_size = size;
5359ce4edccSmrg
5369ce4edccSmrg		/* Account for worst-case alignment. */
5379ce4edccSmrg		size = 2 * min_size;
5389ce4edccSmrg	}
53922944501Smrg
54022944501Smrg	bo_gem->reloc_tree_size = size;
54122944501Smrg}
54222944501Smrg
54322944501Smrgstatic int
54422944501Smrgdrm_intel_setup_reloc_list(drm_intel_bo *bo)
54522944501Smrg{
54622944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
54722944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
54822944501Smrg	unsigned int max_relocs = bufmgr_gem->max_relocs;
54922944501Smrg
55022944501Smrg	if (bo->size / 4 < max_relocs)
55122944501Smrg		max_relocs = bo->size / 4;
55222944501Smrg
55322944501Smrg	bo_gem->relocs = malloc(max_relocs *
55422944501Smrg				sizeof(struct drm_i915_gem_relocation_entry));
55522944501Smrg	bo_gem->reloc_target_info = malloc(max_relocs *
556aaba2545Smrg					   sizeof(drm_intel_reloc_target));
55722944501Smrg	if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) {
55820131375Smrg		bo_gem->has_error = true;
55922944501Smrg
56022944501Smrg		free (bo_gem->relocs);
56122944501Smrg		bo_gem->relocs = NULL;
56222944501Smrg
56322944501Smrg		free (bo_gem->reloc_target_info);
56422944501Smrg		bo_gem->reloc_target_info = NULL;
56522944501Smrg
56622944501Smrg		return 1;
56722944501Smrg	}
56822944501Smrg
56922944501Smrg	return 0;
57022944501Smrg}
57122944501Smrg
57222944501Smrgstatic int
57322944501Smrgdrm_intel_gem_bo_busy(drm_intel_bo *bo)
57422944501Smrg{
57522944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
57622944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
57722944501Smrg	struct drm_i915_gem_busy busy;
57822944501Smrg	int ret;
57922944501Smrg
58020131375Smrg	if (bo_gem->reusable && bo_gem->idle)
58120131375Smrg		return false;
58220131375Smrg
58320131375Smrg	VG_CLEAR(busy);
58422944501Smrg	busy.handle = bo_gem->gem_handle;
58522944501Smrg
5866d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
58720131375Smrg	if (ret == 0) {
58820131375Smrg		bo_gem->idle = !busy.busy;
58920131375Smrg		return busy.busy;
59020131375Smrg	} else {
59120131375Smrg		return false;
59220131375Smrg	}
59322944501Smrg	return (ret == 0 && busy.busy);
59422944501Smrg}
59522944501Smrg
59622944501Smrgstatic int
59722944501Smrgdrm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem,
59822944501Smrg				  drm_intel_bo_gem *bo_gem, int state)
59922944501Smrg{
60022944501Smrg	struct drm_i915_gem_madvise madv;
60122944501Smrg
60220131375Smrg	VG_CLEAR(madv);
60322944501Smrg	madv.handle = bo_gem->gem_handle;
60422944501Smrg	madv.madv = state;
60522944501Smrg	madv.retained = 1;
6066d98c517Smrg	drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
60722944501Smrg
60822944501Smrg	return madv.retained;
60922944501Smrg}
61022944501Smrg
61122944501Smrgstatic int
61222944501Smrgdrm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv)
61322944501Smrg{
61422944501Smrg	return drm_intel_gem_bo_madvise_internal
61522944501Smrg		((drm_intel_bufmgr_gem *) bo->bufmgr,
61622944501Smrg		 (drm_intel_bo_gem *) bo,
61722944501Smrg		 madv);
61822944501Smrg}
61922944501Smrg
62022944501Smrg/* drop the oldest entries that have been purged by the kernel */
62122944501Smrgstatic void
62222944501Smrgdrm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
62322944501Smrg				    struct drm_intel_gem_bo_bucket *bucket)
62422944501Smrg{
62522944501Smrg	while (!DRMLISTEMPTY(&bucket->head)) {
62622944501Smrg		drm_intel_bo_gem *bo_gem;
62722944501Smrg
62822944501Smrg		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
62922944501Smrg				      bucket->head.next, head);
63022944501Smrg		if (drm_intel_gem_bo_madvise_internal
63122944501Smrg		    (bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
63222944501Smrg			break;
63322944501Smrg
63422944501Smrg		DRMLISTDEL(&bo_gem->head);
63522944501Smrg		drm_intel_gem_bo_free(&bo_gem->bo);
63622944501Smrg	}
63722944501Smrg}
63822944501Smrg
63922944501Smrgstatic drm_intel_bo *
64022944501Smrgdrm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
64122944501Smrg				const char *name,
64222944501Smrg				unsigned long size,
6436d98c517Smrg				unsigned long flags,
6446d98c517Smrg				uint32_t tiling_mode,
6456d98c517Smrg				unsigned long stride)
64622944501Smrg{
64722944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
64822944501Smrg	drm_intel_bo_gem *bo_gem;
64922944501Smrg	unsigned int page_size = getpagesize();
65022944501Smrg	int ret;
65122944501Smrg	struct drm_intel_gem_bo_bucket *bucket;
65220131375Smrg	bool alloc_from_cache;
65322944501Smrg	unsigned long bo_size;
65420131375Smrg	bool for_render = false;
65522944501Smrg
65622944501Smrg	if (flags & BO_ALLOC_FOR_RENDER)
65720131375Smrg		for_render = true;
65822944501Smrg
65922944501Smrg	/* Round the allocated size up to a power of two number of pages. */
66022944501Smrg	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
66122944501Smrg
66222944501Smrg	/* If we don't have caching at this size, don't actually round the
66322944501Smrg	 * allocation up.
66422944501Smrg	 */
66522944501Smrg	if (bucket == NULL) {
66622944501Smrg		bo_size = size;
66722944501Smrg		if (bo_size < page_size)
66822944501Smrg			bo_size = page_size;
66922944501Smrg	} else {
67022944501Smrg		bo_size = bucket->size;
67122944501Smrg	}
67222944501Smrg
67322944501Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
67422944501Smrg	/* Get a buffer out of the cache if available */
67522944501Smrgretry:
67620131375Smrg	alloc_from_cache = false;
67722944501Smrg	if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
67822944501Smrg		if (for_render) {
67922944501Smrg			/* Allocate new render-target BOs from the tail (MRU)
68022944501Smrg			 * of the list, as it will likely be hot in the GPU
68122944501Smrg			 * cache and in the aperture for us.
68222944501Smrg			 */
68322944501Smrg			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
68422944501Smrg					      bucket->head.prev, head);
68522944501Smrg			DRMLISTDEL(&bo_gem->head);
68620131375Smrg			alloc_from_cache = true;
68722944501Smrg		} else {
68822944501Smrg			/* For non-render-target BOs (where we're probably
68922944501Smrg			 * going to map it first thing in order to fill it
69022944501Smrg			 * with data), check if the last BO in the cache is
69122944501Smrg			 * unbusy, and only reuse in that case. Otherwise,
69222944501Smrg			 * allocating a new buffer is probably faster than
69322944501Smrg			 * waiting for the GPU to finish.
69422944501Smrg			 */
69522944501Smrg			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
69622944501Smrg					      bucket->head.next, head);
69722944501Smrg			if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
69820131375Smrg				alloc_from_cache = true;
69922944501Smrg				DRMLISTDEL(&bo_gem->head);
70022944501Smrg			}
70122944501Smrg		}
70222944501Smrg
70322944501Smrg		if (alloc_from_cache) {
70422944501Smrg			if (!drm_intel_gem_bo_madvise_internal
70522944501Smrg			    (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
70622944501Smrg				drm_intel_gem_bo_free(&bo_gem->bo);
70722944501Smrg				drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
70822944501Smrg								    bucket);
70922944501Smrg				goto retry;
71022944501Smrg			}
7116d98c517Smrg
7126d98c517Smrg			if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
7136d98c517Smrg								 tiling_mode,
7146d98c517Smrg								 stride)) {
7156d98c517Smrg				drm_intel_gem_bo_free(&bo_gem->bo);
7166d98c517Smrg				goto retry;
7176d98c517Smrg			}
71822944501Smrg		}
71922944501Smrg	}
72022944501Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
72122944501Smrg
72222944501Smrg	if (!alloc_from_cache) {
72322944501Smrg		struct drm_i915_gem_create create;
72422944501Smrg
72522944501Smrg		bo_gem = calloc(1, sizeof(*bo_gem));
72622944501Smrg		if (!bo_gem)
72722944501Smrg			return NULL;
72822944501Smrg
72922944501Smrg		bo_gem->bo.size = bo_size;
73020131375Smrg
73120131375Smrg		VG_CLEAR(create);
73222944501Smrg		create.size = bo_size;
73322944501Smrg
7346d98c517Smrg		ret = drmIoctl(bufmgr_gem->fd,
7356d98c517Smrg			       DRM_IOCTL_I915_GEM_CREATE,
7366d98c517Smrg			       &create);
73722944501Smrg		bo_gem->gem_handle = create.handle;
73822944501Smrg		bo_gem->bo.handle = bo_gem->gem_handle;
73922944501Smrg		if (ret != 0) {
74022944501Smrg			free(bo_gem);
74122944501Smrg			return NULL;
74222944501Smrg		}
74322944501Smrg		bo_gem->bo.bufmgr = bufmgr;
7446d98c517Smrg
7456d98c517Smrg		bo_gem->tiling_mode = I915_TILING_NONE;
7466d98c517Smrg		bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
7476d98c517Smrg		bo_gem->stride = 0;
7486d98c517Smrg
7496d98c517Smrg		if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
7506d98c517Smrg							 tiling_mode,
7516d98c517Smrg							 stride)) {
7526d98c517Smrg		    drm_intel_gem_bo_free(&bo_gem->bo);
7536d98c517Smrg		    return NULL;
7546d98c517Smrg		}
75520131375Smrg
75620131375Smrg		DRMINITLISTHEAD(&bo_gem->name_list);
75720131375Smrg		DRMINITLISTHEAD(&bo_gem->vma_list);
75822944501Smrg	}
75922944501Smrg
76022944501Smrg	bo_gem->name = name;
76122944501Smrg	atomic_set(&bo_gem->refcount, 1);
76222944501Smrg	bo_gem->validate_index = -1;
76322944501Smrg	bo_gem->reloc_tree_fences = 0;
76420131375Smrg	bo_gem->used_as_reloc_target = false;
76520131375Smrg	bo_gem->has_error = false;
76620131375Smrg	bo_gem->reusable = true;
76720131375Smrg	bo_gem->aub_annotations = NULL;
76820131375Smrg	bo_gem->aub_annotation_count = 0;
76922944501Smrg
77022944501Smrg	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
77122944501Smrg
77222944501Smrg	DBG("bo_create: buf %d (%s) %ldb\n",
77322944501Smrg	    bo_gem->gem_handle, bo_gem->name, size);
77422944501Smrg
77522944501Smrg	return &bo_gem->bo;
77622944501Smrg}
77722944501Smrg
77822944501Smrgstatic drm_intel_bo *
77922944501Smrgdrm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
78022944501Smrg				  const char *name,
78122944501Smrg				  unsigned long size,
78222944501Smrg				  unsigned int alignment)
78322944501Smrg{
78422944501Smrg	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
7856d98c517Smrg					       BO_ALLOC_FOR_RENDER,
7866d98c517Smrg					       I915_TILING_NONE, 0);
78722944501Smrg}
78822944501Smrg
78922944501Smrgstatic drm_intel_bo *
79022944501Smrgdrm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr,
79122944501Smrg		       const char *name,
79222944501Smrg		       unsigned long size,
79322944501Smrg		       unsigned int alignment)
79422944501Smrg{
7956d98c517Smrg	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0,
7966d98c517Smrg					       I915_TILING_NONE, 0);
79722944501Smrg}
79822944501Smrg
79922944501Smrgstatic drm_intel_bo *
80022944501Smrgdrm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
80122944501Smrg			     int x, int y, int cpp, uint32_t *tiling_mode,
80222944501Smrg			     unsigned long *pitch, unsigned long flags)
80322944501Smrg{
80422944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
805aaba2545Smrg	unsigned long size, stride;
806aaba2545Smrg	uint32_t tiling;
80722944501Smrg
808aaba2545Smrg	do {
80920131375Smrg		unsigned long aligned_y, height_alignment;
810aaba2545Smrg
811aaba2545Smrg		tiling = *tiling_mode;
812aaba2545Smrg
813aaba2545Smrg		/* If we're tiled, our allocations are in 8 or 32-row blocks,
814aaba2545Smrg		 * so failure to align our height means that we won't allocate
815aaba2545Smrg		 * enough pages.
816aaba2545Smrg		 *
817aaba2545Smrg		 * If we're untiled, we still have to align to 2 rows high
818aaba2545Smrg		 * because the data port accesses 2x2 blocks even if the
819aaba2545Smrg		 * bottom row isn't to be rendered, so failure to align means
820aaba2545Smrg		 * we could walk off the end of the GTT and fault.  This is
821aaba2545Smrg		 * documented on 965, and may be the case on older chipsets
822aaba2545Smrg		 * too so we try to be careful.
823aaba2545Smrg		 */
824aaba2545Smrg		aligned_y = y;
82520131375Smrg		height_alignment = 2;
82620131375Smrg
82720131375Smrg		if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE)
82820131375Smrg			height_alignment = 16;
82920131375Smrg		else if (tiling == I915_TILING_X
83020131375Smrg			|| (IS_915(bufmgr_gem->pci_device)
83120131375Smrg			    && tiling == I915_TILING_Y))
83220131375Smrg			height_alignment = 8;
833aaba2545Smrg		else if (tiling == I915_TILING_Y)
83420131375Smrg			height_alignment = 32;
83520131375Smrg		aligned_y = ALIGN(y, height_alignment);
836aaba2545Smrg
837aaba2545Smrg		stride = x * cpp;
8386d98c517Smrg		stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode);
839aaba2545Smrg		size = stride * aligned_y;
840aaba2545Smrg		size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
841aaba2545Smrg	} while (*tiling_mode != tiling);
84222944501Smrg	*pitch = stride;
84322944501Smrg
8446d98c517Smrg	if (tiling == I915_TILING_NONE)
8456d98c517Smrg		stride = 0;
8466d98c517Smrg
8476d98c517Smrg	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags,
8486d98c517Smrg					       tiling, stride);
84922944501Smrg}
85022944501Smrg
85122944501Smrg/**
85222944501Smrg * Returns a drm_intel_bo wrapping the given buffer object handle.
85322944501Smrg *
85422944501Smrg * This can be used when one application needs to pass a buffer object
85522944501Smrg * to another.
85622944501Smrg */
85722944501Smrgdrm_intel_bo *
85822944501Smrgdrm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr,
85922944501Smrg				  const char *name,
86022944501Smrg				  unsigned int handle)
86122944501Smrg{
86222944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
86322944501Smrg	drm_intel_bo_gem *bo_gem;
86422944501Smrg	int ret;
86522944501Smrg	struct drm_gem_open open_arg;
86622944501Smrg	struct drm_i915_gem_get_tiling get_tiling;
86720131375Smrg	drmMMListHead *list;
86822944501Smrg
86920131375Smrg	/* At the moment most applications only have a few named bo.
87020131375Smrg	 * For instance, in a DRI client only the render buffers passed
87120131375Smrg	 * between X and the client are named. And since X returns the
87220131375Smrg	 * alternating names for the front/back buffer a linear search
87320131375Smrg	 * provides a sufficiently fast match.
87420131375Smrg	 */
87520131375Smrg	for (list = bufmgr_gem->named.next;
87620131375Smrg	     list != &bufmgr_gem->named;
87720131375Smrg	     list = list->next) {
87820131375Smrg		bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list);
87920131375Smrg		if (bo_gem->global_name == handle) {
88020131375Smrg			drm_intel_gem_bo_reference(&bo_gem->bo);
88120131375Smrg			return &bo_gem->bo;
88220131375Smrg		}
88320131375Smrg	}
88422944501Smrg
88520131375Smrg	VG_CLEAR(open_arg);
88622944501Smrg	open_arg.name = handle;
8876d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
8886d98c517Smrg		       DRM_IOCTL_GEM_OPEN,
8896d98c517Smrg		       &open_arg);
89022944501Smrg	if (ret != 0) {
8919ce4edccSmrg		DBG("Couldn't reference %s handle 0x%08x: %s\n",
8929ce4edccSmrg		    name, handle, strerror(errno));
89322944501Smrg		return NULL;
89422944501Smrg	}
89520131375Smrg        /* Now see if someone has used a prime handle to get this
89620131375Smrg         * object from the kernel before by looking through the list
89720131375Smrg         * again for a matching gem_handle
89820131375Smrg         */
89920131375Smrg	for (list = bufmgr_gem->named.next;
90020131375Smrg	     list != &bufmgr_gem->named;
90120131375Smrg	     list = list->next) {
90220131375Smrg		bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list);
90320131375Smrg		if (bo_gem->gem_handle == open_arg.handle) {
90420131375Smrg			drm_intel_gem_bo_reference(&bo_gem->bo);
90520131375Smrg			return &bo_gem->bo;
90620131375Smrg		}
90720131375Smrg	}
90820131375Smrg
90920131375Smrg	bo_gem = calloc(1, sizeof(*bo_gem));
91020131375Smrg	if (!bo_gem)
91120131375Smrg		return NULL;
91220131375Smrg
91322944501Smrg	bo_gem->bo.size = open_arg.size;
91422944501Smrg	bo_gem->bo.offset = 0;
91520131375Smrg	bo_gem->bo.offset64 = 0;
91622944501Smrg	bo_gem->bo.virtual = NULL;
91722944501Smrg	bo_gem->bo.bufmgr = bufmgr;
91822944501Smrg	bo_gem->name = name;
91922944501Smrg	atomic_set(&bo_gem->refcount, 1);
92022944501Smrg	bo_gem->validate_index = -1;
92122944501Smrg	bo_gem->gem_handle = open_arg.handle;
92220131375Smrg	bo_gem->bo.handle = open_arg.handle;
92322944501Smrg	bo_gem->global_name = handle;
92420131375Smrg	bo_gem->reusable = false;
92522944501Smrg
92620131375Smrg	VG_CLEAR(get_tiling);
92722944501Smrg	get_tiling.handle = bo_gem->gem_handle;
9286d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
9296d98c517Smrg		       DRM_IOCTL_I915_GEM_GET_TILING,
9306d98c517Smrg		       &get_tiling);
93122944501Smrg	if (ret != 0) {
93222944501Smrg		drm_intel_gem_bo_unreference(&bo_gem->bo);
93322944501Smrg		return NULL;
93422944501Smrg	}
93522944501Smrg	bo_gem->tiling_mode = get_tiling.tiling_mode;
93622944501Smrg	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
9376d98c517Smrg	/* XXX stride is unknown */
93822944501Smrg	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
93922944501Smrg
94020131375Smrg	DRMINITLISTHEAD(&bo_gem->vma_list);
94120131375Smrg	DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
94222944501Smrg	DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
94322944501Smrg
94422944501Smrg	return &bo_gem->bo;
94522944501Smrg}
94622944501Smrg
94722944501Smrgstatic void
94822944501Smrgdrm_intel_gem_bo_free(drm_intel_bo *bo)
94922944501Smrg{
95022944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
95122944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
95222944501Smrg	struct drm_gem_close close;
95322944501Smrg	int ret;
95422944501Smrg
95520131375Smrg	DRMLISTDEL(&bo_gem->vma_list);
95620131375Smrg	if (bo_gem->mem_virtual) {
95720131375Smrg		VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0));
95822944501Smrg		munmap(bo_gem->mem_virtual, bo_gem->bo.size);
95920131375Smrg		bufmgr_gem->vma_count--;
96020131375Smrg	}
96120131375Smrg	if (bo_gem->gtt_virtual) {
96222944501Smrg		munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
96320131375Smrg		bufmgr_gem->vma_count--;
96420131375Smrg	}
96522944501Smrg
96622944501Smrg	/* Close this object */
96720131375Smrg	VG_CLEAR(close);
96822944501Smrg	close.handle = bo_gem->gem_handle;
9696d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close);
97022944501Smrg	if (ret != 0) {
9719ce4edccSmrg		DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
9729ce4edccSmrg		    bo_gem->gem_handle, bo_gem->name, strerror(errno));
97322944501Smrg	}
97420131375Smrg	free(bo_gem->aub_annotations);
97522944501Smrg	free(bo);
97622944501Smrg}
97722944501Smrg
97820131375Smrgstatic void
97920131375Smrgdrm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo)
98020131375Smrg{
98120131375Smrg#if HAVE_VALGRIND
98220131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
98320131375Smrg
98420131375Smrg	if (bo_gem->mem_virtual)
98520131375Smrg		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size);
98620131375Smrg
98720131375Smrg	if (bo_gem->gtt_virtual)
98820131375Smrg		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size);
98920131375Smrg#endif
99020131375Smrg}
99120131375Smrg
99222944501Smrg/** Frees all cached buffers significantly older than @time. */
99322944501Smrgstatic void
99422944501Smrgdrm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
99522944501Smrg{
99622944501Smrg	int i;
99722944501Smrg
9986d98c517Smrg	if (bufmgr_gem->time == time)
9996d98c517Smrg		return;
10006d98c517Smrg
1001aaba2545Smrg	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
100222944501Smrg		struct drm_intel_gem_bo_bucket *bucket =
100322944501Smrg		    &bufmgr_gem->cache_bucket[i];
100422944501Smrg
100522944501Smrg		while (!DRMLISTEMPTY(&bucket->head)) {
100622944501Smrg			drm_intel_bo_gem *bo_gem;
100722944501Smrg
100822944501Smrg			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
100922944501Smrg					      bucket->head.next, head);
101022944501Smrg			if (time - bo_gem->free_time <= 1)
101122944501Smrg				break;
101222944501Smrg
101322944501Smrg			DRMLISTDEL(&bo_gem->head);
101422944501Smrg
101522944501Smrg			drm_intel_gem_bo_free(&bo_gem->bo);
101622944501Smrg		}
101722944501Smrg	}
10186d98c517Smrg
10196d98c517Smrg	bufmgr_gem->time = time;
102022944501Smrg}
102122944501Smrg
102220131375Smrgstatic void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem)
102320131375Smrg{
102420131375Smrg	int limit;
102520131375Smrg
102620131375Smrg	DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__,
102720131375Smrg	    bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max);
102820131375Smrg
102920131375Smrg	if (bufmgr_gem->vma_max < 0)
103020131375Smrg		return;
103120131375Smrg
103220131375Smrg	/* We may need to evict a few entries in order to create new mmaps */
103320131375Smrg	limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open;
103420131375Smrg	if (limit < 0)
103520131375Smrg		limit = 0;
103620131375Smrg
103720131375Smrg	while (bufmgr_gem->vma_count > limit) {
103820131375Smrg		drm_intel_bo_gem *bo_gem;
103920131375Smrg
104020131375Smrg		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
104120131375Smrg				      bufmgr_gem->vma_cache.next,
104220131375Smrg				      vma_list);
104320131375Smrg		assert(bo_gem->map_count == 0);
104420131375Smrg		DRMLISTDELINIT(&bo_gem->vma_list);
104520131375Smrg
104620131375Smrg		if (bo_gem->mem_virtual) {
104720131375Smrg			munmap(bo_gem->mem_virtual, bo_gem->bo.size);
104820131375Smrg			bo_gem->mem_virtual = NULL;
104920131375Smrg			bufmgr_gem->vma_count--;
105020131375Smrg		}
105120131375Smrg		if (bo_gem->gtt_virtual) {
105220131375Smrg			munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
105320131375Smrg			bo_gem->gtt_virtual = NULL;
105420131375Smrg			bufmgr_gem->vma_count--;
105520131375Smrg		}
105620131375Smrg	}
105720131375Smrg}
105820131375Smrg
105920131375Smrgstatic void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem,
106020131375Smrg				       drm_intel_bo_gem *bo_gem)
106120131375Smrg{
106220131375Smrg	bufmgr_gem->vma_open--;
106320131375Smrg	DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache);
106420131375Smrg	if (bo_gem->mem_virtual)
106520131375Smrg		bufmgr_gem->vma_count++;
106620131375Smrg	if (bo_gem->gtt_virtual)
106720131375Smrg		bufmgr_gem->vma_count++;
106820131375Smrg	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
106920131375Smrg}
107020131375Smrg
107120131375Smrgstatic void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem,
107220131375Smrg				      drm_intel_bo_gem *bo_gem)
107320131375Smrg{
107420131375Smrg	bufmgr_gem->vma_open++;
107520131375Smrg	DRMLISTDEL(&bo_gem->vma_list);
107620131375Smrg	if (bo_gem->mem_virtual)
107720131375Smrg		bufmgr_gem->vma_count--;
107820131375Smrg	if (bo_gem->gtt_virtual)
107920131375Smrg		bufmgr_gem->vma_count--;
108020131375Smrg	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
108120131375Smrg}
108220131375Smrg
108322944501Smrgstatic void
108422944501Smrgdrm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
108522944501Smrg{
108622944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
108722944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
108822944501Smrg	struct drm_intel_gem_bo_bucket *bucket;
108922944501Smrg	int i;
109022944501Smrg
109122944501Smrg	/* Unreference all the target buffers */
109222944501Smrg	for (i = 0; i < bo_gem->reloc_count; i++) {
1093aaba2545Smrg		if (bo_gem->reloc_target_info[i].bo != bo) {
1094aaba2545Smrg			drm_intel_gem_bo_unreference_locked_timed(bo_gem->
1095aaba2545Smrg								  reloc_target_info[i].bo,
1096aaba2545Smrg								  time);
1097aaba2545Smrg		}
109822944501Smrg	}
109922944501Smrg	bo_gem->reloc_count = 0;
110020131375Smrg	bo_gem->used_as_reloc_target = false;
110122944501Smrg
110222944501Smrg	DBG("bo_unreference final: %d (%s)\n",
110322944501Smrg	    bo_gem->gem_handle, bo_gem->name);
110422944501Smrg
110522944501Smrg	/* release memory associated with this object */
110622944501Smrg	if (bo_gem->reloc_target_info) {
110722944501Smrg		free(bo_gem->reloc_target_info);
110822944501Smrg		bo_gem->reloc_target_info = NULL;
110922944501Smrg	}
111022944501Smrg	if (bo_gem->relocs) {
111122944501Smrg		free(bo_gem->relocs);
111222944501Smrg		bo_gem->relocs = NULL;
111322944501Smrg	}
111422944501Smrg
111520131375Smrg	/* Clear any left-over mappings */
111620131375Smrg	if (bo_gem->map_count) {
111720131375Smrg		DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count);
111820131375Smrg		bo_gem->map_count = 0;
111920131375Smrg		drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
112020131375Smrg		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
112120131375Smrg	}
112220131375Smrg
112320131375Smrg	DRMLISTDEL(&bo_gem->name_list);
112420131375Smrg
112522944501Smrg	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
112622944501Smrg	/* Put the buffer into our internal cache for reuse if we can. */
112722944501Smrg	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
112822944501Smrg	    drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
112922944501Smrg					      I915_MADV_DONTNEED)) {
113022944501Smrg		bo_gem->free_time = time;
113122944501Smrg
113222944501Smrg		bo_gem->name = NULL;
113322944501Smrg		bo_gem->validate_index = -1;
113422944501Smrg
113522944501Smrg		DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
113622944501Smrg	} else {
113722944501Smrg		drm_intel_gem_bo_free(bo);
113822944501Smrg	}
113922944501Smrg}
114022944501Smrg
114122944501Smrgstatic void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
114222944501Smrg						      time_t time)
114322944501Smrg{
114422944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
114522944501Smrg
114622944501Smrg	assert(atomic_read(&bo_gem->refcount) > 0);
114722944501Smrg	if (atomic_dec_and_test(&bo_gem->refcount))
114822944501Smrg		drm_intel_gem_bo_unreference_final(bo, time);
114922944501Smrg}
115022944501Smrg
115122944501Smrgstatic void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
115222944501Smrg{
115322944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
115422944501Smrg
115522944501Smrg	assert(atomic_read(&bo_gem->refcount) > 0);
115622944501Smrg	if (atomic_dec_and_test(&bo_gem->refcount)) {
115722944501Smrg		drm_intel_bufmgr_gem *bufmgr_gem =
115822944501Smrg		    (drm_intel_bufmgr_gem *) bo->bufmgr;
115922944501Smrg		struct timespec time;
116022944501Smrg
116122944501Smrg		clock_gettime(CLOCK_MONOTONIC, &time);
116222944501Smrg
116322944501Smrg		pthread_mutex_lock(&bufmgr_gem->lock);
116422944501Smrg		drm_intel_gem_bo_unreference_final(bo, time.tv_sec);
11656d98c517Smrg		drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec);
116622944501Smrg		pthread_mutex_unlock(&bufmgr_gem->lock);
116722944501Smrg	}
116822944501Smrg}
116922944501Smrg
117022944501Smrgstatic int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
117122944501Smrg{
117222944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
117322944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
117422944501Smrg	struct drm_i915_gem_set_domain set_domain;
117522944501Smrg	int ret;
117622944501Smrg
117722944501Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
117822944501Smrg
117920131375Smrg	if (bo_gem->map_count++ == 0)
118020131375Smrg		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
118120131375Smrg
118222944501Smrg	if (!bo_gem->mem_virtual) {
118322944501Smrg		struct drm_i915_gem_mmap mmap_arg;
118422944501Smrg
118520131375Smrg		DBG("bo_map: %d (%s), map_count=%d\n",
118620131375Smrg		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
118722944501Smrg
118820131375Smrg		VG_CLEAR(mmap_arg);
118922944501Smrg		mmap_arg.handle = bo_gem->gem_handle;
119022944501Smrg		mmap_arg.offset = 0;
119122944501Smrg		mmap_arg.size = bo->size;
11926d98c517Smrg		ret = drmIoctl(bufmgr_gem->fd,
11936d98c517Smrg			       DRM_IOCTL_I915_GEM_MMAP,
11946d98c517Smrg			       &mmap_arg);
119522944501Smrg		if (ret != 0) {
119622944501Smrg			ret = -errno;
11979ce4edccSmrg			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
11989ce4edccSmrg			    __FILE__, __LINE__, bo_gem->gem_handle,
11999ce4edccSmrg			    bo_gem->name, strerror(errno));
120020131375Smrg			if (--bo_gem->map_count == 0)
120120131375Smrg				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
120222944501Smrg			pthread_mutex_unlock(&bufmgr_gem->lock);
120322944501Smrg			return ret;
120422944501Smrg		}
120520131375Smrg		VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
120622944501Smrg		bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
120722944501Smrg	}
120822944501Smrg	DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
120922944501Smrg	    bo_gem->mem_virtual);
121022944501Smrg	bo->virtual = bo_gem->mem_virtual;
121122944501Smrg
121220131375Smrg	VG_CLEAR(set_domain);
121322944501Smrg	set_domain.handle = bo_gem->gem_handle;
121422944501Smrg	set_domain.read_domains = I915_GEM_DOMAIN_CPU;
121522944501Smrg	if (write_enable)
121622944501Smrg		set_domain.write_domain = I915_GEM_DOMAIN_CPU;
121722944501Smrg	else
121822944501Smrg		set_domain.write_domain = 0;
12196d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
12206d98c517Smrg		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
12216d98c517Smrg		       &set_domain);
122222944501Smrg	if (ret != 0) {
12239ce4edccSmrg		DBG("%s:%d: Error setting to CPU domain %d: %s\n",
12249ce4edccSmrg		    __FILE__, __LINE__, bo_gem->gem_handle,
12259ce4edccSmrg		    strerror(errno));
122622944501Smrg	}
122722944501Smrg
122820131375Smrg	if (write_enable)
122920131375Smrg		bo_gem->mapped_cpu_write = true;
123020131375Smrg
123120131375Smrg	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
123220131375Smrg	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size));
123322944501Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
123422944501Smrg
123522944501Smrg	return 0;
123622944501Smrg}
123722944501Smrg
123820131375Smrgstatic int
123920131375Smrgmap_gtt(drm_intel_bo *bo)
124022944501Smrg{
124122944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
124222944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
124322944501Smrg	int ret;
124422944501Smrg
124520131375Smrg	if (bo_gem->map_count++ == 0)
124620131375Smrg		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
124722944501Smrg
124822944501Smrg	/* Get a mapping of the buffer if we haven't before. */
124922944501Smrg	if (bo_gem->gtt_virtual == NULL) {
125022944501Smrg		struct drm_i915_gem_mmap_gtt mmap_arg;
125122944501Smrg
125220131375Smrg		DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
125320131375Smrg		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
125422944501Smrg
125520131375Smrg		VG_CLEAR(mmap_arg);
125622944501Smrg		mmap_arg.handle = bo_gem->gem_handle;
125722944501Smrg
125822944501Smrg		/* Get the fake offset back... */
12596d98c517Smrg		ret = drmIoctl(bufmgr_gem->fd,
12606d98c517Smrg			       DRM_IOCTL_I915_GEM_MMAP_GTT,
12616d98c517Smrg			       &mmap_arg);
126222944501Smrg		if (ret != 0) {
126322944501Smrg			ret = -errno;
12649ce4edccSmrg			DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
12659ce4edccSmrg			    __FILE__, __LINE__,
12669ce4edccSmrg			    bo_gem->gem_handle, bo_gem->name,
12679ce4edccSmrg			    strerror(errno));
126820131375Smrg			if (--bo_gem->map_count == 0)
126920131375Smrg				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
127022944501Smrg			return ret;
127122944501Smrg		}
127222944501Smrg
127322944501Smrg		/* and mmap it */
1274aec75c42Sriastradh		ret = drmMap(bufmgr_gem->fd, mmap_arg.offset, bo->size,
1275aec75c42Sriastradh		    &bo_gem->gtt_virtual);
1276aec75c42Sriastradh		if (ret) {
127722944501Smrg			bo_gem->gtt_virtual = NULL;
12789ce4edccSmrg			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
12799ce4edccSmrg			    __FILE__, __LINE__,
12809ce4edccSmrg			    bo_gem->gem_handle, bo_gem->name,
12819ce4edccSmrg			    strerror(errno));
128220131375Smrg			if (--bo_gem->map_count == 0)
128320131375Smrg				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
128422944501Smrg			return ret;
128522944501Smrg		}
128622944501Smrg	}
128722944501Smrg
128822944501Smrg	bo->virtual = bo_gem->gtt_virtual;
128922944501Smrg
129022944501Smrg	DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
129122944501Smrg	    bo_gem->gtt_virtual);
129222944501Smrg
129320131375Smrg	return 0;
129420131375Smrg}
129520131375Smrg
129620131375Smrgint drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
129720131375Smrg{
129820131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
129920131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
130020131375Smrg	struct drm_i915_gem_set_domain set_domain;
130120131375Smrg	int ret;
130220131375Smrg
130320131375Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
130420131375Smrg
130520131375Smrg	ret = map_gtt(bo);
130620131375Smrg	if (ret) {
130720131375Smrg		pthread_mutex_unlock(&bufmgr_gem->lock);
130820131375Smrg		return ret;
130920131375Smrg	}
131020131375Smrg
131120131375Smrg	/* Now move it to the GTT domain so that the GPU and CPU
131220131375Smrg	 * caches are flushed and the GPU isn't actively using the
131320131375Smrg	 * buffer.
131420131375Smrg	 *
131520131375Smrg	 * The pagefault handler does this domain change for us when
131620131375Smrg	 * it has unbound the BO from the GTT, but it's up to us to
131720131375Smrg	 * tell it when we're about to use things if we had done
131820131375Smrg	 * rendering and it still happens to be bound to the GTT.
131920131375Smrg	 */
132020131375Smrg	VG_CLEAR(set_domain);
132122944501Smrg	set_domain.handle = bo_gem->gem_handle;
132222944501Smrg	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
132322944501Smrg	set_domain.write_domain = I915_GEM_DOMAIN_GTT;
13246d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
13256d98c517Smrg		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
13266d98c517Smrg		       &set_domain);
132722944501Smrg	if (ret != 0) {
13289ce4edccSmrg		DBG("%s:%d: Error setting domain %d: %s\n",
13299ce4edccSmrg		    __FILE__, __LINE__, bo_gem->gem_handle,
13309ce4edccSmrg		    strerror(errno));
133122944501Smrg	}
133222944501Smrg
133320131375Smrg	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
133420131375Smrg	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
133522944501Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
133622944501Smrg
13376d98c517Smrg	return 0;
133822944501Smrg}
133922944501Smrg
134020131375Smrg/**
134120131375Smrg * Performs a mapping of the buffer object like the normal GTT
134220131375Smrg * mapping, but avoids waiting for the GPU to be done reading from or
134320131375Smrg * rendering to the buffer.
134420131375Smrg *
134520131375Smrg * This is used in the implementation of GL_ARB_map_buffer_range: The
134620131375Smrg * user asks to create a buffer, then does a mapping, fills some
134720131375Smrg * space, runs a drawing command, then asks to map it again without
134820131375Smrg * synchronizing because it guarantees that it won't write over the
134920131375Smrg * data that the GPU is busy using (or, more specifically, that if it
135020131375Smrg * does write over the data, it acknowledges that rendering is
135120131375Smrg * undefined).
135220131375Smrg */
135320131375Smrg
135420131375Smrgint drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
135522944501Smrg{
135622944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
135720131375Smrg#ifdef HAVE_VALGRIND
135820131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
135920131375Smrg#endif
136020131375Smrg	int ret;
136122944501Smrg
136220131375Smrg	/* If the CPU cache isn't coherent with the GTT, then use a
136320131375Smrg	 * regular synchronized mapping.  The problem is that we don't
136420131375Smrg	 * track where the buffer was last used on the CPU side in
136520131375Smrg	 * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so
136620131375Smrg	 * we would potentially corrupt the buffer even when the user
136720131375Smrg	 * does reasonable things.
136820131375Smrg	 */
136920131375Smrg	if (!bufmgr_gem->has_llc)
137020131375Smrg		return drm_intel_gem_bo_map_gtt(bo);
137122944501Smrg
137222944501Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
137320131375Smrg
137420131375Smrg	ret = map_gtt(bo);
137520131375Smrg	if (ret == 0) {
137620131375Smrg		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
137720131375Smrg		VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
137820131375Smrg	}
137920131375Smrg
138022944501Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
138122944501Smrg
138222944501Smrg	return ret;
138322944501Smrg}
138422944501Smrg
138522944501Smrgstatic int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
138622944501Smrg{
138722944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
138822944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
138920131375Smrg	int ret = 0;
139022944501Smrg
139122944501Smrg	if (bo == NULL)
139222944501Smrg		return 0;
139322944501Smrg
139422944501Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
139522944501Smrg
139620131375Smrg	if (bo_gem->map_count <= 0) {
139720131375Smrg		DBG("attempted to unmap an unmapped bo\n");
139820131375Smrg		pthread_mutex_unlock(&bufmgr_gem->lock);
139920131375Smrg		/* Preserve the old behaviour of just treating this as a
140020131375Smrg		 * no-op rather than reporting the error.
140120131375Smrg		 */
140220131375Smrg		return 0;
140320131375Smrg	}
140420131375Smrg
140520131375Smrg	if (bo_gem->mapped_cpu_write) {
140620131375Smrg		struct drm_i915_gem_sw_finish sw_finish;
140720131375Smrg
140820131375Smrg		/* Cause a flush to happen if the buffer's pinned for
140920131375Smrg		 * scanout, so the results show up in a timely manner.
141020131375Smrg		 * Unlike GTT set domains, this only does work if the
141120131375Smrg		 * buffer should be scanout-related.
141220131375Smrg		 */
141320131375Smrg		VG_CLEAR(sw_finish);
141420131375Smrg		sw_finish.handle = bo_gem->gem_handle;
141520131375Smrg		ret = drmIoctl(bufmgr_gem->fd,
141620131375Smrg			       DRM_IOCTL_I915_GEM_SW_FINISH,
141720131375Smrg			       &sw_finish);
141820131375Smrg		ret = ret == -1 ? -errno : 0;
141920131375Smrg
142020131375Smrg		bo_gem->mapped_cpu_write = false;
142120131375Smrg	}
142222944501Smrg
142320131375Smrg	/* We need to unmap after every innovation as we cannot track
142420131375Smrg	 * an open vma for every bo as that will exhaasut the system
142520131375Smrg	 * limits and cause later failures.
142620131375Smrg	 */
142720131375Smrg	if (--bo_gem->map_count == 0) {
142820131375Smrg		drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
142920131375Smrg		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
143020131375Smrg		bo->virtual = NULL;
143120131375Smrg	}
143222944501Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
143322944501Smrg
143422944501Smrg	return ret;
143522944501Smrg}
143622944501Smrg
143720131375Smrgint drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
143820131375Smrg{
143920131375Smrg	return drm_intel_gem_bo_unmap(bo);
144020131375Smrg}
144120131375Smrg
144222944501Smrgstatic int
144322944501Smrgdrm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
144422944501Smrg			 unsigned long size, const void *data)
144522944501Smrg{
144622944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
144722944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
144822944501Smrg	struct drm_i915_gem_pwrite pwrite;
144922944501Smrg	int ret;
145022944501Smrg
145120131375Smrg	VG_CLEAR(pwrite);
145222944501Smrg	pwrite.handle = bo_gem->gem_handle;
145322944501Smrg	pwrite.offset = offset;
145422944501Smrg	pwrite.size = size;
145522944501Smrg	pwrite.data_ptr = (uint64_t) (uintptr_t) data;
14566d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
14576d98c517Smrg		       DRM_IOCTL_I915_GEM_PWRITE,
14586d98c517Smrg		       &pwrite);
145922944501Smrg	if (ret != 0) {
146022944501Smrg		ret = -errno;
14619ce4edccSmrg		DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
14629ce4edccSmrg		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
14639ce4edccSmrg		    (int)size, strerror(errno));
146422944501Smrg	}
146522944501Smrg
146622944501Smrg	return ret;
146722944501Smrg}
146822944501Smrg
146922944501Smrgstatic int
147022944501Smrgdrm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id)
147122944501Smrg{
147222944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
147322944501Smrg	struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id;
147422944501Smrg	int ret;
147522944501Smrg
147620131375Smrg	VG_CLEAR(get_pipe_from_crtc_id);
147722944501Smrg	get_pipe_from_crtc_id.crtc_id = crtc_id;
14786d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
14796d98c517Smrg		       DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
14806d98c517Smrg		       &get_pipe_from_crtc_id);
148122944501Smrg	if (ret != 0) {
148222944501Smrg		/* We return -1 here to signal that we don't
148322944501Smrg		 * know which pipe is associated with this crtc.
148422944501Smrg		 * This lets the caller know that this information
148522944501Smrg		 * isn't available; using the wrong pipe for
148622944501Smrg		 * vblank waiting can cause the chipset to lock up
148722944501Smrg		 */
148822944501Smrg		return -1;
148922944501Smrg	}
149022944501Smrg
149122944501Smrg	return get_pipe_from_crtc_id.pipe;
149222944501Smrg}
149322944501Smrg
149422944501Smrgstatic int
149522944501Smrgdrm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
149622944501Smrg			     unsigned long size, void *data)
149722944501Smrg{
149822944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
149922944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
150022944501Smrg	struct drm_i915_gem_pread pread;
150122944501Smrg	int ret;
150222944501Smrg
150320131375Smrg	VG_CLEAR(pread);
150422944501Smrg	pread.handle = bo_gem->gem_handle;
150522944501Smrg	pread.offset = offset;
150622944501Smrg	pread.size = size;
150722944501Smrg	pread.data_ptr = (uint64_t) (uintptr_t) data;
15086d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
15096d98c517Smrg		       DRM_IOCTL_I915_GEM_PREAD,
15106d98c517Smrg		       &pread);
151122944501Smrg	if (ret != 0) {
151222944501Smrg		ret = -errno;
15139ce4edccSmrg		DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
15149ce4edccSmrg		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
15159ce4edccSmrg		    (int)size, strerror(errno));
151622944501Smrg	}
151722944501Smrg
151822944501Smrg	return ret;
151922944501Smrg}
152022944501Smrg
15219ce4edccSmrg/** Waits for all GPU rendering with the object to have completed. */
152222944501Smrgstatic void
152322944501Smrgdrm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
152422944501Smrg{
15259ce4edccSmrg	drm_intel_gem_bo_start_gtt_access(bo, 1);
152622944501Smrg}
152722944501Smrg
152820131375Smrg/**
152920131375Smrg * Waits on a BO for the given amount of time.
153020131375Smrg *
153120131375Smrg * @bo: buffer object to wait for
153220131375Smrg * @timeout_ns: amount of time to wait in nanoseconds.
153320131375Smrg *   If value is less than 0, an infinite wait will occur.
153420131375Smrg *
153520131375Smrg * Returns 0 if the wait was successful ie. the last batch referencing the
153620131375Smrg * object has completed within the allotted time. Otherwise some negative return
153720131375Smrg * value describes the error. Of particular interest is -ETIME when the wait has
153820131375Smrg * failed to yield the desired result.
153920131375Smrg *
154020131375Smrg * Similar to drm_intel_gem_bo_wait_rendering except a timeout parameter allows
154120131375Smrg * the operation to give up after a certain amount of time. Another subtle
154220131375Smrg * difference is the internal locking semantics are different (this variant does
154320131375Smrg * not hold the lock for the duration of the wait). This makes the wait subject
154420131375Smrg * to a larger userspace race window.
154520131375Smrg *
154620131375Smrg * The implementation shall wait until the object is no longer actively
154720131375Smrg * referenced within a batch buffer at the time of the call. The wait will
154820131375Smrg * not guarantee that the buffer is re-issued via another thread, or an flinked
154920131375Smrg * handle. Userspace must make sure this race does not occur if such precision
155020131375Smrg * is important.
155120131375Smrg */
155220131375Smrgint drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns)
155320131375Smrg{
155420131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
155520131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
155620131375Smrg	struct drm_i915_gem_wait wait;
155720131375Smrg	int ret;
155820131375Smrg
155920131375Smrg	if (!bufmgr_gem->has_wait_timeout) {
156020131375Smrg		DBG("%s:%d: Timed wait is not supported. Falling back to "
156120131375Smrg		    "infinite wait\n", __FILE__, __LINE__);
156220131375Smrg		if (timeout_ns) {
156320131375Smrg			drm_intel_gem_bo_wait_rendering(bo);
156420131375Smrg			return 0;
156520131375Smrg		} else {
156620131375Smrg			return drm_intel_gem_bo_busy(bo) ? -ETIME : 0;
156720131375Smrg		}
156820131375Smrg	}
156920131375Smrg
157020131375Smrg	wait.bo_handle = bo_gem->gem_handle;
157120131375Smrg	wait.timeout_ns = timeout_ns;
157220131375Smrg	wait.flags = 0;
157320131375Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
157420131375Smrg	if (ret == -1)
157520131375Smrg		return -errno;
157620131375Smrg
157720131375Smrg	return ret;
157820131375Smrg}
157920131375Smrg
158022944501Smrg/**
158122944501Smrg * Sets the object to the GTT read and possibly write domain, used by the X
158222944501Smrg * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
158322944501Smrg *
158422944501Smrg * In combination with drm_intel_gem_bo_pin() and manual fence management, we
158522944501Smrg * can do tiled pixmaps this way.
158622944501Smrg */
158722944501Smrgvoid
158822944501Smrgdrm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
158922944501Smrg{
159022944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
159122944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
159222944501Smrg	struct drm_i915_gem_set_domain set_domain;
159322944501Smrg	int ret;
159422944501Smrg
159520131375Smrg	VG_CLEAR(set_domain);
159622944501Smrg	set_domain.handle = bo_gem->gem_handle;
159722944501Smrg	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
159822944501Smrg	set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
15996d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
16006d98c517Smrg		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
16016d98c517Smrg		       &set_domain);
160222944501Smrg	if (ret != 0) {
16039ce4edccSmrg		DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
16049ce4edccSmrg		    __FILE__, __LINE__, bo_gem->gem_handle,
16059ce4edccSmrg		    set_domain.read_domains, set_domain.write_domain,
16069ce4edccSmrg		    strerror(errno));
160722944501Smrg	}
160822944501Smrg}
160922944501Smrg
161022944501Smrgstatic void
161122944501Smrgdrm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
161222944501Smrg{
161322944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
161422944501Smrg	int i;
161522944501Smrg
161622944501Smrg	free(bufmgr_gem->exec2_objects);
161722944501Smrg	free(bufmgr_gem->exec_objects);
161822944501Smrg	free(bufmgr_gem->exec_bos);
161920131375Smrg	free(bufmgr_gem->aub_filename);
162022944501Smrg
162122944501Smrg	pthread_mutex_destroy(&bufmgr_gem->lock);
162222944501Smrg
162322944501Smrg	/* Free any cached buffer objects we were going to reuse */
1624aaba2545Smrg	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
162522944501Smrg		struct drm_intel_gem_bo_bucket *bucket =
162622944501Smrg		    &bufmgr_gem->cache_bucket[i];
162722944501Smrg		drm_intel_bo_gem *bo_gem;
162822944501Smrg
162922944501Smrg		while (!DRMLISTEMPTY(&bucket->head)) {
163022944501Smrg			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
163122944501Smrg					      bucket->head.next, head);
163222944501Smrg			DRMLISTDEL(&bo_gem->head);
163322944501Smrg
163422944501Smrg			drm_intel_gem_bo_free(&bo_gem->bo);
163522944501Smrg		}
163622944501Smrg	}
163722944501Smrg
163822944501Smrg	free(bufmgr);
163922944501Smrg}
164022944501Smrg
164122944501Smrg/**
164222944501Smrg * Adds the target buffer to the validation list and adds the relocation
164322944501Smrg * to the reloc_buffer's relocation list.
164422944501Smrg *
164522944501Smrg * The relocation entry at the given offset must already contain the
164622944501Smrg * precomputed relocation value, because the kernel will optimize out
164722944501Smrg * the relocation entry write when the buffer hasn't moved from the
164822944501Smrg * last known offset in target_bo.
164922944501Smrg */
165022944501Smrgstatic int
165122944501Smrgdo_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
165222944501Smrg		 drm_intel_bo *target_bo, uint32_t target_offset,
165322944501Smrg		 uint32_t read_domains, uint32_t write_domain,
165420131375Smrg		 bool need_fence)
165522944501Smrg{
165622944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
165722944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
165822944501Smrg	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
165920131375Smrg	bool fenced_command;
166022944501Smrg
166122944501Smrg	if (bo_gem->has_error)
166222944501Smrg		return -ENOMEM;
166322944501Smrg
166422944501Smrg	if (target_bo_gem->has_error) {
166520131375Smrg		bo_gem->has_error = true;
166622944501Smrg		return -ENOMEM;
166722944501Smrg	}
166822944501Smrg
166922944501Smrg	/* We never use HW fences for rendering on 965+ */
167022944501Smrg	if (bufmgr_gem->gen >= 4)
167120131375Smrg		need_fence = false;
167222944501Smrg
16739ce4edccSmrg	fenced_command = need_fence;
16749ce4edccSmrg	if (target_bo_gem->tiling_mode == I915_TILING_NONE)
167520131375Smrg		need_fence = false;
16769ce4edccSmrg
167722944501Smrg	/* Create a new relocation list if needed */
167822944501Smrg	if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo))
167922944501Smrg		return -ENOMEM;
168022944501Smrg
168122944501Smrg	/* Check overflow */
168222944501Smrg	assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);
168322944501Smrg
168422944501Smrg	/* Check args */
168522944501Smrg	assert(offset <= bo->size - 4);
168622944501Smrg	assert((write_domain & (write_domain - 1)) == 0);
168722944501Smrg
168822944501Smrg	/* Make sure that we're not adding a reloc to something whose size has
168922944501Smrg	 * already been accounted for.
169022944501Smrg	 */
169122944501Smrg	assert(!bo_gem->used_as_reloc_target);
1692aaba2545Smrg	if (target_bo_gem != bo_gem) {
169320131375Smrg		target_bo_gem->used_as_reloc_target = true;
1694aaba2545Smrg		bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
1695aaba2545Smrg	}
169622944501Smrg	/* An object needing a fence is a tiled buffer, so it won't have
169722944501Smrg	 * relocs to other buffers.
169822944501Smrg	 */
169922944501Smrg	if (need_fence)
170022944501Smrg		target_bo_gem->reloc_tree_fences = 1;
170122944501Smrg	bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
170222944501Smrg
170322944501Smrg	bo_gem->relocs[bo_gem->reloc_count].offset = offset;
170422944501Smrg	bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
170522944501Smrg	bo_gem->relocs[bo_gem->reloc_count].target_handle =
170622944501Smrg	    target_bo_gem->gem_handle;
170722944501Smrg	bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
170822944501Smrg	bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
170920131375Smrg	bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64;
171022944501Smrg
171122944501Smrg	bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
1712aaba2545Smrg	if (target_bo != bo)
1713aaba2545Smrg		drm_intel_gem_bo_reference(target_bo);
17149ce4edccSmrg	if (fenced_command)
171522944501Smrg		bo_gem->reloc_target_info[bo_gem->reloc_count].flags =
171622944501Smrg			DRM_INTEL_RELOC_FENCE;
171722944501Smrg	else
171822944501Smrg		bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
171922944501Smrg
172022944501Smrg	bo_gem->reloc_count++;
172122944501Smrg
172222944501Smrg	return 0;
172322944501Smrg}
172422944501Smrg
172522944501Smrgstatic int
172622944501Smrgdrm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
172722944501Smrg			    drm_intel_bo *target_bo, uint32_t target_offset,
172822944501Smrg			    uint32_t read_domains, uint32_t write_domain)
172922944501Smrg{
173022944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
173122944501Smrg
173222944501Smrg	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
173322944501Smrg				read_domains, write_domain,
173422944501Smrg				!bufmgr_gem->fenced_relocs);
173522944501Smrg}
173622944501Smrg
173722944501Smrgstatic int
173822944501Smrgdrm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
173922944501Smrg				  drm_intel_bo *target_bo,
174022944501Smrg				  uint32_t target_offset,
174122944501Smrg				  uint32_t read_domains, uint32_t write_domain)
174222944501Smrg{
174322944501Smrg	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
174420131375Smrg				read_domains, write_domain, true);
174520131375Smrg}
174620131375Smrg
174720131375Smrgint
174820131375Smrgdrm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo)
174920131375Smrg{
175020131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
175120131375Smrg
175220131375Smrg	return bo_gem->reloc_count;
175320131375Smrg}
175420131375Smrg
175520131375Smrg/**
175620131375Smrg * Removes existing relocation entries in the BO after "start".
175720131375Smrg *
175820131375Smrg * This allows a user to avoid a two-step process for state setup with
175920131375Smrg * counting up all the buffer objects and doing a
176020131375Smrg * drm_intel_bufmgr_check_aperture_space() before emitting any of the
176120131375Smrg * relocations for the state setup.  Instead, save the state of the
176220131375Smrg * batchbuffer including drm_intel_gem_get_reloc_count(), emit all the
176320131375Smrg * state, and then check if it still fits in the aperture.
176420131375Smrg *
176520131375Smrg * Any further drm_intel_bufmgr_check_aperture_space() queries
176620131375Smrg * involving this buffer in the tree are undefined after this call.
176720131375Smrg */
176820131375Smrgvoid
176920131375Smrgdrm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start)
177020131375Smrg{
177120131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
177220131375Smrg	int i;
177320131375Smrg	struct timespec time;
177420131375Smrg
177520131375Smrg	clock_gettime(CLOCK_MONOTONIC, &time);
177620131375Smrg
177720131375Smrg	assert(bo_gem->reloc_count >= start);
177820131375Smrg	/* Unreference the cleared target buffers */
177920131375Smrg	for (i = start; i < bo_gem->reloc_count; i++) {
178020131375Smrg		drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo;
178120131375Smrg		if (&target_bo_gem->bo != bo) {
178220131375Smrg			bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences;
178320131375Smrg			drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo,
178420131375Smrg								  time.tv_sec);
178520131375Smrg		}
178620131375Smrg	}
178720131375Smrg	bo_gem->reloc_count = start;
178822944501Smrg}
178922944501Smrg
179022944501Smrg/**
179122944501Smrg * Walk the tree of relocations rooted at BO and accumulate the list of
179222944501Smrg * validations to be performed and update the relocation buffers with
179322944501Smrg * index values into the validation list.
179422944501Smrg */
179522944501Smrgstatic void
179622944501Smrgdrm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
179722944501Smrg{
179822944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
179922944501Smrg	int i;
180022944501Smrg
180122944501Smrg	if (bo_gem->relocs == NULL)
180222944501Smrg		return;
180322944501Smrg
180422944501Smrg	for (i = 0; i < bo_gem->reloc_count; i++) {
180522944501Smrg		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
180622944501Smrg
1807aaba2545Smrg		if (target_bo == bo)
1808aaba2545Smrg			continue;
1809aaba2545Smrg
181020131375Smrg		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
181120131375Smrg
181222944501Smrg		/* Continue walking the tree depth-first. */
181322944501Smrg		drm_intel_gem_bo_process_reloc(target_bo);
181422944501Smrg
181522944501Smrg		/* Add the target to the validate list */
181622944501Smrg		drm_intel_add_validate_buffer(target_bo);
181722944501Smrg	}
181822944501Smrg}
181922944501Smrg
182022944501Smrgstatic void
182122944501Smrgdrm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
182222944501Smrg{
182322944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
182422944501Smrg	int i;
182522944501Smrg
182622944501Smrg	if (bo_gem->relocs == NULL)
182722944501Smrg		return;
182822944501Smrg
182922944501Smrg	for (i = 0; i < bo_gem->reloc_count; i++) {
183022944501Smrg		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
183122944501Smrg		int need_fence;
183222944501Smrg
1833aaba2545Smrg		if (target_bo == bo)
1834aaba2545Smrg			continue;
1835aaba2545Smrg
183620131375Smrg		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
183720131375Smrg
183822944501Smrg		/* Continue walking the tree depth-first. */
183922944501Smrg		drm_intel_gem_bo_process_reloc2(target_bo);
184022944501Smrg
184122944501Smrg		need_fence = (bo_gem->reloc_target_info[i].flags &
184222944501Smrg			      DRM_INTEL_RELOC_FENCE);
184322944501Smrg
184422944501Smrg		/* Add the target to the validate list */
184522944501Smrg		drm_intel_add_validate_buffer2(target_bo, need_fence);
184622944501Smrg	}
184722944501Smrg}
184822944501Smrg
184922944501Smrg
185022944501Smrgstatic void
185122944501Smrgdrm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
185222944501Smrg{
185322944501Smrg	int i;
185422944501Smrg
185522944501Smrg	for (i = 0; i < bufmgr_gem->exec_count; i++) {
185622944501Smrg		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
185722944501Smrg		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
185822944501Smrg
185922944501Smrg		/* Update the buffer offset */
186020131375Smrg		if (bufmgr_gem->exec_objects[i].offset != bo->offset64) {
1861d82d45b3Sjoerg			DBG("BO %d (%s) migrated: 0x%08llx -> 0x%08llx\n",
1862d82d45b3Sjoerg			    bo_gem->gem_handle, bo_gem->name,
1863d82d45b3Sjoerg			    (unsigned long long)bo->offset64,
186422944501Smrg			    (unsigned long long)bufmgr_gem->exec_objects[i].
186522944501Smrg			    offset);
186620131375Smrg			bo->offset64 = bufmgr_gem->exec_objects[i].offset;
186722944501Smrg			bo->offset = bufmgr_gem->exec_objects[i].offset;
186822944501Smrg		}
186922944501Smrg	}
187022944501Smrg}
187122944501Smrg
187222944501Smrgstatic void
187322944501Smrgdrm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
187422944501Smrg{
187522944501Smrg	int i;
187622944501Smrg
187722944501Smrg	for (i = 0; i < bufmgr_gem->exec_count; i++) {
187822944501Smrg		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
187922944501Smrg		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
188022944501Smrg
188122944501Smrg		/* Update the buffer offset */
188220131375Smrg		if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) {
1883d82d45b3Sjoerg			DBG("BO %d (%s) migrated: 0x%08llx -> 0x%08llx\n",
1884d82d45b3Sjoerg			    bo_gem->gem_handle, bo_gem->name,
1885d82d45b3Sjoerg			    (unsigned long long)bo->offset64,
188622944501Smrg			    (unsigned long long)bufmgr_gem->exec2_objects[i].offset);
188720131375Smrg			bo->offset64 = bufmgr_gem->exec2_objects[i].offset;
188822944501Smrg			bo->offset = bufmgr_gem->exec2_objects[i].offset;
188922944501Smrg		}
189022944501Smrg	}
189122944501Smrg}
189222944501Smrg
189320131375Smrgstatic void
189420131375Smrgaub_out(drm_intel_bufmgr_gem *bufmgr_gem, uint32_t data)
189520131375Smrg{
189620131375Smrg	fwrite(&data, 1, 4, bufmgr_gem->aub_file);
189720131375Smrg}
189820131375Smrg
189920131375Smrgstatic void
190020131375Smrgaub_out_data(drm_intel_bufmgr_gem *bufmgr_gem, void *data, size_t size)
190120131375Smrg{
190220131375Smrg	fwrite(data, 1, size, bufmgr_gem->aub_file);
190320131375Smrg}
190420131375Smrg
190520131375Smrgstatic void
190620131375Smrgaub_write_bo_data(drm_intel_bo *bo, uint32_t offset, uint32_t size)
190722944501Smrg{
190822944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
190922944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
191020131375Smrg	uint32_t *data;
191120131375Smrg	unsigned int i;
191222944501Smrg
191320131375Smrg	data = malloc(bo->size);
191420131375Smrg	drm_intel_bo_get_subdata(bo, offset, size, data);
191522944501Smrg
191620131375Smrg	/* Easy mode: write out bo with no relocations */
191720131375Smrg	if (!bo_gem->reloc_count) {
191820131375Smrg		aub_out_data(bufmgr_gem, data, size);
191920131375Smrg		free(data);
192020131375Smrg		return;
192120131375Smrg	}
192222944501Smrg
192320131375Smrg	/* Otherwise, handle the relocations while writing. */
192420131375Smrg	for (i = 0; i < size / 4; i++) {
192520131375Smrg		int r;
192620131375Smrg		for (r = 0; r < bo_gem->reloc_count; r++) {
192720131375Smrg			struct drm_i915_gem_relocation_entry *reloc;
192820131375Smrg			drm_intel_reloc_target *info;
192922944501Smrg
193020131375Smrg			reloc = &bo_gem->relocs[r];
193120131375Smrg			info = &bo_gem->reloc_target_info[r];
193222944501Smrg
193320131375Smrg			if (reloc->offset == offset + i * 4) {
193420131375Smrg				drm_intel_bo_gem *target_gem;
193520131375Smrg				uint32_t val;
193622944501Smrg
193720131375Smrg				target_gem = (drm_intel_bo_gem *)info->bo;
193822944501Smrg
193920131375Smrg				val = reloc->delta;
194020131375Smrg				val += target_gem->aub_offset;
194122944501Smrg
194220131375Smrg				aub_out(bufmgr_gem, val);
194320131375Smrg				data[i] = val;
194420131375Smrg				break;
194520131375Smrg			}
194620131375Smrg		}
194720131375Smrg		if (r == bo_gem->reloc_count) {
194820131375Smrg			/* no relocation, just the data */
194920131375Smrg			aub_out(bufmgr_gem, data[i]);
195020131375Smrg		}
195122944501Smrg	}
195222944501Smrg
195320131375Smrg	free(data);
195422944501Smrg}
195522944501Smrg
195620131375Smrgstatic void
195720131375Smrgaub_bo_get_address(drm_intel_bo *bo)
195822944501Smrg{
195920131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
196020131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
196122944501Smrg
196220131375Smrg	/* Give the object a graphics address in the AUB file.  We
196320131375Smrg	 * don't just use the GEM object address because we do AUB
196420131375Smrg	 * dumping before execution -- we want to successfully log
196520131375Smrg	 * when the hardware might hang, and we might even want to aub
196620131375Smrg	 * capture for a driver trying to execute on a different
196720131375Smrg	 * generation of hardware by disabling the actual kernel exec
196820131375Smrg	 * call.
196920131375Smrg	 */
197020131375Smrg	bo_gem->aub_offset = bufmgr_gem->aub_offset;
197120131375Smrg	bufmgr_gem->aub_offset += bo->size;
197220131375Smrg	/* XXX: Handle aperture overflow. */
197320131375Smrg	assert(bufmgr_gem->aub_offset < 256 * 1024 * 1024);
197420131375Smrg}
197520131375Smrg
197620131375Smrgstatic void
197720131375Smrgaub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype,
197820131375Smrg		      uint32_t offset, uint32_t size)
197920131375Smrg{
198020131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
198120131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
198220131375Smrg
198320131375Smrg	aub_out(bufmgr_gem,
198420131375Smrg		CMD_AUB_TRACE_HEADER_BLOCK |
198520131375Smrg		((bufmgr_gem->gen >= 8 ? 6 : 5) - 2));
198620131375Smrg	aub_out(bufmgr_gem,
198720131375Smrg		AUB_TRACE_MEMTYPE_GTT | type | AUB_TRACE_OP_DATA_WRITE);
198820131375Smrg	aub_out(bufmgr_gem, subtype);
198920131375Smrg	aub_out(bufmgr_gem, bo_gem->aub_offset + offset);
199020131375Smrg	aub_out(bufmgr_gem, size);
199120131375Smrg	if (bufmgr_gem->gen >= 8)
199220131375Smrg		aub_out(bufmgr_gem, 0);
199320131375Smrg	aub_write_bo_data(bo, offset, size);
199420131375Smrg}
199520131375Smrg
199620131375Smrg/**
199720131375Smrg * Break up large objects into multiple writes.  Otherwise a 128kb VBO
199820131375Smrg * would overflow the 16 bits of size field in the packet header and
199920131375Smrg * everything goes badly after that.
200020131375Smrg */
200120131375Smrgstatic void
200220131375Smrgaub_write_large_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype,
200320131375Smrg			    uint32_t offset, uint32_t size)
200420131375Smrg{
200520131375Smrg	uint32_t block_size;
200620131375Smrg	uint32_t sub_offset;
200720131375Smrg
200820131375Smrg	for (sub_offset = 0; sub_offset < size; sub_offset += block_size) {
200920131375Smrg		block_size = size - sub_offset;
201020131375Smrg
201120131375Smrg		if (block_size > 8 * 4096)
201220131375Smrg			block_size = 8 * 4096;
201320131375Smrg
201420131375Smrg		aub_write_trace_block(bo, type, subtype, offset + sub_offset,
201520131375Smrg				      block_size);
201620131375Smrg	}
201720131375Smrg}
201820131375Smrg
201920131375Smrgstatic void
202020131375Smrgaub_write_bo(drm_intel_bo *bo)
202120131375Smrg{
202220131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
202320131375Smrg	uint32_t offset = 0;
202420131375Smrg	unsigned i;
202520131375Smrg
202620131375Smrg	aub_bo_get_address(bo);
202720131375Smrg
202820131375Smrg	/* Write out each annotated section separately. */
202920131375Smrg	for (i = 0; i < bo_gem->aub_annotation_count; ++i) {
203020131375Smrg		drm_intel_aub_annotation *annotation =
203120131375Smrg			&bo_gem->aub_annotations[i];
203220131375Smrg		uint32_t ending_offset = annotation->ending_offset;
203320131375Smrg		if (ending_offset > bo->size)
203420131375Smrg			ending_offset = bo->size;
203520131375Smrg		if (ending_offset > offset) {
203620131375Smrg			aub_write_large_trace_block(bo, annotation->type,
203720131375Smrg						    annotation->subtype,
203820131375Smrg						    offset,
203920131375Smrg						    ending_offset - offset);
204020131375Smrg			offset = ending_offset;
204120131375Smrg		}
204220131375Smrg	}
204320131375Smrg
204420131375Smrg	/* Write out any remaining unannotated data */
204520131375Smrg	if (offset < bo->size) {
204620131375Smrg		aub_write_large_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0,
204720131375Smrg					    offset, bo->size - offset);
204820131375Smrg	}
204920131375Smrg}
205020131375Smrg
205120131375Smrg/*
205220131375Smrg * Make a ringbuffer on fly and dump it
205320131375Smrg */
205420131375Smrgstatic void
205520131375Smrgaub_build_dump_ringbuffer(drm_intel_bufmgr_gem *bufmgr_gem,
205620131375Smrg			  uint32_t batch_buffer, int ring_flag)
205720131375Smrg{
205820131375Smrg	uint32_t ringbuffer[4096];
205920131375Smrg	int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */
206020131375Smrg	int ring_count = 0;
206120131375Smrg
206220131375Smrg	if (ring_flag == I915_EXEC_BSD)
206320131375Smrg		ring = AUB_TRACE_TYPE_RING_PRB1;
206420131375Smrg	else if (ring_flag == I915_EXEC_BLT)
206520131375Smrg		ring = AUB_TRACE_TYPE_RING_PRB2;
206620131375Smrg
206720131375Smrg	/* Make a ring buffer to execute our batchbuffer. */
206820131375Smrg	memset(ringbuffer, 0, sizeof(ringbuffer));
206920131375Smrg	if (bufmgr_gem->gen >= 8) {
207020131375Smrg		ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START | (3 - 2);
207120131375Smrg		ringbuffer[ring_count++] = batch_buffer;
207220131375Smrg		ringbuffer[ring_count++] = 0;
207320131375Smrg	} else {
207420131375Smrg		ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START;
207520131375Smrg		ringbuffer[ring_count++] = batch_buffer;
207620131375Smrg	}
207720131375Smrg
207820131375Smrg	/* Write out the ring.  This appears to trigger execution of
207920131375Smrg	 * the ring in the simulator.
208020131375Smrg	 */
208120131375Smrg	aub_out(bufmgr_gem,
208220131375Smrg		CMD_AUB_TRACE_HEADER_BLOCK |
208320131375Smrg		((bufmgr_gem->gen >= 8 ? 6 : 5) - 2));
208420131375Smrg	aub_out(bufmgr_gem,
208520131375Smrg		AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE);
208620131375Smrg	aub_out(bufmgr_gem, 0); /* general/surface subtype */
208720131375Smrg	aub_out(bufmgr_gem, bufmgr_gem->aub_offset);
208820131375Smrg	aub_out(bufmgr_gem, ring_count * 4);
208920131375Smrg	if (bufmgr_gem->gen >= 8)
209020131375Smrg		aub_out(bufmgr_gem, 0);
209120131375Smrg
209220131375Smrg	/* FIXME: Need some flush operations here? */
209320131375Smrg	aub_out_data(bufmgr_gem, ringbuffer, ring_count * 4);
209420131375Smrg
209520131375Smrg	/* Update offset pointer */
209620131375Smrg	bufmgr_gem->aub_offset += 4096;
209720131375Smrg}
209820131375Smrg
209920131375Smrgvoid
210020131375Smrgdrm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo,
210120131375Smrg			      int x1, int y1, int width, int height,
210220131375Smrg			      enum aub_dump_bmp_format format,
210320131375Smrg			      int pitch, int offset)
210420131375Smrg{
210520131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
210620131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
210720131375Smrg	uint32_t cpp;
210820131375Smrg
210920131375Smrg	switch (format) {
211020131375Smrg	case AUB_DUMP_BMP_FORMAT_8BIT:
211120131375Smrg		cpp = 1;
211220131375Smrg		break;
211320131375Smrg	case AUB_DUMP_BMP_FORMAT_ARGB_4444:
211420131375Smrg		cpp = 2;
211520131375Smrg		break;
211620131375Smrg	case AUB_DUMP_BMP_FORMAT_ARGB_0888:
211720131375Smrg	case AUB_DUMP_BMP_FORMAT_ARGB_8888:
211820131375Smrg		cpp = 4;
211920131375Smrg		break;
212020131375Smrg	default:
212120131375Smrg		printf("Unknown AUB dump format %d\n", format);
212220131375Smrg		return;
212320131375Smrg	}
212420131375Smrg
212520131375Smrg	if (!bufmgr_gem->aub_file)
212620131375Smrg		return;
212720131375Smrg
212820131375Smrg	aub_out(bufmgr_gem, CMD_AUB_DUMP_BMP | 4);
212920131375Smrg	aub_out(bufmgr_gem, (y1 << 16) | x1);
213020131375Smrg	aub_out(bufmgr_gem,
213120131375Smrg		(format << 24) |
213220131375Smrg		(cpp << 19) |
213320131375Smrg		pitch / 4);
213420131375Smrg	aub_out(bufmgr_gem, (height << 16) | width);
213520131375Smrg	aub_out(bufmgr_gem, bo_gem->aub_offset + offset);
213620131375Smrg	aub_out(bufmgr_gem,
213720131375Smrg		((bo_gem->tiling_mode != I915_TILING_NONE) ? (1 << 2) : 0) |
213820131375Smrg		((bo_gem->tiling_mode == I915_TILING_Y) ? (1 << 3) : 0));
213920131375Smrg}
214020131375Smrg
214120131375Smrgstatic void
214220131375Smrgaub_exec(drm_intel_bo *bo, int ring_flag, int used)
214320131375Smrg{
214420131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
214520131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
214620131375Smrg	int i;
214720131375Smrg	bool batch_buffer_needs_annotations;
214820131375Smrg
214920131375Smrg	if (!bufmgr_gem->aub_file)
215020131375Smrg		return;
215120131375Smrg
215220131375Smrg	/* If batch buffer is not annotated, annotate it the best we
215320131375Smrg	 * can.
215420131375Smrg	 */
215520131375Smrg	batch_buffer_needs_annotations = bo_gem->aub_annotation_count == 0;
215620131375Smrg	if (batch_buffer_needs_annotations) {
215720131375Smrg		drm_intel_aub_annotation annotations[2] = {
215820131375Smrg			{ AUB_TRACE_TYPE_BATCH, 0, used },
215920131375Smrg			{ AUB_TRACE_TYPE_NOTYPE, 0, bo->size }
216020131375Smrg		};
216120131375Smrg		drm_intel_bufmgr_gem_set_aub_annotations(bo, annotations, 2);
216220131375Smrg	}
216320131375Smrg
216420131375Smrg	/* Write out all buffers to AUB memory */
216520131375Smrg	for (i = 0; i < bufmgr_gem->exec_count; i++) {
216620131375Smrg		aub_write_bo(bufmgr_gem->exec_bos[i]);
216720131375Smrg	}
216820131375Smrg
216920131375Smrg	/* Remove any annotations we added */
217020131375Smrg	if (batch_buffer_needs_annotations)
217120131375Smrg		drm_intel_bufmgr_gem_set_aub_annotations(bo, NULL, 0);
217220131375Smrg
217320131375Smrg	/* Dump ring buffer */
217420131375Smrg	aub_build_dump_ringbuffer(bufmgr_gem, bo_gem->aub_offset, ring_flag);
217520131375Smrg
217620131375Smrg	fflush(bufmgr_gem->aub_file);
217720131375Smrg
217820131375Smrg	/*
217920131375Smrg	 * One frame has been dumped. So reset the aub_offset for the next frame.
218020131375Smrg	 *
218120131375Smrg	 * FIXME: Can we do this?
218220131375Smrg	 */
218320131375Smrg	bufmgr_gem->aub_offset = 0x10000;
218420131375Smrg}
218520131375Smrg
218620131375Smrgstatic int
218720131375Smrgdrm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
218820131375Smrg		      drm_clip_rect_t * cliprects, int num_cliprects, int DR4)
218920131375Smrg{
219020131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
219120131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
219220131375Smrg	struct drm_i915_gem_execbuffer execbuf;
219320131375Smrg	int ret, i;
219420131375Smrg
219520131375Smrg	if (bo_gem->has_error)
219620131375Smrg		return -ENOMEM;
219720131375Smrg
219820131375Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
219920131375Smrg	/* Update indices and set up the validate list. */
220020131375Smrg	drm_intel_gem_bo_process_reloc(bo);
220120131375Smrg
220220131375Smrg	/* Add the batch buffer to the validation list.  There are no
220320131375Smrg	 * relocations pointing to it.
220420131375Smrg	 */
220520131375Smrg	drm_intel_add_validate_buffer(bo);
220620131375Smrg
220720131375Smrg	VG_CLEAR(execbuf);
220820131375Smrg	execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects;
220920131375Smrg	execbuf.buffer_count = bufmgr_gem->exec_count;
221020131375Smrg	execbuf.batch_start_offset = 0;
221120131375Smrg	execbuf.batch_len = used;
221220131375Smrg	execbuf.cliprects_ptr = (uintptr_t) cliprects;
221320131375Smrg	execbuf.num_cliprects = num_cliprects;
221420131375Smrg	execbuf.DR1 = 0;
221520131375Smrg	execbuf.DR4 = DR4;
221620131375Smrg
221720131375Smrg	ret = drmIoctl(bufmgr_gem->fd,
221820131375Smrg		       DRM_IOCTL_I915_GEM_EXECBUFFER,
221920131375Smrg		       &execbuf);
222020131375Smrg	if (ret != 0) {
222120131375Smrg		ret = -errno;
222220131375Smrg		if (errno == ENOSPC) {
222320131375Smrg			DBG("Execbuffer fails to pin. "
222420131375Smrg			    "Estimate: %u. Actual: %u. Available: %u\n",
222520131375Smrg			    drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
222620131375Smrg							       bufmgr_gem->
222720131375Smrg							       exec_count),
222820131375Smrg			    drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
222920131375Smrg							      bufmgr_gem->
223020131375Smrg							      exec_count),
223120131375Smrg			    (unsigned int)bufmgr_gem->gtt_size);
223220131375Smrg		}
223320131375Smrg	}
223420131375Smrg	drm_intel_update_buffer_offsets(bufmgr_gem);
223520131375Smrg
223620131375Smrg	if (bufmgr_gem->bufmgr.debug)
223720131375Smrg		drm_intel_gem_dump_validation_list(bufmgr_gem);
223820131375Smrg
223920131375Smrg	for (i = 0; i < bufmgr_gem->exec_count; i++) {
224020131375Smrg		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
224120131375Smrg		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
224220131375Smrg
224320131375Smrg		bo_gem->idle = false;
224420131375Smrg
224520131375Smrg		/* Disconnect the buffer from the validate list */
224620131375Smrg		bo_gem->validate_index = -1;
224720131375Smrg		bufmgr_gem->exec_bos[i] = NULL;
224820131375Smrg	}
224920131375Smrg	bufmgr_gem->exec_count = 0;
225020131375Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
225120131375Smrg
225220131375Smrg	return ret;
225320131375Smrg}
225420131375Smrg
225520131375Smrgstatic int
225620131375Smrgdo_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx,
225720131375Smrg	 drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
225820131375Smrg	 unsigned int flags)
225920131375Smrg{
226020131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
226120131375Smrg	struct drm_i915_gem_execbuffer2 execbuf;
226220131375Smrg	int ret = 0;
226320131375Smrg	int i;
226420131375Smrg
226520131375Smrg	switch (flags & 0x7) {
226620131375Smrg	default:
226720131375Smrg		return -EINVAL;
226820131375Smrg	case I915_EXEC_BLT:
22699ce4edccSmrg		if (!bufmgr_gem->has_blt)
22709ce4edccSmrg			return -EINVAL;
22719ce4edccSmrg		break;
22729ce4edccSmrg	case I915_EXEC_BSD:
22739ce4edccSmrg		if (!bufmgr_gem->has_bsd)
22749ce4edccSmrg			return -EINVAL;
22759ce4edccSmrg		break;
227620131375Smrg	case I915_EXEC_VEBOX:
227720131375Smrg		if (!bufmgr_gem->has_vebox)
227820131375Smrg			return -EINVAL;
227920131375Smrg		break;
22809ce4edccSmrg	case I915_EXEC_RENDER:
22819ce4edccSmrg	case I915_EXEC_DEFAULT:
22829ce4edccSmrg		break;
22839ce4edccSmrg	}
2284aaba2545Smrg
228522944501Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
228622944501Smrg	/* Update indices and set up the validate list. */
228722944501Smrg	drm_intel_gem_bo_process_reloc2(bo);
228822944501Smrg
228922944501Smrg	/* Add the batch buffer to the validation list.  There are no relocations
229022944501Smrg	 * pointing to it.
229122944501Smrg	 */
229222944501Smrg	drm_intel_add_validate_buffer2(bo, 0);
229322944501Smrg
229420131375Smrg	VG_CLEAR(execbuf);
229522944501Smrg	execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
229622944501Smrg	execbuf.buffer_count = bufmgr_gem->exec_count;
229722944501Smrg	execbuf.batch_start_offset = 0;
229822944501Smrg	execbuf.batch_len = used;
229922944501Smrg	execbuf.cliprects_ptr = (uintptr_t)cliprects;
230022944501Smrg	execbuf.num_cliprects = num_cliprects;
230122944501Smrg	execbuf.DR1 = 0;
230222944501Smrg	execbuf.DR4 = DR4;
230320131375Smrg	execbuf.flags = flags;
230420131375Smrg	if (ctx == NULL)
230520131375Smrg		i915_execbuffer2_set_context_id(execbuf, 0);
230620131375Smrg	else
230720131375Smrg		i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id);
230822944501Smrg	execbuf.rsvd2 = 0;
230922944501Smrg
231020131375Smrg	aub_exec(bo, flags, used);
231120131375Smrg
231220131375Smrg	if (bufmgr_gem->no_exec)
231320131375Smrg		goto skip_execution;
231420131375Smrg
23156d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
23166d98c517Smrg		       DRM_IOCTL_I915_GEM_EXECBUFFER2,
23176d98c517Smrg		       &execbuf);
231822944501Smrg	if (ret != 0) {
231922944501Smrg		ret = -errno;
23206d98c517Smrg		if (ret == -ENOSPC) {
23219ce4edccSmrg			DBG("Execbuffer fails to pin. "
23229ce4edccSmrg			    "Estimate: %u. Actual: %u. Available: %u\n",
23239ce4edccSmrg			    drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
23249ce4edccSmrg							       bufmgr_gem->exec_count),
23259ce4edccSmrg			    drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
23269ce4edccSmrg							      bufmgr_gem->exec_count),
23279ce4edccSmrg			    (unsigned int) bufmgr_gem->gtt_size);
232822944501Smrg		}
232922944501Smrg	}
233022944501Smrg	drm_intel_update_buffer_offsets2(bufmgr_gem);
233122944501Smrg
233220131375Smrgskip_execution:
233322944501Smrg	if (bufmgr_gem->bufmgr.debug)
233422944501Smrg		drm_intel_gem_dump_validation_list(bufmgr_gem);
233522944501Smrg
233622944501Smrg	for (i = 0; i < bufmgr_gem->exec_count; i++) {
233722944501Smrg		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
233822944501Smrg		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
233922944501Smrg
234020131375Smrg		bo_gem->idle = false;
234120131375Smrg
234222944501Smrg		/* Disconnect the buffer from the validate list */
234322944501Smrg		bo_gem->validate_index = -1;
234422944501Smrg		bufmgr_gem->exec_bos[i] = NULL;
234522944501Smrg	}
234622944501Smrg	bufmgr_gem->exec_count = 0;
234722944501Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
234822944501Smrg
234922944501Smrg	return ret;
235022944501Smrg}
235122944501Smrg
2352aaba2545Smrgstatic int
2353aaba2545Smrgdrm_intel_gem_bo_exec2(drm_intel_bo *bo, int used,
2354aaba2545Smrg		       drm_clip_rect_t *cliprects, int num_cliprects,
2355aaba2545Smrg		       int DR4)
2356aaba2545Smrg{
235720131375Smrg	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
235820131375Smrg			I915_EXEC_RENDER);
235920131375Smrg}
236020131375Smrg
236120131375Smrgstatic int
236220131375Smrgdrm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
236320131375Smrg			drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
236420131375Smrg			unsigned int flags)
236520131375Smrg{
236620131375Smrg	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
236720131375Smrg			flags);
236820131375Smrg}
236920131375Smrg
237020131375Smrgint
237120131375Smrgdrm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx,
237220131375Smrg			      int used, unsigned int flags)
237320131375Smrg{
237420131375Smrg	return do_exec2(bo, used, ctx, NULL, 0, 0, flags);
2375aaba2545Smrg}
2376aaba2545Smrg
237722944501Smrgstatic int
237822944501Smrgdrm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
237922944501Smrg{
238022944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
238122944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
238222944501Smrg	struct drm_i915_gem_pin pin;
238322944501Smrg	int ret;
238422944501Smrg
238520131375Smrg	VG_CLEAR(pin);
238622944501Smrg	pin.handle = bo_gem->gem_handle;
238722944501Smrg	pin.alignment = alignment;
238822944501Smrg
23896d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
23906d98c517Smrg		       DRM_IOCTL_I915_GEM_PIN,
23916d98c517Smrg		       &pin);
239222944501Smrg	if (ret != 0)
239322944501Smrg		return -errno;
239422944501Smrg
239520131375Smrg	bo->offset64 = pin.offset;
239622944501Smrg	bo->offset = pin.offset;
239722944501Smrg	return 0;
239822944501Smrg}
239922944501Smrg
240022944501Smrgstatic int
240122944501Smrgdrm_intel_gem_bo_unpin(drm_intel_bo *bo)
240222944501Smrg{
240322944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
240422944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
240522944501Smrg	struct drm_i915_gem_unpin unpin;
240622944501Smrg	int ret;
240722944501Smrg
240820131375Smrg	VG_CLEAR(unpin);
240922944501Smrg	unpin.handle = bo_gem->gem_handle;
241022944501Smrg
24116d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
241222944501Smrg	if (ret != 0)
241322944501Smrg		return -errno;
241422944501Smrg
241522944501Smrg	return 0;
241622944501Smrg}
241722944501Smrg
241822944501Smrgstatic int
24196d98c517Smrgdrm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
24206d98c517Smrg				     uint32_t tiling_mode,
24216d98c517Smrg				     uint32_t stride)
242222944501Smrg{
242322944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
242422944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
242522944501Smrg	struct drm_i915_gem_set_tiling set_tiling;
242622944501Smrg	int ret;
242722944501Smrg
24286d98c517Smrg	if (bo_gem->global_name == 0 &&
24296d98c517Smrg	    tiling_mode == bo_gem->tiling_mode &&
24306d98c517Smrg	    stride == bo_gem->stride)
243122944501Smrg		return 0;
243222944501Smrg
243322944501Smrg	memset(&set_tiling, 0, sizeof(set_tiling));
243422944501Smrg	do {
24356d98c517Smrg		/* set_tiling is slightly broken and overwrites the
24366d98c517Smrg		 * input on the error path, so we have to open code
24376d98c517Smrg		 * rmIoctl.
24386d98c517Smrg		 */
24396d98c517Smrg		set_tiling.handle = bo_gem->gem_handle;
24406d98c517Smrg		set_tiling.tiling_mode = tiling_mode;
244122944501Smrg		set_tiling.stride = stride;
244222944501Smrg
244322944501Smrg		ret = ioctl(bufmgr_gem->fd,
244422944501Smrg			    DRM_IOCTL_I915_GEM_SET_TILING,
244522944501Smrg			    &set_tiling);
24466d98c517Smrg	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
24476d98c517Smrg	if (ret == -1)
24486d98c517Smrg		return -errno;
24496d98c517Smrg
24506d98c517Smrg	bo_gem->tiling_mode = set_tiling.tiling_mode;
24516d98c517Smrg	bo_gem->swizzle_mode = set_tiling.swizzle_mode;
24526d98c517Smrg	bo_gem->stride = set_tiling.stride;
24536d98c517Smrg	return 0;
24546d98c517Smrg}
24556d98c517Smrg
24566d98c517Smrgstatic int
24576d98c517Smrgdrm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
24586d98c517Smrg			    uint32_t stride)
24596d98c517Smrg{
24606d98c517Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
24616d98c517Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
24626d98c517Smrg	int ret;
24636d98c517Smrg
24646d98c517Smrg	/* Linear buffers have no stride. By ensuring that we only ever use
24656d98c517Smrg	 * stride 0 with linear buffers, we simplify our code.
24666d98c517Smrg	 */
24676d98c517Smrg	if (*tiling_mode == I915_TILING_NONE)
24686d98c517Smrg		stride = 0;
24696d98c517Smrg
24706d98c517Smrg	ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride);
24716d98c517Smrg	if (ret == 0)
2472aaba2545Smrg		drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
247322944501Smrg
247422944501Smrg	*tiling_mode = bo_gem->tiling_mode;
2475aaba2545Smrg	return ret;
247622944501Smrg}
247722944501Smrg
247822944501Smrgstatic int
247922944501Smrgdrm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
248022944501Smrg			    uint32_t * swizzle_mode)
248122944501Smrg{
248222944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
248322944501Smrg
248422944501Smrg	*tiling_mode = bo_gem->tiling_mode;
248522944501Smrg	*swizzle_mode = bo_gem->swizzle_mode;
248622944501Smrg	return 0;
248722944501Smrg}
248822944501Smrg
248920131375Smrgdrm_intel_bo *
249020131375Smrgdrm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size)
249120131375Smrg{
249220131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
249320131375Smrg	int ret;
249420131375Smrg	uint32_t handle;
249520131375Smrg	drm_intel_bo_gem *bo_gem;
249620131375Smrg	struct drm_i915_gem_get_tiling get_tiling;
249720131375Smrg	drmMMListHead *list;
249820131375Smrg
249920131375Smrg	ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle);
250020131375Smrg
250120131375Smrg	/*
250220131375Smrg	 * See if the kernel has already returned this buffer to us. Just as
250320131375Smrg	 * for named buffers, we must not create two bo's pointing at the same
250420131375Smrg	 * kernel object
250520131375Smrg	 */
250620131375Smrg	for (list = bufmgr_gem->named.next;
250720131375Smrg	     list != &bufmgr_gem->named;
250820131375Smrg	     list = list->next) {
250920131375Smrg		bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list);
251020131375Smrg		if (bo_gem->gem_handle == handle) {
251120131375Smrg			drm_intel_gem_bo_reference(&bo_gem->bo);
251220131375Smrg			return &bo_gem->bo;
251320131375Smrg		}
251420131375Smrg	}
251520131375Smrg
251620131375Smrg	if (ret) {
251720131375Smrg	  fprintf(stderr,"ret is %d %d\n", ret, errno);
251820131375Smrg		return NULL;
251920131375Smrg	}
252020131375Smrg
252120131375Smrg	bo_gem = calloc(1, sizeof(*bo_gem));
252220131375Smrg	if (!bo_gem)
252320131375Smrg		return NULL;
252420131375Smrg
252520131375Smrg	/* Determine size of bo.  The fd-to-handle ioctl really should
252620131375Smrg	 * return the size, but it doesn't.  If we have kernel 3.12 or
252720131375Smrg	 * later, we can lseek on the prime fd to get the size.  Older
252820131375Smrg	 * kernels will just fail, in which case we fall back to the
252920131375Smrg	 * provided (estimated or guess size). */
253020131375Smrg	ret = lseek(prime_fd, 0, SEEK_END);
253120131375Smrg	if (ret != -1)
253220131375Smrg		bo_gem->bo.size = ret;
253320131375Smrg	else
253420131375Smrg		bo_gem->bo.size = size;
253520131375Smrg
253620131375Smrg	bo_gem->bo.handle = handle;
253720131375Smrg	bo_gem->bo.bufmgr = bufmgr;
253820131375Smrg
253920131375Smrg	bo_gem->gem_handle = handle;
254020131375Smrg
254120131375Smrg	atomic_set(&bo_gem->refcount, 1);
254220131375Smrg
254320131375Smrg	bo_gem->name = "prime";
254420131375Smrg	bo_gem->validate_index = -1;
254520131375Smrg	bo_gem->reloc_tree_fences = 0;
254620131375Smrg	bo_gem->used_as_reloc_target = false;
254720131375Smrg	bo_gem->has_error = false;
254820131375Smrg	bo_gem->reusable = false;
254920131375Smrg
255020131375Smrg	DRMINITLISTHEAD(&bo_gem->vma_list);
255120131375Smrg	DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
255220131375Smrg
255320131375Smrg	VG_CLEAR(get_tiling);
255420131375Smrg	get_tiling.handle = bo_gem->gem_handle;
255520131375Smrg	ret = drmIoctl(bufmgr_gem->fd,
255620131375Smrg		       DRM_IOCTL_I915_GEM_GET_TILING,
255720131375Smrg		       &get_tiling);
255820131375Smrg	if (ret != 0) {
255920131375Smrg		drm_intel_gem_bo_unreference(&bo_gem->bo);
256020131375Smrg		return NULL;
256120131375Smrg	}
256220131375Smrg	bo_gem->tiling_mode = get_tiling.tiling_mode;
256320131375Smrg	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
256420131375Smrg	/* XXX stride is unknown */
256520131375Smrg	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
256620131375Smrg
256720131375Smrg	return &bo_gem->bo;
256820131375Smrg}
256920131375Smrg
257020131375Smrgint
257120131375Smrgdrm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd)
257220131375Smrg{
257320131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
257420131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
257520131375Smrg
257620131375Smrg        if (DRMLISTEMPTY(&bo_gem->name_list))
257720131375Smrg                DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
257820131375Smrg
257920131375Smrg	if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle,
258020131375Smrg			       DRM_CLOEXEC, prime_fd) != 0)
258120131375Smrg		return -errno;
258220131375Smrg
258320131375Smrg	bo_gem->reusable = false;
258420131375Smrg
258520131375Smrg	return 0;
258620131375Smrg}
258720131375Smrg
258822944501Smrgstatic int
258922944501Smrgdrm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name)
259022944501Smrg{
259122944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
259222944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
259322944501Smrg	int ret;
259422944501Smrg
259522944501Smrg	if (!bo_gem->global_name) {
259620131375Smrg		struct drm_gem_flink flink;
259720131375Smrg
259820131375Smrg		VG_CLEAR(flink);
259922944501Smrg		flink.handle = bo_gem->gem_handle;
260022944501Smrg
26016d98c517Smrg		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink);
260222944501Smrg		if (ret != 0)
260322944501Smrg			return -errno;
260420131375Smrg
260522944501Smrg		bo_gem->global_name = flink.name;
260620131375Smrg		bo_gem->reusable = false;
260720131375Smrg
260820131375Smrg                if (DRMLISTEMPTY(&bo_gem->name_list))
260920131375Smrg                        DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
261022944501Smrg	}
261122944501Smrg
261222944501Smrg	*name = bo_gem->global_name;
261322944501Smrg	return 0;
261422944501Smrg}
261522944501Smrg
261622944501Smrg/**
261722944501Smrg * Enables unlimited caching of buffer objects for reuse.
261822944501Smrg *
261922944501Smrg * This is potentially very memory expensive, as the cache at each bucket
262022944501Smrg * size is only bounded by how many buffers of that size we've managed to have
262122944501Smrg * in flight at once.
262222944501Smrg */
262322944501Smrgvoid
262422944501Smrgdrm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr)
262522944501Smrg{
262622944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
262722944501Smrg
262820131375Smrg	bufmgr_gem->bo_reuse = true;
262922944501Smrg}
263022944501Smrg
263122944501Smrg/**
263222944501Smrg * Enable use of fenced reloc type.
263322944501Smrg *
263422944501Smrg * New code should enable this to avoid unnecessary fence register
263522944501Smrg * allocation.  If this option is not enabled, all relocs will have fence
263622944501Smrg * register allocated.
263722944501Smrg */
263822944501Smrgvoid
263922944501Smrgdrm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr)
264022944501Smrg{
264122944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
264222944501Smrg
264322944501Smrg	if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2)
264420131375Smrg		bufmgr_gem->fenced_relocs = true;
264522944501Smrg}
264622944501Smrg
264722944501Smrg/**
264822944501Smrg * Return the additional aperture space required by the tree of buffer objects
264922944501Smrg * rooted at bo.
265022944501Smrg */
265122944501Smrgstatic int
265222944501Smrgdrm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
265322944501Smrg{
265422944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
265522944501Smrg	int i;
265622944501Smrg	int total = 0;
265722944501Smrg
265822944501Smrg	if (bo == NULL || bo_gem->included_in_check_aperture)
265922944501Smrg		return 0;
266022944501Smrg
266122944501Smrg	total += bo->size;
266220131375Smrg	bo_gem->included_in_check_aperture = true;
266322944501Smrg
266422944501Smrg	for (i = 0; i < bo_gem->reloc_count; i++)
266522944501Smrg		total +=
266622944501Smrg		    drm_intel_gem_bo_get_aperture_space(bo_gem->
266722944501Smrg							reloc_target_info[i].bo);
266822944501Smrg
266922944501Smrg	return total;
267022944501Smrg}
267122944501Smrg
267222944501Smrg/**
267322944501Smrg * Count the number of buffers in this list that need a fence reg
267422944501Smrg *
267522944501Smrg * If the count is greater than the number of available regs, we'll have
267622944501Smrg * to ask the caller to resubmit a batch with fewer tiled buffers.
267722944501Smrg *
267822944501Smrg * This function over-counts if the same buffer is used multiple times.
267922944501Smrg */
268022944501Smrgstatic unsigned int
268122944501Smrgdrm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count)
268222944501Smrg{
268322944501Smrg	int i;
268422944501Smrg	unsigned int total = 0;
268522944501Smrg
268622944501Smrg	for (i = 0; i < count; i++) {
268722944501Smrg		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
268822944501Smrg
268922944501Smrg		if (bo_gem == NULL)
269022944501Smrg			continue;
269122944501Smrg
269222944501Smrg		total += bo_gem->reloc_tree_fences;
269322944501Smrg	}
269422944501Smrg	return total;
269522944501Smrg}
269622944501Smrg
269722944501Smrg/**
269822944501Smrg * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready
269922944501Smrg * for the next drm_intel_bufmgr_check_aperture_space() call.
270022944501Smrg */
270122944501Smrgstatic void
270222944501Smrgdrm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
270322944501Smrg{
270422944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
270522944501Smrg	int i;
270622944501Smrg
270722944501Smrg	if (bo == NULL || !bo_gem->included_in_check_aperture)
270822944501Smrg		return;
270922944501Smrg
271020131375Smrg	bo_gem->included_in_check_aperture = false;
271122944501Smrg
271222944501Smrg	for (i = 0; i < bo_gem->reloc_count; i++)
271322944501Smrg		drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->
271422944501Smrg							   reloc_target_info[i].bo);
271522944501Smrg}
271622944501Smrg
271722944501Smrg/**
271822944501Smrg * Return a conservative estimate for the amount of aperture required
271922944501Smrg * for a collection of buffers. This may double-count some buffers.
272022944501Smrg */
272122944501Smrgstatic unsigned int
272222944501Smrgdrm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count)
272322944501Smrg{
272422944501Smrg	int i;
272522944501Smrg	unsigned int total = 0;
272622944501Smrg
272722944501Smrg	for (i = 0; i < count; i++) {
272822944501Smrg		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
272922944501Smrg		if (bo_gem != NULL)
273022944501Smrg			total += bo_gem->reloc_tree_size;
273122944501Smrg	}
273222944501Smrg	return total;
273322944501Smrg}
273422944501Smrg
273522944501Smrg/**
273622944501Smrg * Return the amount of aperture needed for a collection of buffers.
273722944501Smrg * This avoids double counting any buffers, at the cost of looking
273822944501Smrg * at every buffer in the set.
273922944501Smrg */
274022944501Smrgstatic unsigned int
274122944501Smrgdrm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count)
274222944501Smrg{
274322944501Smrg	int i;
274422944501Smrg	unsigned int total = 0;
274522944501Smrg
274622944501Smrg	for (i = 0; i < count; i++) {
274722944501Smrg		total += drm_intel_gem_bo_get_aperture_space(bo_array[i]);
274822944501Smrg		/* For the first buffer object in the array, we get an
274922944501Smrg		 * accurate count back for its reloc_tree size (since nothing
275022944501Smrg		 * had been flagged as being counted yet).  We can save that
275122944501Smrg		 * value out as a more conservative reloc_tree_size that
275222944501Smrg		 * avoids double-counting target buffers.  Since the first
275322944501Smrg		 * buffer happens to usually be the batch buffer in our
275422944501Smrg		 * callers, this can pull us back from doing the tree
275522944501Smrg		 * walk on every new batch emit.
275622944501Smrg		 */
275722944501Smrg		if (i == 0) {
275822944501Smrg			drm_intel_bo_gem *bo_gem =
275922944501Smrg			    (drm_intel_bo_gem *) bo_array[i];
276022944501Smrg			bo_gem->reloc_tree_size = total;
276122944501Smrg		}
276222944501Smrg	}
276322944501Smrg
276422944501Smrg	for (i = 0; i < count; i++)
276522944501Smrg		drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]);
276622944501Smrg	return total;
276722944501Smrg}
276822944501Smrg
276922944501Smrg/**
277022944501Smrg * Return -1 if the batchbuffer should be flushed before attempting to
277122944501Smrg * emit rendering referencing the buffers pointed to by bo_array.
277222944501Smrg *
277322944501Smrg * This is required because if we try to emit a batchbuffer with relocations
277422944501Smrg * to a tree of buffers that won't simultaneously fit in the aperture,
277522944501Smrg * the rendering will return an error at a point where the software is not
277622944501Smrg * prepared to recover from it.
277722944501Smrg *
277822944501Smrg * However, we also want to emit the batchbuffer significantly before we reach
277922944501Smrg * the limit, as a series of batchbuffers each of which references buffers
278022944501Smrg * covering almost all of the aperture means that at each emit we end up
278122944501Smrg * waiting to evict a buffer from the last rendering, and we get synchronous
278222944501Smrg * performance.  By emitting smaller batchbuffers, we eat some CPU overhead to
278322944501Smrg * get better parallelism.
278422944501Smrg */
278522944501Smrgstatic int
278622944501Smrgdrm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
278722944501Smrg{
278822944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem =
278922944501Smrg	    (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr;
279022944501Smrg	unsigned int total = 0;
279122944501Smrg	unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
279222944501Smrg	int total_fences;
279322944501Smrg
279422944501Smrg	/* Check for fence reg constraints if necessary */
279522944501Smrg	if (bufmgr_gem->available_fences) {
279622944501Smrg		total_fences = drm_intel_gem_total_fences(bo_array, count);
279722944501Smrg		if (total_fences > bufmgr_gem->available_fences)
279822944501Smrg			return -ENOSPC;
279922944501Smrg	}
280022944501Smrg
280122944501Smrg	total = drm_intel_gem_estimate_batch_space(bo_array, count);
280222944501Smrg
280322944501Smrg	if (total > threshold)
280422944501Smrg		total = drm_intel_gem_compute_batch_space(bo_array, count);
280522944501Smrg
280622944501Smrg	if (total > threshold) {
280722944501Smrg		DBG("check_space: overflowed available aperture, "
280822944501Smrg		    "%dkb vs %dkb\n",
280922944501Smrg		    total / 1024, (int)bufmgr_gem->gtt_size / 1024);
281022944501Smrg		return -ENOSPC;
281122944501Smrg	} else {
281222944501Smrg		DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024,
281322944501Smrg		    (int)bufmgr_gem->gtt_size / 1024);
281422944501Smrg		return 0;
281522944501Smrg	}
281622944501Smrg}
281722944501Smrg
281822944501Smrg/*
281922944501Smrg * Disable buffer reuse for objects which are shared with the kernel
282022944501Smrg * as scanout buffers
282122944501Smrg */
282222944501Smrgstatic int
282322944501Smrgdrm_intel_gem_bo_disable_reuse(drm_intel_bo *bo)
282422944501Smrg{
282522944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
282622944501Smrg
282720131375Smrg	bo_gem->reusable = false;
282822944501Smrg	return 0;
282922944501Smrg}
283022944501Smrg
2831aaba2545Smrgstatic int
2832aaba2545Smrgdrm_intel_gem_bo_is_reusable(drm_intel_bo *bo)
2833aaba2545Smrg{
2834aaba2545Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2835aaba2545Smrg
2836aaba2545Smrg	return bo_gem->reusable;
2837aaba2545Smrg}
2838aaba2545Smrg
283922944501Smrgstatic int
284022944501Smrg_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
284122944501Smrg{
284222944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
284322944501Smrg	int i;
284422944501Smrg
284522944501Smrg	for (i = 0; i < bo_gem->reloc_count; i++) {
284622944501Smrg		if (bo_gem->reloc_target_info[i].bo == target_bo)
284722944501Smrg			return 1;
2848aaba2545Smrg		if (bo == bo_gem->reloc_target_info[i].bo)
2849aaba2545Smrg			continue;
285022944501Smrg		if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo,
285122944501Smrg						target_bo))
285222944501Smrg			return 1;
285322944501Smrg	}
285422944501Smrg
285522944501Smrg	return 0;
285622944501Smrg}
285722944501Smrg
285822944501Smrg/** Return true if target_bo is referenced by bo's relocation tree. */
285922944501Smrgstatic int
286022944501Smrgdrm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
286122944501Smrg{
286222944501Smrg	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
286322944501Smrg
286422944501Smrg	if (bo == NULL || target_bo == NULL)
286522944501Smrg		return 0;
286622944501Smrg	if (target_bo_gem->used_as_reloc_target)
286722944501Smrg		return _drm_intel_gem_bo_references(bo, target_bo);
286822944501Smrg	return 0;
286922944501Smrg}
287022944501Smrg
2871aaba2545Smrgstatic void
2872aaba2545Smrgadd_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
2873aaba2545Smrg{
2874aaba2545Smrg	unsigned int i = bufmgr_gem->num_buckets;
2875aaba2545Smrg
2876aaba2545Smrg	assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));
2877aaba2545Smrg
2878aaba2545Smrg	DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
2879aaba2545Smrg	bufmgr_gem->cache_bucket[i].size = size;
2880aaba2545Smrg	bufmgr_gem->num_buckets++;
2881aaba2545Smrg}
2882aaba2545Smrg
2883aaba2545Smrgstatic void
2884aaba2545Smrginit_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
2885aaba2545Smrg{
2886aaba2545Smrg	unsigned long size, cache_max_size = 64 * 1024 * 1024;
2887aaba2545Smrg
2888aaba2545Smrg	/* OK, so power of two buckets was too wasteful of memory.
2889aaba2545Smrg	 * Give 3 other sizes between each power of two, to hopefully
2890aaba2545Smrg	 * cover things accurately enough.  (The alternative is
2891aaba2545Smrg	 * probably to just go for exact matching of sizes, and assume
2892aaba2545Smrg	 * that for things like composited window resize the tiled
2893aaba2545Smrg	 * width/height alignment and rounding of sizes to pages will
2894aaba2545Smrg	 * get us useful cache hit rates anyway)
2895aaba2545Smrg	 */
2896aaba2545Smrg	add_bucket(bufmgr_gem, 4096);
2897aaba2545Smrg	add_bucket(bufmgr_gem, 4096 * 2);
2898aaba2545Smrg	add_bucket(bufmgr_gem, 4096 * 3);
2899aaba2545Smrg
2900aaba2545Smrg	/* Initialize the linked lists for BO reuse cache. */
2901aaba2545Smrg	for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
2902aaba2545Smrg		add_bucket(bufmgr_gem, size);
2903aaba2545Smrg
2904aaba2545Smrg		add_bucket(bufmgr_gem, size + size * 1 / 4);
2905aaba2545Smrg		add_bucket(bufmgr_gem, size + size * 2 / 4);
2906aaba2545Smrg		add_bucket(bufmgr_gem, size + size * 3 / 4);
2907aaba2545Smrg	}
2908aaba2545Smrg}
2909aaba2545Smrg
291020131375Smrgvoid
291120131375Smrgdrm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit)
291220131375Smrg{
291320131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
291420131375Smrg
291520131375Smrg	bufmgr_gem->vma_max = limit;
291620131375Smrg
291720131375Smrg	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
291820131375Smrg}
291920131375Smrg
292020131375Smrg/**
292120131375Smrg * Get the PCI ID for the device.  This can be overridden by setting the
292220131375Smrg * INTEL_DEVID_OVERRIDE environment variable to the desired ID.
292320131375Smrg */
292420131375Smrgstatic int
292520131375Smrgget_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem)
292620131375Smrg{
292720131375Smrg	char *devid_override;
292820131375Smrg	int devid;
292920131375Smrg	int ret;
293020131375Smrg	drm_i915_getparam_t gp;
293120131375Smrg
293220131375Smrg	if (geteuid() == getuid()) {
293320131375Smrg		devid_override = getenv("INTEL_DEVID_OVERRIDE");
293420131375Smrg		if (devid_override) {
293520131375Smrg			bufmgr_gem->no_exec = true;
293620131375Smrg			return strtod(devid_override, NULL);
293720131375Smrg		}
293820131375Smrg	}
293920131375Smrg
294020131375Smrg	VG_CLEAR(devid);
294120131375Smrg	VG_CLEAR(gp);
294220131375Smrg	gp.param = I915_PARAM_CHIPSET_ID;
294320131375Smrg	gp.value = &devid;
294420131375Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
294520131375Smrg	if (ret) {
294620131375Smrg		fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
294720131375Smrg		fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
294820131375Smrg	}
294920131375Smrg	return devid;
295020131375Smrg}
295120131375Smrg
295220131375Smrgint
295320131375Smrgdrm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr)
295420131375Smrg{
295520131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
295620131375Smrg
295720131375Smrg	return bufmgr_gem->pci_device;
295820131375Smrg}
295920131375Smrg
296020131375Smrg/**
296120131375Smrg * Sets the AUB filename.
296220131375Smrg *
296320131375Smrg * This function has to be called before drm_intel_bufmgr_gem_set_aub_dump()
296420131375Smrg * for it to have any effect.
296520131375Smrg */
296620131375Smrgvoid
296720131375Smrgdrm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr,
296820131375Smrg				      const char *filename)
296920131375Smrg{
297020131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
297120131375Smrg
297220131375Smrg	free(bufmgr_gem->aub_filename);
297320131375Smrg	if (filename)
297420131375Smrg		bufmgr_gem->aub_filename = strdup(filename);
297520131375Smrg}
297620131375Smrg
297720131375Smrg/**
297820131375Smrg * Sets up AUB dumping.
297920131375Smrg *
298020131375Smrg * This is a trace file format that can be used with the simulator.
298120131375Smrg * Packets are emitted in a format somewhat like GPU command packets.
298220131375Smrg * You can set up a GTT and upload your objects into the referenced
298320131375Smrg * space, then send off batchbuffers and get BMPs out the other end.
298420131375Smrg */
298520131375Smrgvoid
298620131375Smrgdrm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable)
298720131375Smrg{
298820131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
298920131375Smrg	int entry = 0x200003;
299020131375Smrg	int i;
299120131375Smrg	int gtt_size = 0x10000;
299220131375Smrg	const char *filename;
299320131375Smrg
299420131375Smrg	if (!enable) {
299520131375Smrg		if (bufmgr_gem->aub_file) {
299620131375Smrg			fclose(bufmgr_gem->aub_file);
299720131375Smrg			bufmgr_gem->aub_file = NULL;
299820131375Smrg		}
299920131375Smrg		return;
300020131375Smrg	}
300120131375Smrg
300220131375Smrg	if (geteuid() != getuid())
300320131375Smrg		return;
300420131375Smrg
300520131375Smrg	if (bufmgr_gem->aub_filename)
300620131375Smrg		filename = bufmgr_gem->aub_filename;
300720131375Smrg	else
300820131375Smrg		filename = "intel.aub";
300920131375Smrg	bufmgr_gem->aub_file = fopen(filename, "w+");
301020131375Smrg	if (!bufmgr_gem->aub_file)
301120131375Smrg		return;
301220131375Smrg
301320131375Smrg	/* Start allocating objects from just after the GTT. */
301420131375Smrg	bufmgr_gem->aub_offset = gtt_size;
301520131375Smrg
301620131375Smrg	/* Start with a (required) version packet. */
301720131375Smrg	aub_out(bufmgr_gem, CMD_AUB_HEADER | (13 - 2));
301820131375Smrg	aub_out(bufmgr_gem,
301920131375Smrg		(4 << AUB_HEADER_MAJOR_SHIFT) |
302020131375Smrg		(0 << AUB_HEADER_MINOR_SHIFT));
302120131375Smrg	for (i = 0; i < 8; i++) {
302220131375Smrg		aub_out(bufmgr_gem, 0); /* app name */
302320131375Smrg	}
302420131375Smrg	aub_out(bufmgr_gem, 0); /* timestamp */
302520131375Smrg	aub_out(bufmgr_gem, 0); /* timestamp */
302620131375Smrg	aub_out(bufmgr_gem, 0); /* comment len */
302720131375Smrg
302820131375Smrg	/* Set up the GTT. The max we can handle is 256M */
302920131375Smrg	aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | ((bufmgr_gem->gen >= 8 ? 6 : 5) - 2));
303020131375Smrg	aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_NONLOCAL | 0 | AUB_TRACE_OP_DATA_WRITE);
303120131375Smrg	aub_out(bufmgr_gem, 0); /* subtype */
303220131375Smrg	aub_out(bufmgr_gem, 0); /* offset */
303320131375Smrg	aub_out(bufmgr_gem, gtt_size); /* size */
303420131375Smrg	if (bufmgr_gem->gen >= 8)
303520131375Smrg		aub_out(bufmgr_gem, 0);
303620131375Smrg	for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) {
303720131375Smrg		aub_out(bufmgr_gem, entry);
303820131375Smrg	}
303920131375Smrg}
304020131375Smrg
304120131375Smrgdrm_intel_context *
304220131375Smrgdrm_intel_gem_context_create(drm_intel_bufmgr *bufmgr)
304320131375Smrg{
304420131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
304520131375Smrg	struct drm_i915_gem_context_create create;
304620131375Smrg	drm_intel_context *context = NULL;
304720131375Smrg	int ret;
304820131375Smrg
304920131375Smrg	context = calloc(1, sizeof(*context));
305020131375Smrg	if (!context)
305120131375Smrg		return NULL;
305220131375Smrg
305320131375Smrg	VG_CLEAR(create);
305420131375Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
305520131375Smrg	if (ret != 0) {
305620131375Smrg		DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n",
305720131375Smrg		    strerror(errno));
305820131375Smrg		free(context);
305920131375Smrg		return NULL;
306020131375Smrg	}
306120131375Smrg
306220131375Smrg	context->ctx_id = create.ctx_id;
306320131375Smrg	context->bufmgr = bufmgr;
306420131375Smrg
306520131375Smrg	return context;
306620131375Smrg}
306720131375Smrg
306820131375Smrgvoid
306920131375Smrgdrm_intel_gem_context_destroy(drm_intel_context *ctx)
307020131375Smrg{
307120131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem;
307220131375Smrg	struct drm_i915_gem_context_destroy destroy;
307320131375Smrg	int ret;
307420131375Smrg
307520131375Smrg	if (ctx == NULL)
307620131375Smrg		return;
307720131375Smrg
307820131375Smrg	VG_CLEAR(destroy);
307920131375Smrg
308020131375Smrg	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
308120131375Smrg	destroy.ctx_id = ctx->ctx_id;
308220131375Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY,
308320131375Smrg		       &destroy);
308420131375Smrg	if (ret != 0)
308520131375Smrg		fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
308620131375Smrg			strerror(errno));
308720131375Smrg
308820131375Smrg	free(ctx);
308920131375Smrg}
309020131375Smrg
309120131375Smrgint
309220131375Smrgdrm_intel_get_reset_stats(drm_intel_context *ctx,
309320131375Smrg			  uint32_t *reset_count,
309420131375Smrg			  uint32_t *active,
309520131375Smrg			  uint32_t *pending)
309620131375Smrg{
309720131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem;
309820131375Smrg	struct drm_i915_reset_stats stats;
309920131375Smrg	int ret;
310020131375Smrg
310120131375Smrg	if (ctx == NULL)
310220131375Smrg		return -EINVAL;
310320131375Smrg
310420131375Smrg	memset(&stats, 0, sizeof(stats));
310520131375Smrg
310620131375Smrg	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
310720131375Smrg	stats.ctx_id = ctx->ctx_id;
310820131375Smrg	ret = drmIoctl(bufmgr_gem->fd,
310920131375Smrg		       DRM_IOCTL_I915_GET_RESET_STATS,
311020131375Smrg		       &stats);
311120131375Smrg	if (ret == 0) {
311220131375Smrg		if (reset_count != NULL)
311320131375Smrg			*reset_count = stats.reset_count;
311420131375Smrg
311520131375Smrg		if (active != NULL)
311620131375Smrg			*active = stats.batch_active;
311720131375Smrg
311820131375Smrg		if (pending != NULL)
311920131375Smrg			*pending = stats.batch_pending;
312020131375Smrg	}
312120131375Smrg
312220131375Smrg	return ret;
312320131375Smrg}
312420131375Smrg
312520131375Smrgint
312620131375Smrgdrm_intel_reg_read(drm_intel_bufmgr *bufmgr,
312720131375Smrg		   uint32_t offset,
312820131375Smrg		   uint64_t *result)
312920131375Smrg{
313020131375Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
313120131375Smrg	struct drm_i915_reg_read reg_read;
313220131375Smrg	int ret;
313320131375Smrg
313420131375Smrg	VG_CLEAR(reg_read);
313520131375Smrg	reg_read.offset = offset;
313620131375Smrg
313720131375Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
313820131375Smrg
313920131375Smrg	*result = reg_read.val;
314020131375Smrg	return ret;
314120131375Smrg}
314220131375Smrg
314320131375Smrg
314420131375Smrg/**
314520131375Smrg * Annotate the given bo for use in aub dumping.
314620131375Smrg *
314720131375Smrg * \param annotations is an array of drm_intel_aub_annotation objects
314820131375Smrg * describing the type of data in various sections of the bo.  Each
314920131375Smrg * element of the array specifies the type and subtype of a section of
315020131375Smrg * the bo, and the past-the-end offset of that section.  The elements
315120131375Smrg * of \c annotations must be sorted so that ending_offset is
315220131375Smrg * increasing.
315320131375Smrg *
315420131375Smrg * \param count is the number of elements in the \c annotations array.
315520131375Smrg * If \c count is zero, then \c annotations will not be dereferenced.
315620131375Smrg *
315720131375Smrg * Annotations are copied into a private data structure, so caller may
315820131375Smrg * re-use the memory pointed to by \c annotations after the call
315920131375Smrg * returns.
316020131375Smrg *
316120131375Smrg * Annotations are stored for the lifetime of the bo; to reset to the
316220131375Smrg * default state (no annotations), call this function with a \c count
316320131375Smrg * of zero.
316420131375Smrg */
316520131375Smrgvoid
316620131375Smrgdrm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo,
316720131375Smrg					 drm_intel_aub_annotation *annotations,
316820131375Smrg					 unsigned count)
316920131375Smrg{
317020131375Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
317120131375Smrg	unsigned size = sizeof(*annotations) * count;
317220131375Smrg	drm_intel_aub_annotation *new_annotations =
317320131375Smrg		count > 0 ? realloc(bo_gem->aub_annotations, size) : NULL;
317420131375Smrg	if (new_annotations == NULL) {
317520131375Smrg		free(bo_gem->aub_annotations);
317620131375Smrg		bo_gem->aub_annotations = NULL;
317720131375Smrg		bo_gem->aub_annotation_count = 0;
317820131375Smrg		return;
317920131375Smrg	}
318020131375Smrg	memcpy(new_annotations, annotations, size);
318120131375Smrg	bo_gem->aub_annotations = new_annotations;
318220131375Smrg	bo_gem->aub_annotation_count = count;
318320131375Smrg}
318420131375Smrg
318522944501Smrg/**
318622944501Smrg * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
318722944501Smrg * and manage map buffer objections.
318822944501Smrg *
318922944501Smrg * \param fd File descriptor of the opened DRM device.
319022944501Smrg */
319122944501Smrgdrm_intel_bufmgr *
319222944501Smrgdrm_intel_bufmgr_gem_init(int fd, int batch_size)
319322944501Smrg{
319422944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem;
319522944501Smrg	struct drm_i915_gem_get_aperture aperture;
319622944501Smrg	drm_i915_getparam_t gp;
319720131375Smrg	int ret, tmp;
319820131375Smrg	bool exec2 = false;
319922944501Smrg
320022944501Smrg	bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
320122944501Smrg	if (bufmgr_gem == NULL)
320222944501Smrg		return NULL;
320322944501Smrg
320422944501Smrg	bufmgr_gem->fd = fd;
320522944501Smrg
320622944501Smrg	if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
320722944501Smrg		free(bufmgr_gem);
320822944501Smrg		return NULL;
320922944501Smrg	}
321022944501Smrg
32116d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd,
32126d98c517Smrg		       DRM_IOCTL_I915_GEM_GET_APERTURE,
32136d98c517Smrg		       &aperture);
321422944501Smrg
321522944501Smrg	if (ret == 0)
321622944501Smrg		bufmgr_gem->gtt_size = aperture.aper_available_size;
321722944501Smrg	else {
321822944501Smrg		fprintf(stderr, "DRM_IOCTL_I915_GEM_APERTURE failed: %s\n",
321922944501Smrg			strerror(errno));
322022944501Smrg		bufmgr_gem->gtt_size = 128 * 1024 * 1024;
322122944501Smrg		fprintf(stderr, "Assuming %dkB available aperture size.\n"
322222944501Smrg			"May lead to reduced performance or incorrect "
322322944501Smrg			"rendering.\n",
322422944501Smrg			(int)bufmgr_gem->gtt_size / 1024);
322522944501Smrg	}
322622944501Smrg
322720131375Smrg	bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem);
322822944501Smrg
322920131375Smrg	if (IS_GEN2(bufmgr_gem->pci_device))
323022944501Smrg		bufmgr_gem->gen = 2;
323120131375Smrg	else if (IS_GEN3(bufmgr_gem->pci_device))
323222944501Smrg		bufmgr_gem->gen = 3;
323320131375Smrg	else if (IS_GEN4(bufmgr_gem->pci_device))
323422944501Smrg		bufmgr_gem->gen = 4;
323520131375Smrg	else if (IS_GEN5(bufmgr_gem->pci_device))
323620131375Smrg		bufmgr_gem->gen = 5;
323720131375Smrg	else if (IS_GEN6(bufmgr_gem->pci_device))
323822944501Smrg		bufmgr_gem->gen = 6;
323920131375Smrg	else if (IS_GEN7(bufmgr_gem->pci_device))
324020131375Smrg		bufmgr_gem->gen = 7;
324120131375Smrg	else if (IS_GEN8(bufmgr_gem->pci_device))
324220131375Smrg		bufmgr_gem->gen = 8;
324320131375Smrg	else {
324420131375Smrg		free(bufmgr_gem);
324520131375Smrg		return NULL;
324620131375Smrg	}
324720131375Smrg
324820131375Smrg	if (IS_GEN3(bufmgr_gem->pci_device) &&
324920131375Smrg	    bufmgr_gem->gtt_size > 256*1024*1024) {
325020131375Smrg		/* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't
325120131375Smrg		 * be used for tiled blits. To simplify the accounting, just
325220131375Smrg		 * substract the unmappable part (fixed to 256MB on all known
325320131375Smrg		 * gen3 devices) if the kernel advertises it. */
325420131375Smrg		bufmgr_gem->gtt_size -= 256*1024*1024;
325520131375Smrg	}
325620131375Smrg
325720131375Smrg	VG_CLEAR(gp);
325820131375Smrg	gp.value = &tmp;
325922944501Smrg
326022944501Smrg	gp.param = I915_PARAM_HAS_EXECBUF2;
32616d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
326222944501Smrg	if (!ret)
326320131375Smrg		exec2 = true;
326422944501Smrg
3265aaba2545Smrg	gp.param = I915_PARAM_HAS_BSD;
32666d98c517Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
32679ce4edccSmrg	bufmgr_gem->has_bsd = ret == 0;
32689ce4edccSmrg
32699ce4edccSmrg	gp.param = I915_PARAM_HAS_BLT;
32709ce4edccSmrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
32719ce4edccSmrg	bufmgr_gem->has_blt = ret == 0;
32729ce4edccSmrg
32739ce4edccSmrg	gp.param = I915_PARAM_HAS_RELAXED_FENCING;
32749ce4edccSmrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
32759ce4edccSmrg	bufmgr_gem->has_relaxed_fencing = ret == 0;
3276aaba2545Smrg
327720131375Smrg	gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
327820131375Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
327920131375Smrg	bufmgr_gem->has_wait_timeout = ret == 0;
328020131375Smrg
328120131375Smrg	gp.param = I915_PARAM_HAS_LLC;
328220131375Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
328320131375Smrg	if (ret != 0) {
328420131375Smrg		/* Kernel does not supports HAS_LLC query, fallback to GPU
328520131375Smrg		 * generation detection and assume that we have LLC on GEN6/7
328620131375Smrg		 */
328720131375Smrg		bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) |
328820131375Smrg				IS_GEN7(bufmgr_gem->pci_device));
328920131375Smrg	} else
329020131375Smrg		bufmgr_gem->has_llc = *gp.value;
329120131375Smrg
329220131375Smrg	gp.param = I915_PARAM_HAS_VEBOX;
329320131375Smrg	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
329420131375Smrg	bufmgr_gem->has_vebox = (ret == 0) & (*gp.value > 0);
329520131375Smrg
329622944501Smrg	if (bufmgr_gem->gen < 4) {
329722944501Smrg		gp.param = I915_PARAM_NUM_FENCES_AVAIL;
329822944501Smrg		gp.value = &bufmgr_gem->available_fences;
32996d98c517Smrg		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
330022944501Smrg		if (ret) {
330122944501Smrg			fprintf(stderr, "get fences failed: %d [%d]\n", ret,
330222944501Smrg				errno);
330322944501Smrg			fprintf(stderr, "param: %d, val: %d\n", gp.param,
330422944501Smrg				*gp.value);
330522944501Smrg			bufmgr_gem->available_fences = 0;
330622944501Smrg		} else {
330722944501Smrg			/* XXX The kernel reports the total number of fences,
330822944501Smrg			 * including any that may be pinned.
330922944501Smrg			 *
331022944501Smrg			 * We presume that there will be at least one pinned
331122944501Smrg			 * fence for the scanout buffer, but there may be more
331222944501Smrg			 * than one scanout and the user may be manually
331322944501Smrg			 * pinning buffers. Let's move to execbuffer2 and
331422944501Smrg			 * thereby forget the insanity of using fences...
331522944501Smrg			 */
331622944501Smrg			bufmgr_gem->available_fences -= 2;
331722944501Smrg			if (bufmgr_gem->available_fences < 0)
331822944501Smrg				bufmgr_gem->available_fences = 0;
331922944501Smrg		}
332022944501Smrg	}
332122944501Smrg
332222944501Smrg	/* Let's go with one relocation per every 2 dwords (but round down a bit
332322944501Smrg	 * since a power of two will mean an extra page allocation for the reloc
332422944501Smrg	 * buffer).
332522944501Smrg	 *
332622944501Smrg	 * Every 4 was too few for the blender benchmark.
332722944501Smrg	 */
332822944501Smrg	bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
332922944501Smrg
333022944501Smrg	bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
333122944501Smrg	bufmgr_gem->bufmgr.bo_alloc_for_render =
333222944501Smrg	    drm_intel_gem_bo_alloc_for_render;
333322944501Smrg	bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
333422944501Smrg	bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
333522944501Smrg	bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
333622944501Smrg	bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
333722944501Smrg	bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
333822944501Smrg	bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
333922944501Smrg	bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
334022944501Smrg	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
334122944501Smrg	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
334222944501Smrg	bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
334322944501Smrg	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
334422944501Smrg	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
334522944501Smrg	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
334622944501Smrg	bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
334722944501Smrg	bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
334822944501Smrg	/* Use the new one if available */
3349aaba2545Smrg	if (exec2) {
335022944501Smrg		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2;
33519ce4edccSmrg		bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2;
3352aaba2545Smrg	} else
335322944501Smrg		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
335422944501Smrg	bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy;
335522944501Smrg	bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise;
335622944501Smrg	bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy;
335722944501Smrg	bufmgr_gem->bufmgr.debug = 0;
335822944501Smrg	bufmgr_gem->bufmgr.check_aperture_space =
335922944501Smrg	    drm_intel_gem_check_aperture_space;
336022944501Smrg	bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
3361aaba2545Smrg	bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable;
336222944501Smrg	bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
336322944501Smrg	    drm_intel_gem_get_pipe_from_crtc_id;
336422944501Smrg	bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;
336522944501Smrg
336620131375Smrg	DRMINITLISTHEAD(&bufmgr_gem->named);
3367aaba2545Smrg	init_cache_buckets(bufmgr_gem);
336822944501Smrg
336920131375Smrg	DRMINITLISTHEAD(&bufmgr_gem->vma_cache);
337020131375Smrg	bufmgr_gem->vma_max = -1; /* unlimited by default */
337120131375Smrg
337222944501Smrg	return &bufmgr_gem->bufmgr;
337322944501Smrg}
3374