intel_bufmgr_gem.c revision aaba2545
/**************************************************************************
 *
 * Copyright © 2007 Red Hat Inc.
 * Copyright © 2007 Intel Corporation
 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 *
 **************************************************************************/
/*
 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
 *	    Eric Anholt <eric@anholt.net>
 *	    Dave Airlie <airlied@linux.ie>
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <xf86drm.h>
#include <xf86atomic.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <stddef.h>
#include <errno.h>
#include <time.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>

#include "libdrm_lists.h"
#include "intel_bufmgr.h"
#include "intel_bufmgr_priv.h"
#include "intel_chipset.h"

#include "i915_drm.h"

#define DBG(...) do {					\
	if (bufmgr_gem->bufmgr.debug)			\
		fprintf(stderr, __VA_ARGS__);		\
} while (0)

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

typedef struct _drm_intel_bo_gem drm_intel_bo_gem;

struct drm_intel_gem_bo_bucket {
	drmMMListHead head;
	unsigned long size;
};

typedef struct _drm_intel_bufmgr_gem {
	drm_intel_bufmgr bufmgr;

	int fd;

	int max_relocs;

	pthread_mutex_t lock;

	struct drm_i915_gem_exec_object *exec_objects;
	struct drm_i915_gem_exec_object2 *exec2_objects;
	drm_intel_bo **exec_bos;
	int exec_size;
	int exec_count;

	/** Array of lists of cached gem objects of power-of-two sizes */
	struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
	int num_buckets;

	uint64_t gtt_size;
	int available_fences;
	int pci_device;
	int gen;
	char bo_reuse;
	char fenced_relocs;
} drm_intel_bufmgr_gem;

#define DRM_INTEL_RELOC_FENCE (1<<0)

typedef struct _drm_intel_reloc_target_info {
	drm_intel_bo *bo;
	int flags;
} drm_intel_reloc_target;

struct _drm_intel_bo_gem {
	drm_intel_bo bo;

	atomic_t refcount;
	uint32_t gem_handle;
	const char *name;

	/**
	 * Kernel-assigned global name for this object
	 */
	unsigned int global_name;

	/**
	 * Index of the buffer within the validation list while preparing a
	 * batchbuffer execution.
	 */
	int validate_index;

	/**
	 * Current tiling mode
	 */
	uint32_t tiling_mode;
	uint32_t swizzle_mode;

	time_t free_time;

	/** Array passed to the DRM containing relocation information. */
	struct drm_i915_gem_relocation_entry *relocs;
	/**
	 * Array of info structs corresponding to relocs[i].target_handle etc
	 */
	drm_intel_reloc_target *reloc_target_info;
	/** Number of entries in relocs */
	int reloc_count;
	/** Mapped address for the buffer, saved across map/unmap cycles */
	void *mem_virtual;
	/** GTT virtual address for the buffer, saved across map/unmap cycles */
	void *gtt_virtual;

	/** BO cache list */
	drmMMListHead head;

	/**
	 * Boolean of whether this BO and its children have been included in
	 * the current drm_intel_bufmgr_check_aperture_space() total.
	 */
	char included_in_check_aperture;

	/**
	 * Boolean of whether this buffer has been used as a relocation
	 * target and had its size accounted for, and thus can't have any
	 * further relocations added to it.
	 */
	char used_as_reloc_target;

	/**
	 * Boolean of whether we have encountered an error whilst building
	 * the relocation tree.
	 */
	char has_error;

	/**
	 * Boolean of whether this buffer can be re-used
	 */
	char reusable;

	/**
	 * Size in bytes of this buffer and its relocation descendants.
	 *
	 * Used to avoid costly tree walking in
	 * drm_intel_bufmgr_check_aperture in the common case.
	 */
	int reloc_tree_size;

	/**
	 * Number of potential fence registers required by this buffer and its
	 * relocations.
	 */
	int reloc_tree_fences;
};

static unsigned int
drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);

static unsigned int
drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count);

static int
drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
			    uint32_t * swizzle_mode);

static int
drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
			    uint32_t stride);

static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
						      time_t time);

static void drm_intel_gem_bo_unreference(drm_intel_bo *bo);

static void drm_intel_gem_bo_free(drm_intel_bo *bo);

static unsigned long
drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
			   uint32_t *tiling_mode)
{
	unsigned long min_size, max_size;
	unsigned long i;

	if (*tiling_mode == I915_TILING_NONE)
		return size;

	/* 965+ just needs multiples of page size for tiling */
	if (bufmgr_gem->gen >= 4)
		return ROUND_UP_TO(size, 4096);

	/* Older chips need powers of two, of at least 512k or 1M */
	if (bufmgr_gem->gen == 3) {
		min_size = 1024*1024;
		max_size = 128*1024*1024;
	} else {
		min_size = 512*1024;
		max_size = 64*1024*1024;
	}

	if (size > max_size) {
		*tiling_mode = I915_TILING_NONE;
		return size;
	}

	for (i = min_size; i < size; i <<= 1)
		;

	return i;
}

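/*
 * Worked example for drm_intel_gem_bo_tile_size() (illustrative numbers,
 * not from this file): for a 300 KB X-tiled request,
 *
 *	uint32_t tiling = I915_TILING_X;
 *	unsigned long sz = drm_intel_gem_bo_tile_size(bufmgr_gem,
 *						      300 * 1024, &tiling);
 *
 * sz is 307200 on gen4+ (300 KB is already page-aligned), 1048576 on gen3
 * (rounded up to the 1 MB minimum power of two) and 524288 on older chips
 * (512 KB minimum).  Requests above max_size fall back to I915_TILING_NONE,
 * which is why callers must re-check *tiling_mode afterwards.
 */
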
/*
 * Round a given pitch up to the minimum required for X tiling on a
 * given chip.  We use 512 as the minimum to allow for a later tiling
 * change.
 */
static unsigned long
drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
			    unsigned long pitch, uint32_t tiling_mode)
{
	unsigned long tile_width;
	unsigned long i;

	/* If untiled, then just align it so that we can do rendering
	 * to it with the 3D engine.
	 */
	if (tiling_mode == I915_TILING_NONE)
		return ALIGN(pitch, 64);

	if (tiling_mode == I915_TILING_X)
		tile_width = 512;
	else
		tile_width = 128;

	/* 965 is flexible */
	if (bufmgr_gem->gen >= 4)
		return ROUND_UP_TO(pitch, tile_width);

	/* Pre-965 needs power of two tile width */
	for (i = tile_width; i < pitch; i <<= 1)
		;

	return i;
}

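/*
 * Worked example for drm_intel_gem_bo_tile_pitch() (illustrative, assuming
 * a 1366-pixel-wide, 4-byte-per-pixel surface, i.e. pitch = 5464 bytes):
 * untiled, it aligns to 64 bytes (5504); X-tiled on gen4+, it rounds up to
 * the 512-byte tile width (5632); X-tiled pre-965, it rounds up to the next
 * power of two (8192), since those chips require a power-of-two pitch.
 */
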
static struct drm_intel_gem_bo_bucket *
drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
				 unsigned long size)
{
	int i;

	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
		struct drm_intel_gem_bo_bucket *bucket =
		    &bufmgr_gem->cache_bucket[i];
		if (bucket->size >= size) {
			return bucket;
		}
	}

	return NULL;
}

static void
drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
{
	int i, j;

	for (i = 0; i < bufmgr_gem->exec_count; i++) {
		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

		if (bo_gem->relocs == NULL) {
			DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle,
			    bo_gem->name);
			continue;
		}

		for (j = 0; j < bo_gem->reloc_count; j++) {
			drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo;
			drm_intel_bo_gem *target_gem =
			    (drm_intel_bo_gem *) target_bo;

			DBG("%2d: %d (%s)@0x%08llx -> "
			    "%d (%s)@0x%08lx + 0x%08x\n",
			    i,
			    bo_gem->gem_handle, bo_gem->name,
			    (unsigned long long)bo_gem->relocs[j].offset,
			    target_gem->gem_handle,
			    target_gem->name,
			    target_bo->offset,
			    bo_gem->relocs[j].delta);
		}
	}
}

static inline void
drm_intel_gem_bo_reference(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	assert(atomic_read(&bo_gem->refcount) > 0);
	atomic_inc(&bo_gem->refcount);
}

/**
 * Adds the given buffer to the list of buffers to be validated (moved into the
 * appropriate memory type) with the next batch submission.
 *
 * If a buffer is validated multiple times in a batch submission, it ends up
 * with the intersection of the memory type flags and the union of the
 * access flags.
 */
static void
drm_intel_add_validate_buffer(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int index;

	if (bo_gem->validate_index != -1)
		return;

	/* Extend the array of validation entries as necessary. */
	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
		int new_size = bufmgr_gem->exec_size * 2;

		if (new_size == 0)
			new_size = 5;

		bufmgr_gem->exec_objects =
		    realloc(bufmgr_gem->exec_objects,
			    sizeof(*bufmgr_gem->exec_objects) * new_size);
		bufmgr_gem->exec_bos =
		    realloc(bufmgr_gem->exec_bos,
			    sizeof(*bufmgr_gem->exec_bos) * new_size);
		bufmgr_gem->exec_size = new_size;
	}

	index = bufmgr_gem->exec_count;
	bo_gem->validate_index = index;
	/* Fill in array entry */
	bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
	bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
	bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs;
	bufmgr_gem->exec_objects[index].alignment = 0;
	bufmgr_gem->exec_objects[index].offset = 0;
	bufmgr_gem->exec_bos[index] = bo;
	bufmgr_gem->exec_count++;
}

static void
drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
	int index;

	if (bo_gem->validate_index != -1) {
		if (need_fence)
			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
				EXEC_OBJECT_NEEDS_FENCE;
		return;
	}

	/* Extend the array of validation entries as necessary. */
	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
		int new_size = bufmgr_gem->exec_size * 2;

		if (new_size == 0)
			new_size = 5;

		bufmgr_gem->exec2_objects =
			realloc(bufmgr_gem->exec2_objects,
				sizeof(*bufmgr_gem->exec2_objects) * new_size);
		bufmgr_gem->exec_bos =
			realloc(bufmgr_gem->exec_bos,
				sizeof(*bufmgr_gem->exec_bos) * new_size);
		bufmgr_gem->exec_size = new_size;
	}

	index = bufmgr_gem->exec_count;
	bo_gem->validate_index = index;
	/* Fill in array entry */
	bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
	bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
	bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
	bufmgr_gem->exec2_objects[index].alignment = 0;
	bufmgr_gem->exec2_objects[index].offset = 0;
	bufmgr_gem->exec_bos[index] = bo;
	bufmgr_gem->exec2_objects[index].flags = 0;
	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
	if (need_fence) {
		bufmgr_gem->exec2_objects[index].flags |=
			EXEC_OBJECT_NEEDS_FENCE;
	}
	bufmgr_gem->exec_count++;
}

#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
	sizeof(uint32_t))

static void
drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem,
				      drm_intel_bo_gem *bo_gem)
{
	int size;

	assert(!bo_gem->used_as_reloc_target);

	/* The older chipsets are far less flexible in terms of tiling,
	 * and require tiled buffers to be size-aligned in the aperture.
	 * This means that in the worst possible case we will need a hole
	 * twice as large as the object in order for it to fit into the
	 * aperture. Optimal packing is for wimps.
	 */
	size = bo_gem->bo.size;
	if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE)
		size *= 2;

	bo_gem->reloc_tree_size = size;
}

static int
drm_intel_setup_reloc_list(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	unsigned int max_relocs = bufmgr_gem->max_relocs;

	if (bo->size / 4 < max_relocs)
		max_relocs = bo->size / 4;

	bo_gem->relocs = malloc(max_relocs *
				sizeof(struct drm_i915_gem_relocation_entry));
	bo_gem->reloc_target_info = malloc(max_relocs *
					   sizeof(drm_intel_reloc_target));
	if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) {
		bo_gem->has_error = 1;

		free(bo_gem->relocs);
		bo_gem->relocs = NULL;

		free(bo_gem->reloc_target_info);
		bo_gem->reloc_target_info = NULL;

		return 1;
	}

	return 0;
}

static int
drm_intel_gem_bo_busy(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_busy busy;
	int ret;

	memset(&busy, 0, sizeof(busy));
	busy.handle = bo_gem->gem_handle;

	do {
		ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
	} while (ret == -1 && errno == EINTR);

	return (ret == 0 && busy.busy);
}

static int
drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem,
				  drm_intel_bo_gem *bo_gem, int state)
{
	struct drm_i915_gem_madvise madv;

	madv.handle = bo_gem->gem_handle;
	madv.madv = state;
	madv.retained = 1;
	ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);

	return madv.retained;
}

static int
drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv)
{
	return drm_intel_gem_bo_madvise_internal
		((drm_intel_bufmgr_gem *) bo->bufmgr,
		 (drm_intel_bo_gem *) bo,
		 madv);
}

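/*
 * Sketch of the purgeable-buffer protocol the cache relies on (illustrative
 * only; "scratch_bo" is a hypothetical buffer whose contents the caller can
 * regenerate).  The public wrapper is drm_intel_bo_madvise():
 *
 *	drm_intel_bo_madvise(scratch_bo, I915_MADV_DONTNEED);
 *	... the kernel may now reclaim the pages under memory pressure ...
 *	if (!drm_intel_bo_madvise(scratch_bo, I915_MADV_WILLNEED)) {
 *		... pages were purged: contents must be regenerated ...
 *	}
 *
 * madvise returns madv.retained, so a zero result after WILLNEED means the
 * backing store was discarded while the buffer sat in the cache.
 */
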
/* drop the oldest entries that have been purged by the kernel */
static void
drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
				    struct drm_intel_gem_bo_bucket *bucket)
{
	while (!DRMLISTEMPTY(&bucket->head)) {
		drm_intel_bo_gem *bo_gem;

		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
				      bucket->head.next, head);
		if (drm_intel_gem_bo_madvise_internal
		    (bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
			break;

		DRMLISTDEL(&bo_gem->head);
		drm_intel_gem_bo_free(&bo_gem->bo);
	}
}

static drm_intel_bo *
drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
				const char *name,
				unsigned long size,
				unsigned long flags)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
	drm_intel_bo_gem *bo_gem;
	unsigned int page_size = getpagesize();
	int ret;
	struct drm_intel_gem_bo_bucket *bucket;
	int alloc_from_cache;
	unsigned long bo_size;
	int for_render = 0;

	if (flags & BO_ALLOC_FOR_RENDER)
		for_render = 1;

	/* Round the allocated size up to a power of two number of pages. */
	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);

	/* If we don't have caching at this size, don't actually round the
	 * allocation up.
	 */
	if (bucket == NULL) {
		bo_size = size;
		if (bo_size < page_size)
			bo_size = page_size;
	} else {
		bo_size = bucket->size;
	}

	pthread_mutex_lock(&bufmgr_gem->lock);
	/* Get a buffer out of the cache if available */
retry:
	alloc_from_cache = 0;
	if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
		if (for_render) {
			/* Allocate new render-target BOs from the tail (MRU)
			 * of the list, as it will likely be hot in the GPU
			 * cache and in the aperture for us.
			 */
			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
					      bucket->head.prev, head);
			DRMLISTDEL(&bo_gem->head);
			alloc_from_cache = 1;
		} else {
			/* For non-render-target BOs (where we're probably
			 * going to map it first thing in order to fill it
			 * with data), check if the last BO in the cache is
			 * unbusy, and only reuse in that case. Otherwise,
			 * allocating a new buffer is probably faster than
			 * waiting for the GPU to finish.
			 */
			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
					      bucket->head.next, head);
			if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
				alloc_from_cache = 1;
				DRMLISTDEL(&bo_gem->head);
			}
		}

		if (alloc_from_cache) {
			if (!drm_intel_gem_bo_madvise_internal
			    (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
				drm_intel_gem_bo_free(&bo_gem->bo);
				drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
								    bucket);
				goto retry;
			}
		}
	}
	pthread_mutex_unlock(&bufmgr_gem->lock);

	if (!alloc_from_cache) {
		struct drm_i915_gem_create create;

		bo_gem = calloc(1, sizeof(*bo_gem));
		if (!bo_gem)
			return NULL;

		bo_gem->bo.size = bo_size;
		memset(&create, 0, sizeof(create));
		create.size = bo_size;

		do {
			ret = ioctl(bufmgr_gem->fd,
				    DRM_IOCTL_I915_GEM_CREATE,
				    &create);
		} while (ret == -1 && errno == EINTR);
		bo_gem->gem_handle = create.handle;
		bo_gem->bo.handle = bo_gem->gem_handle;
		if (ret != 0) {
			free(bo_gem);
			return NULL;
		}
		bo_gem->bo.bufmgr = bufmgr;
	}

	bo_gem->name = name;
	atomic_set(&bo_gem->refcount, 1);
	bo_gem->validate_index = -1;
	bo_gem->reloc_tree_fences = 0;
	bo_gem->used_as_reloc_target = 0;
	bo_gem->has_error = 0;
	bo_gem->tiling_mode = I915_TILING_NONE;
	bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
	bo_gem->reusable = 1;

	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);

	DBG("bo_create: buf %d (%s) %ldb\n",
	    bo_gem->gem_handle, bo_gem->name, size);

	return &bo_gem->bo;
}

static drm_intel_bo *
drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
				  const char *name,
				  unsigned long size,
				  unsigned int alignment)
{
	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
					       BO_ALLOC_FOR_RENDER);
}

static drm_intel_bo *
drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr,
		       const char *name,
		       unsigned long size,
		       unsigned int alignment)
{
	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0);
}

static drm_intel_bo *
drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
			     int x, int y, int cpp, uint32_t *tiling_mode,
			     unsigned long *pitch, unsigned long flags)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
	drm_intel_bo *bo;
	unsigned long size, stride;
	uint32_t tiling;
	int ret;

	do {
		unsigned long aligned_y;

		tiling = *tiling_mode;

		/* If we're tiled, our allocations are in 8 or 32-row blocks,
		 * so failure to align our height means that we won't allocate
		 * enough pages.
		 *
		 * If we're untiled, we still have to align to 2 rows high
		 * because the data port accesses 2x2 blocks even if the
		 * bottom row isn't to be rendered, so failure to align means
		 * we could walk off the end of the GTT and fault.  This is
		 * documented on 965, and may be the case on older chipsets
		 * too so we try to be careful.
		 */
		aligned_y = y;
		if (tiling == I915_TILING_NONE)
			aligned_y = ALIGN(y, 2);
		else if (tiling == I915_TILING_X)
			aligned_y = ALIGN(y, 8);
		else if (tiling == I915_TILING_Y)
			aligned_y = ALIGN(y, 32);

		stride = x * cpp;
		stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling);
		size = stride * aligned_y;
		size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
	} while (*tiling_mode != tiling);

	bo = drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags);
	if (!bo)
		return NULL;

	ret = drm_intel_gem_bo_set_tiling(bo, tiling_mode, stride);
	if (ret != 0) {
		drm_intel_gem_bo_unreference(bo);
		return NULL;
	}

	*pitch = stride;

	return bo;
}

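/*
 * Usage sketch for the allocators above, via the public entry points in
 * intel_bufmgr.h (illustrative only; error handling is omitted and the
 * 1024x768 32bpp surface is hypothetical):
 *
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 16 * 1024);
 *	drm_intel_bo *batch = drm_intel_bo_alloc(bufmgr, "batch", 4096, 4096);
 *
 *	uint32_t tiling = I915_TILING_X;
 *	unsigned long pitch;
 *	drm_intel_bo *surf = drm_intel_bo_alloc_tiled(bufmgr, "surface",
 *						      1024, 768, 4,
 *						      &tiling, &pitch, 0);
 *
 * On return, tiling and pitch hold what was actually granted; a too-large
 * surface may come back I915_TILING_NONE, per drm_intel_gem_bo_tile_size()
 * above.
 */
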
/**
 * Returns a drm_intel_bo wrapping the given buffer object handle.
 *
 * This can be used when one application needs to pass a buffer object
 * to another.
 */
drm_intel_bo *
drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr,
				  const char *name,
				  unsigned int handle)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
	drm_intel_bo_gem *bo_gem;
	int ret;
	struct drm_gem_open open_arg;
	struct drm_i915_gem_get_tiling get_tiling;

	bo_gem = calloc(1, sizeof(*bo_gem));
	if (!bo_gem)
		return NULL;

	memset(&open_arg, 0, sizeof(open_arg));
	open_arg.name = handle;
	do {
		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_GEM_OPEN,
			    &open_arg);
	} while (ret == -1 && errno == EINTR);
	if (ret != 0) {
		fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n",
			name, handle, strerror(errno));
		free(bo_gem);
		return NULL;
	}
	bo_gem->bo.size = open_arg.size;
	bo_gem->bo.offset = 0;
	bo_gem->bo.virtual = NULL;
	bo_gem->bo.bufmgr = bufmgr;
	bo_gem->name = name;
	atomic_set(&bo_gem->refcount, 1);
	bo_gem->validate_index = -1;
	bo_gem->gem_handle = open_arg.handle;
	bo_gem->global_name = handle;
	bo_gem->reusable = 0;

	memset(&get_tiling, 0, sizeof(get_tiling));
	get_tiling.handle = bo_gem->gem_handle;
	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling);
	if (ret != 0) {
		drm_intel_gem_bo_unreference(&bo_gem->bo);
		return NULL;
	}
	bo_gem->tiling_mode = get_tiling.tiling_mode;
	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);

	DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);

	return &bo_gem->bo;
}

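/*
 * Sketch of cross-process sharing with flink names (illustrative; both
 * sides must have the same DRM device open).  The exporter publishes a
 * global name, the importer wraps it with the function above:
 *
 *	exporter:
 *		uint32_t name;
 *		drm_intel_bo_flink(bo, &name);
 *		... pass "name" to the other process over IPC ...
 *
 *	importer:
 *		drm_intel_bo *bo =
 *			drm_intel_bo_gem_create_from_name(bufmgr,
 *							  "shared", name);
 *
 * Imported buffers are marked non-reusable above, so they bypass the bucket
 * cache when freed.
 */
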
static void
drm_intel_gem_bo_free(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_gem_close close;
	int ret;

	if (bo_gem->mem_virtual)
		munmap(bo_gem->mem_virtual, bo_gem->bo.size);
	if (bo_gem->gtt_virtual)
		munmap(bo_gem->gtt_virtual, bo_gem->bo.size);

	/* Close this object */
	memset(&close, 0, sizeof(close));
	close.handle = bo_gem->gem_handle;
	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close);
	if (ret != 0) {
		fprintf(stderr,
			"DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
			bo_gem->gem_handle, bo_gem->name, strerror(errno));
	}
	free(bo);
}

/** Frees all cached buffers significantly older than @time. */
static void
drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
{
	int i;

	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
		struct drm_intel_gem_bo_bucket *bucket =
		    &bufmgr_gem->cache_bucket[i];

		while (!DRMLISTEMPTY(&bucket->head)) {
			drm_intel_bo_gem *bo_gem;

			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
					      bucket->head.next, head);
			if (time - bo_gem->free_time <= 1)
				break;

			DRMLISTDEL(&bo_gem->head);

			drm_intel_gem_bo_free(&bo_gem->bo);
		}
	}
}

static void
drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_intel_gem_bo_bucket *bucket;
	uint32_t tiling_mode;
	int i;

	/* Unreference all the target buffers */
	for (i = 0; i < bo_gem->reloc_count; i++) {
		if (bo_gem->reloc_target_info[i].bo != bo) {
			drm_intel_gem_bo_unreference_locked_timed(bo_gem->
								  reloc_target_info[i].bo,
								  time);
		}
	}
	bo_gem->reloc_count = 0;
	bo_gem->used_as_reloc_target = 0;

	DBG("bo_unreference final: %d (%s)\n",
	    bo_gem->gem_handle, bo_gem->name);

	/* release memory associated with this object */
	if (bo_gem->reloc_target_info) {
		free(bo_gem->reloc_target_info);
		bo_gem->reloc_target_info = NULL;
	}
	if (bo_gem->relocs) {
		free(bo_gem->relocs);
		bo_gem->relocs = NULL;
	}

	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
	/* Put the buffer into our internal cache for reuse if we can. */
	tiling_mode = I915_TILING_NONE;
	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
	    drm_intel_gem_bo_set_tiling(bo, &tiling_mode, 0) == 0 &&
	    drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
					      I915_MADV_DONTNEED)) {
		bo_gem->free_time = time;

		bo_gem->name = NULL;
		bo_gem->validate_index = -1;

		DRMLISTADDTAIL(&bo_gem->head, &bucket->head);

		drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time);
	} else {
		drm_intel_gem_bo_free(bo);
	}
}

static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
						      time_t time)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	assert(atomic_read(&bo_gem->refcount) > 0);
	if (atomic_dec_and_test(&bo_gem->refcount))
		drm_intel_gem_bo_unreference_final(bo, time);
}

static void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	assert(atomic_read(&bo_gem->refcount) > 0);
	if (atomic_dec_and_test(&bo_gem->refcount)) {
		drm_intel_bufmgr_gem *bufmgr_gem =
		    (drm_intel_bufmgr_gem *) bo->bufmgr;
		struct timespec time;

		clock_gettime(CLOCK_MONOTONIC, &time);

		pthread_mutex_lock(&bufmgr_gem->lock);
		drm_intel_gem_bo_unreference_final(bo, time.tv_sec);
		pthread_mutex_unlock(&bufmgr_gem->lock);
	}
}

static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_set_domain set_domain;
	int ret;

	pthread_mutex_lock(&bufmgr_gem->lock);

	/* Allow recursive mapping. Mesa may recursively map buffers with
	 * nested display loops.
	 */
	if (!bo_gem->mem_virtual) {
		struct drm_i915_gem_mmap mmap_arg;

		DBG("bo_map: %d (%s)\n", bo_gem->gem_handle, bo_gem->name);

		memset(&mmap_arg, 0, sizeof(mmap_arg));
		mmap_arg.handle = bo_gem->gem_handle;
		mmap_arg.offset = 0;
		mmap_arg.size = bo->size;
		do {
			ret = ioctl(bufmgr_gem->fd,
				    DRM_IOCTL_I915_GEM_MMAP,
				    &mmap_arg);
		} while (ret == -1 && errno == EINTR);
		if (ret != 0) {
			ret = -errno;
			fprintf(stderr,
				"%s:%d: Error mapping buffer %d (%s): %s .\n",
				__FILE__, __LINE__, bo_gem->gem_handle,
				bo_gem->name, strerror(errno));
			pthread_mutex_unlock(&bufmgr_gem->lock);
			return ret;
		}
		bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
	}
	DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
	    bo_gem->mem_virtual);
	bo->virtual = bo_gem->mem_virtual;

	set_domain.handle = bo_gem->gem_handle;
	set_domain.read_domains = I915_GEM_DOMAIN_CPU;
	if (write_enable)
		set_domain.write_domain = I915_GEM_DOMAIN_CPU;
	else
		set_domain.write_domain = 0;
	do {
		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_SET_DOMAIN,
			    &set_domain);
	} while (ret == -1 && errno == EINTR);
	if (ret != 0) {
		ret = -errno;
		fprintf(stderr, "%s:%d: Error setting to CPU domain %d: %s\n",
			__FILE__, __LINE__, bo_gem->gem_handle,
			strerror(errno));
		pthread_mutex_unlock(&bufmgr_gem->lock);
		return ret;
	}

	pthread_mutex_unlock(&bufmgr_gem->lock);

	return 0;
}

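/*
 * Typical CPU-map cycle using the public wrappers (illustrative; "data" is
 * a hypothetical source buffer):
 *
 *	drm_intel_bo_map(bo, 1);	... write_enable = 1 ...
 *	memcpy(bo->virtual, data, bo->size);
 *	drm_intel_bo_unmap(bo);
 *
 * For a one-shot upload, drm_intel_bo_subdata() (implemented further below
 * with DRM_IOCTL_I915_GEM_PWRITE) avoids keeping a mapping around at all.
 */
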
int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_set_domain set_domain;
	int ret;

	pthread_mutex_lock(&bufmgr_gem->lock);

	/* Get a mapping of the buffer if we haven't before. */
	if (bo_gem->gtt_virtual == NULL) {
		struct drm_i915_gem_mmap_gtt mmap_arg;

		DBG("bo_map_gtt: mmap %d (%s)\n", bo_gem->gem_handle,
		    bo_gem->name);

		memset(&mmap_arg, 0, sizeof(mmap_arg));
		mmap_arg.handle = bo_gem->gem_handle;

		/* Get the fake offset back... */
		do {
			ret = ioctl(bufmgr_gem->fd,
				    DRM_IOCTL_I915_GEM_MMAP_GTT,
				    &mmap_arg);
		} while (ret == -1 && errno == EINTR);
		if (ret != 0) {
			ret = -errno;
			fprintf(stderr,
				"%s:%d: Error preparing buffer map %d (%s): %s .\n",
				__FILE__, __LINE__,
				bo_gem->gem_handle, bo_gem->name,
				strerror(errno));
			pthread_mutex_unlock(&bufmgr_gem->lock);
			return ret;
		}

		/* and mmap it */
		bo_gem->gtt_virtual = mmap(0, bo->size, PROT_READ | PROT_WRITE,
					   MAP_SHARED, bufmgr_gem->fd,
					   mmap_arg.offset);
		if (bo_gem->gtt_virtual == MAP_FAILED) {
			bo_gem->gtt_virtual = NULL;
			ret = -errno;
			fprintf(stderr,
				"%s:%d: Error mapping buffer %d (%s): %s .\n",
				__FILE__, __LINE__,
				bo_gem->gem_handle, bo_gem->name,
				strerror(errno));
			pthread_mutex_unlock(&bufmgr_gem->lock);
			return ret;
		}
	}

	bo->virtual = bo_gem->gtt_virtual;

	DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
	    bo_gem->gtt_virtual);

	/* Now move it to the GTT domain so that the CPU caches are flushed */
	set_domain.handle = bo_gem->gem_handle;
	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
	set_domain.write_domain = I915_GEM_DOMAIN_GTT;
	do {
		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_SET_DOMAIN,
			    &set_domain);
	} while (ret == -1 && errno == EINTR);

	if (ret != 0) {
		ret = -errno;
		fprintf(stderr, "%s:%d: Error setting domain %d: %s\n",
			__FILE__, __LINE__, bo_gem->gem_handle,
			strerror(errno));
	}

	pthread_mutex_unlock(&bufmgr_gem->lock);

	return ret;
}

int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int ret = 0;

	if (bo == NULL)
		return 0;

	assert(bo_gem->gtt_virtual != NULL);

	pthread_mutex_lock(&bufmgr_gem->lock);
	bo->virtual = NULL;
	pthread_mutex_unlock(&bufmgr_gem->lock);

	return ret;
}

static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_sw_finish sw_finish;
	int ret;

	if (bo == NULL)
		return 0;

	assert(bo_gem->mem_virtual != NULL);

	pthread_mutex_lock(&bufmgr_gem->lock);

	/* Cause a flush to happen if the buffer's pinned for scanout, so the
	 * results show up in a timely manner.
	 */
	sw_finish.handle = bo_gem->gem_handle;
	do {
		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_SW_FINISH,
			    &sw_finish);
	} while (ret == -1 && errno == EINTR);
	ret = ret == -1 ? -errno : 0;

	bo->virtual = NULL;
	pthread_mutex_unlock(&bufmgr_gem->lock);

	return ret;
}

static int
drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
			 unsigned long size, const void *data)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_pwrite pwrite;
	int ret;

	memset(&pwrite, 0, sizeof(pwrite));
	pwrite.handle = bo_gem->gem_handle;
	pwrite.offset = offset;
	pwrite.size = size;
	pwrite.data_ptr = (uint64_t) (uintptr_t) data;
	do {
		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_PWRITE,
			    &pwrite);
	} while (ret == -1 && errno == EINTR);
	if (ret != 0) {
		ret = -errno;
		fprintf(stderr,
			"%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
			__FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
			(int)size, strerror(errno));
	}

	return ret;
}

static int
drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
	struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id;
	int ret;

	get_pipe_from_crtc_id.crtc_id = crtc_id;
	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
		    &get_pipe_from_crtc_id);
	if (ret != 0) {
		/* We return -1 here to signal that we don't
		 * know which pipe is associated with this crtc.
		 * This lets the caller know that this information
		 * isn't available; using the wrong pipe for
		 * vblank waiting can cause the chipset to lock up.
		 */
		return -1;
	}

	return get_pipe_from_crtc_id.pipe;
}

static int
drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
			     unsigned long size, void *data)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_pread pread;
	int ret;

	memset(&pread, 0, sizeof(pread));
	pread.handle = bo_gem->gem_handle;
	pread.offset = offset;
	pread.size = size;
	pread.data_ptr = (uint64_t) (uintptr_t) data;
	do {
		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_PREAD,
			    &pread);
	} while (ret == -1 && errno == EINTR);
	if (ret != 0) {
		ret = -errno;
		fprintf(stderr,
			"%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
			__FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
			(int)size, strerror(errno));
	}

	return ret;
}

/** Waits for all GPU rendering to the object to have completed. */
static void
drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
{
	drm_intel_gem_bo_start_gtt_access(bo, 0);
}

/**
 * Sets the object to the GTT read and possibly write domain, used by the X
 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
 *
 * In combination with drm_intel_gem_bo_pin() and manual fence management, we
 * can do tiled pixmaps this way.
 */
void
drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_set_domain set_domain;
	int ret;

	set_domain.handle = bo_gem->gem_handle;
	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
	set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
	do {
		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_SET_DOMAIN,
			    &set_domain);
	} while (ret == -1 && errno == EINTR);
	if (ret != 0) {
		fprintf(stderr,
			"%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
			__FILE__, __LINE__, bo_gem->gem_handle,
			set_domain.read_domains, set_domain.write_domain,
			strerror(errno));
	}
}

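/*
 * Illustrative sketch of how a 2D driver might use GTT access for a tiled
 * scanout pixmap ("pixmap_bo" is hypothetical).  Through a GTT mapping the
 * fence registers detile accesses in hardware, unlike the linear CPU
 * mapping above:
 *
 *	if (drm_intel_gem_bo_map_gtt(pixmap_bo) == 0) {
 *		... write pixels through pixmap_bo->virtual ...
 *		drm_intel_gem_bo_unmap_gtt(pixmap_bo);
 *	}
 *
 * Without kernel GTT-mmap support, drm_intel_gem_bo_start_gtt_access()
 * above performs the same domain transition for a pinned, manually fenced
 * buffer.
 */
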
125022944501Smrgstatic void
125122944501Smrgdrm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
125222944501Smrg{
125322944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
125422944501Smrg	int i;
125522944501Smrg
125622944501Smrg	free(bufmgr_gem->exec2_objects);
125722944501Smrg	free(bufmgr_gem->exec_objects);
125822944501Smrg	free(bufmgr_gem->exec_bos);
125922944501Smrg
126022944501Smrg	pthread_mutex_destroy(&bufmgr_gem->lock);
126122944501Smrg
126222944501Smrg	/* Free any cached buffer objects we were going to reuse */
1263aaba2545Smrg	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
126422944501Smrg		struct drm_intel_gem_bo_bucket *bucket =
126522944501Smrg		    &bufmgr_gem->cache_bucket[i];
126622944501Smrg		drm_intel_bo_gem *bo_gem;
126722944501Smrg
126822944501Smrg		while (!DRMLISTEMPTY(&bucket->head)) {
126922944501Smrg			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
127022944501Smrg					      bucket->head.next, head);
127122944501Smrg			DRMLISTDEL(&bo_gem->head);
127222944501Smrg
127322944501Smrg			drm_intel_gem_bo_free(&bo_gem->bo);
127422944501Smrg		}
127522944501Smrg	}
127622944501Smrg
127722944501Smrg	free(bufmgr);
127822944501Smrg}
127922944501Smrg
128022944501Smrg/**
128122944501Smrg * Adds the target buffer to the validation list and adds the relocation
128222944501Smrg * to the reloc_buffer's relocation list.
128322944501Smrg *
128422944501Smrg * The relocation entry at the given offset must already contain the
128522944501Smrg * precomputed relocation value, because the kernel will optimize out
128622944501Smrg * the relocation entry write when the buffer hasn't moved from the
128722944501Smrg * last known offset in target_bo.
128822944501Smrg */
128922944501Smrgstatic int
129022944501Smrgdo_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
129122944501Smrg		 drm_intel_bo *target_bo, uint32_t target_offset,
129222944501Smrg		 uint32_t read_domains, uint32_t write_domain,
129322944501Smrg		 int need_fence)
129422944501Smrg{
129522944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
129622944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
129722944501Smrg	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
129822944501Smrg
129922944501Smrg	if (bo_gem->has_error)
130022944501Smrg		return -ENOMEM;
130122944501Smrg
130222944501Smrg	if (target_bo_gem->has_error) {
130322944501Smrg		bo_gem->has_error = 1;
130422944501Smrg		return -ENOMEM;
130522944501Smrg	}
130622944501Smrg
130722944501Smrg	if (target_bo_gem->tiling_mode == I915_TILING_NONE)
130822944501Smrg		need_fence = 0;
130922944501Smrg
131022944501Smrg	/* We never use HW fences for rendering on 965+ */
131122944501Smrg	if (bufmgr_gem->gen >= 4)
131222944501Smrg		need_fence = 0;
131322944501Smrg
131422944501Smrg	/* Create a new relocation list if needed */
131522944501Smrg	if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo))
131622944501Smrg		return -ENOMEM;
131722944501Smrg
131822944501Smrg	/* Check overflow */
131922944501Smrg	assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);
132022944501Smrg
132122944501Smrg	/* Check args */
132222944501Smrg	assert(offset <= bo->size - 4);
132322944501Smrg	assert((write_domain & (write_domain - 1)) == 0);
132422944501Smrg
132522944501Smrg	/* Make sure that we're not adding a reloc to something whose size has
132622944501Smrg	 * already been accounted for.
132722944501Smrg	 */
132822944501Smrg	assert(!bo_gem->used_as_reloc_target);
1329aaba2545Smrg	if (target_bo_gem != bo_gem) {
1330aaba2545Smrg		target_bo_gem->used_as_reloc_target = 1;
1331aaba2545Smrg		bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
1332aaba2545Smrg	}
133322944501Smrg	/* An object needing a fence is a tiled buffer, so it won't have
133422944501Smrg	 * relocs to other buffers.
133522944501Smrg	 */
133622944501Smrg	if (need_fence)
133722944501Smrg		target_bo_gem->reloc_tree_fences = 1;
133822944501Smrg	bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
133922944501Smrg
134022944501Smrg	/* Flag the target to disallow further relocations in it. */
134122944501Smrg
134222944501Smrg	bo_gem->relocs[bo_gem->reloc_count].offset = offset;
134322944501Smrg	bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
134422944501Smrg	bo_gem->relocs[bo_gem->reloc_count].target_handle =
134522944501Smrg	    target_bo_gem->gem_handle;
134622944501Smrg	bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
134722944501Smrg	bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
134822944501Smrg	bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset;
134922944501Smrg
135022944501Smrg	bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
1351aaba2545Smrg	if (target_bo != bo)
1352aaba2545Smrg		drm_intel_gem_bo_reference(target_bo);
135322944501Smrg	if (need_fence)
135422944501Smrg		bo_gem->reloc_target_info[bo_gem->reloc_count].flags =
135522944501Smrg			DRM_INTEL_RELOC_FENCE;
135622944501Smrg	else
135722944501Smrg		bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
135822944501Smrg
135922944501Smrg	bo_gem->reloc_count++;
136022944501Smrg
136122944501Smrg	return 0;
136222944501Smrg}
136322944501Smrg
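/*
 * Usage sketch (hypothetical caller, not part of this file): the batch
 * must already contain the presumed address of the target at the reloc
 * offset, because the kernel skips rewriting relocation entries whose
 * target has not moved:
 *
 *	uint32_t *batch_map = batch_bo->virtual;
 *	batch_map[reloc_idx] = target_bo->offset + target_delta;
 *	ret = drm_intel_bo_emit_reloc(batch_bo, reloc_idx * 4,
 *				      target_bo, target_delta,
 *				      I915_GEM_DOMAIN_RENDER,
 *				      I915_GEM_DOMAIN_RENDER);
 *
 * "batch_bo", "reloc_idx" and "target_delta" are illustrative names.
 */
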
136422944501Smrgstatic int
136522944501Smrgdrm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
136622944501Smrg			    drm_intel_bo *target_bo, uint32_t target_offset,
136722944501Smrg			    uint32_t read_domains, uint32_t write_domain)
136822944501Smrg{
136922944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
137022944501Smrg
137122944501Smrg	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
137222944501Smrg				read_domains, write_domain,
137322944501Smrg				!bufmgr_gem->fenced_relocs);
137422944501Smrg}
137522944501Smrg
137622944501Smrgstatic int
137722944501Smrgdrm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
137822944501Smrg				  drm_intel_bo *target_bo,
137922944501Smrg				  uint32_t target_offset,
138022944501Smrg				  uint32_t read_domains, uint32_t write_domain)
138122944501Smrg{
138222944501Smrg	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
138322944501Smrg				read_domains, write_domain, 1);
138422944501Smrg}
138522944501Smrg
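/*
 * Of the two wrappers above, the _fence variant is for targets that
 * must be accessed through a fence register (tiled buffers on pre-965
 * hardware); on gen4+ it degrades to an ordinary reloc.  A hedged
 * example, with "batch_bo" and "tiled_bo" assumed from the caller:
 *
 *	ret = drm_intel_bo_emit_reloc_fence(batch_bo, reloc_offset,
 *					    tiled_bo, 0,
 *					    I915_GEM_DOMAIN_RENDER,
 *					    I915_GEM_DOMAIN_RENDER);
 */
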
138622944501Smrg/**
138722944501Smrg * Walk the tree of relocations rooted at BO and accumulate the list of
138822944501Smrg * validations to be performed and update the relocation buffers with
138922944501Smrg * index values into the validation list.
139022944501Smrg */
139122944501Smrgstatic void
139222944501Smrgdrm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
139322944501Smrg{
139422944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
139522944501Smrg	int i;
139622944501Smrg
139722944501Smrg	if (bo_gem->relocs == NULL)
139822944501Smrg		return;
139922944501Smrg
140022944501Smrg	for (i = 0; i < bo_gem->reloc_count; i++) {
140122944501Smrg		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
140222944501Smrg
1403aaba2545Smrg		if (target_bo == bo)
1404aaba2545Smrg			continue;
1405aaba2545Smrg
140622944501Smrg		/* Continue walking the tree depth-first. */
140722944501Smrg		drm_intel_gem_bo_process_reloc(target_bo);
140822944501Smrg
140922944501Smrg		/* Add the target to the validate list */
141022944501Smrg		drm_intel_add_validate_buffer(target_bo);
141122944501Smrg	}
141222944501Smrg}
141322944501Smrg
141422944501Smrgstatic void
141522944501Smrgdrm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
141622944501Smrg{
141722944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
141822944501Smrg	int i;
141922944501Smrg
142022944501Smrg	if (bo_gem->relocs == NULL)
142122944501Smrg		return;
142222944501Smrg
142322944501Smrg	for (i = 0; i < bo_gem->reloc_count; i++) {
142422944501Smrg		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
142522944501Smrg		int need_fence;
142622944501Smrg
1427aaba2545Smrg		if (target_bo == bo)
1428aaba2545Smrg			continue;
1429aaba2545Smrg
143022944501Smrg		/* Continue walking the tree depth-first. */
143122944501Smrg		drm_intel_gem_bo_process_reloc2(target_bo);
143222944501Smrg
143322944501Smrg		need_fence = (bo_gem->reloc_target_info[i].flags &
143422944501Smrg			      DRM_INTEL_RELOC_FENCE);
143522944501Smrg
143622944501Smrg		/* Add the target to the validate list */
143722944501Smrg		drm_intel_add_validate_buffer2(target_bo, need_fence);
143822944501Smrg	}
143922944501Smrg}
144222944501Smrgstatic void
144322944501Smrgdrm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
144422944501Smrg{
144522944501Smrg	int i;
144622944501Smrg
144722944501Smrg	for (i = 0; i < bufmgr_gem->exec_count; i++) {
144822944501Smrg		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
144922944501Smrg		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
145022944501Smrg
145122944501Smrg		/* Update the buffer offset */
145222944501Smrg		if (bufmgr_gem->exec_objects[i].offset != bo->offset) {
145322944501Smrg			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
145422944501Smrg			    bo_gem->gem_handle, bo_gem->name, bo->offset,
145522944501Smrg			    (unsigned long long)bufmgr_gem->exec_objects[i].
145622944501Smrg			    offset);
145722944501Smrg			bo->offset = bufmgr_gem->exec_objects[i].offset;
145822944501Smrg		}
145922944501Smrg	}
146022944501Smrg}
146122944501Smrg
146222944501Smrgstatic void
drm_intel_update_buffer_offsets2(drm_intel_bufmgr_gem *bufmgr_gem)
146422944501Smrg{
146522944501Smrg	int i;
146622944501Smrg
146722944501Smrg	for (i = 0; i < bufmgr_gem->exec_count; i++) {
146822944501Smrg		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
146922944501Smrg		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
147022944501Smrg
147122944501Smrg		/* Update the buffer offset */
147222944501Smrg		if (bufmgr_gem->exec2_objects[i].offset != bo->offset) {
147322944501Smrg			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
147422944501Smrg			    bo_gem->gem_handle, bo_gem->name, bo->offset,
147522944501Smrg			    (unsigned long long)bufmgr_gem->exec2_objects[i].offset);
147622944501Smrg			bo->offset = bufmgr_gem->exec2_objects[i].offset;
147722944501Smrg		}
147822944501Smrg	}
147922944501Smrg}
148022944501Smrg
148122944501Smrgstatic int
148222944501Smrgdrm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
148322944501Smrg		      drm_clip_rect_t * cliprects, int num_cliprects, int DR4)
148422944501Smrg{
148522944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
148622944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
148722944501Smrg	struct drm_i915_gem_execbuffer execbuf;
148822944501Smrg	int ret, i;
148922944501Smrg
149022944501Smrg	if (bo_gem->has_error)
149122944501Smrg		return -ENOMEM;
149222944501Smrg
149322944501Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
149422944501Smrg	/* Update indices and set up the validate list. */
149522944501Smrg	drm_intel_gem_bo_process_reloc(bo);
149622944501Smrg
149722944501Smrg	/* Add the batch buffer to the validation list.  There are no
149822944501Smrg	 * relocations pointing to it.
149922944501Smrg	 */
150022944501Smrg	drm_intel_add_validate_buffer(bo);
150122944501Smrg
150222944501Smrg	execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects;
150322944501Smrg	execbuf.buffer_count = bufmgr_gem->exec_count;
150422944501Smrg	execbuf.batch_start_offset = 0;
150522944501Smrg	execbuf.batch_len = used;
150622944501Smrg	execbuf.cliprects_ptr = (uintptr_t) cliprects;
150722944501Smrg	execbuf.num_cliprects = num_cliprects;
150822944501Smrg	execbuf.DR1 = 0;
150922944501Smrg	execbuf.DR4 = DR4;
151022944501Smrg
151122944501Smrg	do {
151222944501Smrg		ret = ioctl(bufmgr_gem->fd,
151322944501Smrg			    DRM_IOCTL_I915_GEM_EXECBUFFER,
151422944501Smrg			    &execbuf);
151522944501Smrg	} while (ret != 0 && errno == EINTR);
151622944501Smrg
151722944501Smrg	if (ret != 0) {
151822944501Smrg		ret = -errno;
151922944501Smrg		if (errno == ENOSPC) {
152022944501Smrg			fprintf(stderr,
152122944501Smrg				"Execbuffer fails to pin. "
152222944501Smrg				"Estimate: %u. Actual: %u. Available: %u\n",
152322944501Smrg				drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
152422944501Smrg								   bufmgr_gem->
152522944501Smrg								   exec_count),
152622944501Smrg				drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
152722944501Smrg								  bufmgr_gem->
152822944501Smrg								  exec_count),
152922944501Smrg				(unsigned int)bufmgr_gem->gtt_size);
153022944501Smrg		}
153122944501Smrg	}
153222944501Smrg	drm_intel_update_buffer_offsets(bufmgr_gem);
153322944501Smrg
153422944501Smrg	if (bufmgr_gem->bufmgr.debug)
153522944501Smrg		drm_intel_gem_dump_validation_list(bufmgr_gem);
153622944501Smrg
153722944501Smrg	for (i = 0; i < bufmgr_gem->exec_count; i++) {
153822944501Smrg		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
153922944501Smrg		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
154022944501Smrg
154122944501Smrg		/* Disconnect the buffer from the validate list */
154222944501Smrg		bo_gem->validate_index = -1;
154322944501Smrg		bufmgr_gem->exec_bos[i] = NULL;
154422944501Smrg	}
154522944501Smrg	bufmgr_gem->exec_count = 0;
154622944501Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
154722944501Smrg
154822944501Smrg	return ret;
154922944501Smrg}
155022944501Smrg
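/*
 * Legacy submission sketch (hedged; "batch_bo" and "used" come from
 * the caller's batchbuffer code).  Callers typically verify aperture
 * space first, then submit through the public wrapper:
 *
 *	if (drm_intel_bufmgr_check_aperture_space(&batch_bo, 1) == 0)
 *		ret = drm_intel_bo_exec(batch_bo, used, NULL, 0, 0);
 *
 * On return, bo->offset on every buffer in the validate list reflects
 * its last-known GTT address, as updated above.
 */
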
155122944501Smrgstatic int
1552aaba2545Smrgdrm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
1553aaba2545Smrg			drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
1554aaba2545Smrg			int ring_flag)
155522944501Smrg{
155622944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
155722944501Smrg	struct drm_i915_gem_execbuffer2 execbuf;
155822944501Smrg	int ret, i;
155922944501Smrg
1560aaba2545Smrg	if ((ring_flag != I915_EXEC_RENDER) && (ring_flag != I915_EXEC_BSD))
1561aaba2545Smrg		return -EINVAL;
1562aaba2545Smrg
156322944501Smrg	pthread_mutex_lock(&bufmgr_gem->lock);
156422944501Smrg	/* Update indices and set up the validate list. */
156522944501Smrg	drm_intel_gem_bo_process_reloc2(bo);
156622944501Smrg
156722944501Smrg	/* Add the batch buffer to the validation list.  There are no relocations
156822944501Smrg	 * pointing to it.
156922944501Smrg	 */
157022944501Smrg	drm_intel_add_validate_buffer2(bo, 0);
157122944501Smrg
157222944501Smrg	execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
157322944501Smrg	execbuf.buffer_count = bufmgr_gem->exec_count;
157422944501Smrg	execbuf.batch_start_offset = 0;
157522944501Smrg	execbuf.batch_len = used;
157622944501Smrg	execbuf.cliprects_ptr = (uintptr_t)cliprects;
157722944501Smrg	execbuf.num_cliprects = num_cliprects;
157822944501Smrg	execbuf.DR1 = 0;
157922944501Smrg	execbuf.DR4 = DR4;
1580aaba2545Smrg	execbuf.flags = ring_flag;
158122944501Smrg	execbuf.rsvd1 = 0;
158222944501Smrg	execbuf.rsvd2 = 0;
158322944501Smrg
158422944501Smrg	do {
158522944501Smrg		ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2,
158622944501Smrg			    &execbuf);
158722944501Smrg	} while (ret != 0 && errno == EINTR);
158822944501Smrg
158922944501Smrg	if (ret != 0) {
159022944501Smrg		ret = -errno;
159122944501Smrg		if (ret == -ENOMEM) {
159222944501Smrg			fprintf(stderr,
159322944501Smrg				"Execbuffer fails to pin. "
159422944501Smrg				"Estimate: %u. Actual: %u. Available: %u\n",
159522944501Smrg				drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
159622944501Smrg								   bufmgr_gem->exec_count),
159722944501Smrg				drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
159822944501Smrg								  bufmgr_gem->exec_count),
159922944501Smrg				(unsigned int) bufmgr_gem->gtt_size);
160022944501Smrg		}
160122944501Smrg	}
160222944501Smrg	drm_intel_update_buffer_offsets2(bufmgr_gem);
160322944501Smrg
160422944501Smrg	if (bufmgr_gem->bufmgr.debug)
160522944501Smrg		drm_intel_gem_dump_validation_list(bufmgr_gem);
160622944501Smrg
160722944501Smrg	for (i = 0; i < bufmgr_gem->exec_count; i++) {
160822944501Smrg		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
160922944501Smrg		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
161022944501Smrg
161122944501Smrg		/* Disconnect the buffer from the validate list */
161222944501Smrg		bo_gem->validate_index = -1;
161322944501Smrg		bufmgr_gem->exec_bos[i] = NULL;
161422944501Smrg	}
161522944501Smrg	bufmgr_gem->exec_count = 0;
161622944501Smrg	pthread_mutex_unlock(&bufmgr_gem->lock);
161722944501Smrg
161822944501Smrg	return ret;
161922944501Smrg}
162022944501Smrg
1621aaba2545Smrgstatic int
1622aaba2545Smrgdrm_intel_gem_bo_exec2(drm_intel_bo *bo, int used,
1623aaba2545Smrg		       drm_clip_rect_t *cliprects, int num_cliprects,
1624aaba2545Smrg		       int DR4)
1625aaba2545Smrg{
1626aaba2545Smrg	return drm_intel_gem_bo_mrb_exec2(bo, used,
1627aaba2545Smrg					cliprects, num_cliprects, DR4,
1628aaba2545Smrg					I915_EXEC_RENDER);
1629aaba2545Smrg}
1630aaba2545Smrg
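/*
 * Ring-selection sketch: callers wanting the BSD (video) ring use the
 * multi-ring entry point; anything other than I915_EXEC_RENDER or
 * I915_EXEC_BSD is rejected with -EINVAL above.  Assuming the
 * drm_intel_bo_mrb_exec() wrapper from intel_bufmgr.h:
 *
 *	ret = drm_intel_bo_mrb_exec(batch_bo, used, NULL, 0, 0,
 *				    I915_EXEC_BSD);
 */
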
163122944501Smrgstatic int
163222944501Smrgdrm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
163322944501Smrg{
163422944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
163522944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
163622944501Smrg	struct drm_i915_gem_pin pin;
163722944501Smrg	int ret;
163822944501Smrg
163922944501Smrg	memset(&pin, 0, sizeof(pin));
164022944501Smrg	pin.handle = bo_gem->gem_handle;
164122944501Smrg	pin.alignment = alignment;
164222944501Smrg
164322944501Smrg	do {
164422944501Smrg		ret = ioctl(bufmgr_gem->fd,
164522944501Smrg			    DRM_IOCTL_I915_GEM_PIN,
164622944501Smrg			    &pin);
164722944501Smrg	} while (ret == -1 && errno == EINTR);
164822944501Smrg
164922944501Smrg	if (ret != 0)
165022944501Smrg		return -errno;
165122944501Smrg
165222944501Smrg	bo->offset = pin.offset;
165322944501Smrg	return 0;
165422944501Smrg}
165522944501Smrg
165622944501Smrgstatic int
165722944501Smrgdrm_intel_gem_bo_unpin(drm_intel_bo *bo)
165822944501Smrg{
165922944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
166022944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
166122944501Smrg	struct drm_i915_gem_unpin unpin;
166222944501Smrg	int ret;
166322944501Smrg
166422944501Smrg	memset(&unpin, 0, sizeof(unpin));
166522944501Smrg	unpin.handle = bo_gem->gem_handle;
166622944501Smrg
166722944501Smrg	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
166822944501Smrg	if (ret != 0)
166922944501Smrg		return -errno;
167022944501Smrg
167122944501Smrg	return 0;
167222944501Smrg}
167322944501Smrg
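/*
 * Pin/unpin usage sketch (hypothetical scanout setup): pinning fixes
 * a buffer at a GTT offset, and pinned scanout buffers should also be
 * excluded from the reuse cache:
 *
 *	ret = drm_intel_bo_pin(scanout_bo, 4096);
 *	drm_intel_bo_disable_reuse(scanout_bo);
 *	...
 *	ret = drm_intel_bo_unpin(scanout_bo);
 */
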
167422944501Smrgstatic int
167522944501Smrgdrm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
167622944501Smrg			    uint32_t stride)
167722944501Smrg{
167822944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
167922944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
168022944501Smrg	struct drm_i915_gem_set_tiling set_tiling;
168122944501Smrg	int ret;
168222944501Smrg
168322944501Smrg	if (bo_gem->global_name == 0 && *tiling_mode == bo_gem->tiling_mode)
168422944501Smrg		return 0;
168522944501Smrg
168622944501Smrg	memset(&set_tiling, 0, sizeof(set_tiling));
168722944501Smrg	set_tiling.handle = bo_gem->gem_handle;
168822944501Smrg
168922944501Smrg	do {
169022944501Smrg		set_tiling.tiling_mode = *tiling_mode;
169122944501Smrg		set_tiling.stride = stride;
169222944501Smrg
169322944501Smrg		ret = ioctl(bufmgr_gem->fd,
169422944501Smrg			    DRM_IOCTL_I915_GEM_SET_TILING,
169522944501Smrg			    &set_tiling);
169622944501Smrg	} while (ret == -1 && errno == EINTR);
1697aaba2545Smrg	if (ret == 0) {
1698aaba2545Smrg		bo_gem->tiling_mode = set_tiling.tiling_mode;
1699aaba2545Smrg		bo_gem->swizzle_mode = set_tiling.swizzle_mode;
1700aaba2545Smrg		drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
1701aaba2545Smrg	} else
1702aaba2545Smrg		ret = -errno;
170322944501Smrg
170422944501Smrg	*tiling_mode = bo_gem->tiling_mode;
1705aaba2545Smrg	return ret;
170622944501Smrg}
170722944501Smrg
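/*
 * set_tiling usage sketch: *tiling_mode is in/out, so callers must
 * check which mode actually took effect (on failure the previous mode
 * is reported back).  "pitch" is an assumed caller-provided stride:
 *
 *	uint32_t tiling = I915_TILING_X;
 *	ret = drm_intel_bo_set_tiling(bo, &tiling, pitch);
 *	if (ret != 0 || tiling != I915_TILING_X)
 *		... fall back to a linear layout ...
 */
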
170822944501Smrgstatic int
170922944501Smrgdrm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
171022944501Smrg			    uint32_t * swizzle_mode)
171122944501Smrg{
171222944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
171322944501Smrg
171422944501Smrg	*tiling_mode = bo_gem->tiling_mode;
171522944501Smrg	*swizzle_mode = bo_gem->swizzle_mode;
171622944501Smrg	return 0;
171722944501Smrg}
171822944501Smrg
171922944501Smrgstatic int
172022944501Smrgdrm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name)
172122944501Smrg{
172222944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
172322944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
172422944501Smrg	struct drm_gem_flink flink;
172522944501Smrg	int ret;
172622944501Smrg
172722944501Smrg	if (!bo_gem->global_name) {
172822944501Smrg		memset(&flink, 0, sizeof(flink));
172922944501Smrg		flink.handle = bo_gem->gem_handle;
173022944501Smrg
173122944501Smrg		ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink);
173222944501Smrg		if (ret != 0)
173322944501Smrg			return -errno;
173422944501Smrg		bo_gem->global_name = flink.name;
173522944501Smrg		bo_gem->reusable = 0;
173622944501Smrg	}
173722944501Smrg
173822944501Smrg	*name = bo_gem->global_name;
173922944501Smrg	return 0;
174022944501Smrg}
174122944501Smrg
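/*
 * Sharing sketch: a flink name is a global handle that another process
 * can use to open the same BO (e.g. DRI2 buffer sharing):
 *
 *	uint32_t name;
 *	if (drm_intel_bo_flink(bo, &name) == 0)
 *		... pass "name" to the other process ...
 *
 * The receiver then calls drm_intel_bo_gem_create_from_name() with
 * that name.  Note that a flinked BO is marked non-reusable above,
 * since the other process may outlive our last local reference.
 */
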
174222944501Smrg/**
174322944501Smrg * Enables unlimited caching of buffer objects for reuse.
174422944501Smrg *
174522944501Smrg * This is potentially very memory expensive, as the cache at each bucket
174622944501Smrg * size is only bounded by how many buffers of that size we've managed to have
174722944501Smrg * in flight at once.
174822944501Smrg */
174922944501Smrgvoid
175022944501Smrgdrm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr)
175122944501Smrg{
175222944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
175322944501Smrg
175422944501Smrg	bufmgr_gem->bo_reuse = 1;
175522944501Smrg}
175622944501Smrg
175722944501Smrg/**
175822944501Smrg * Enable use of fenced reloc type.
175922944501Smrg *
176022944501Smrg * New code should enable this to avoid unnecessary fence register
 * allocation.  If this option is not enabled, all relocs will have a fence
 * register allocated.
176322944501Smrg */
176422944501Smrgvoid
176522944501Smrgdrm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr)
176622944501Smrg{
176722944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
176822944501Smrg
176922944501Smrg	if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2)
177022944501Smrg		bufmgr_gem->fenced_relocs = 1;
177122944501Smrg}
177222944501Smrg
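/*
 * Typical bring-up sketch combining the two knobs above ("fd" is an
 * assumed open DRM device descriptor, 4096 an assumed batch size):
 *
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 *	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 *	drm_intel_bufmgr_gem_enable_fenced_relocs(bufmgr);
 *
 * The fenced-relocs call is a no-op unless the kernel supports
 * execbuffer2, as checked in drm_intel_bufmgr_gem_init() below.
 */
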
177322944501Smrg/**
177422944501Smrg * Return the additional aperture space required by the tree of buffer objects
177522944501Smrg * rooted at bo.
177622944501Smrg */
177722944501Smrgstatic int
177822944501Smrgdrm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
177922944501Smrg{
178022944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
178122944501Smrg	int i;
178222944501Smrg	int total = 0;
178322944501Smrg
178422944501Smrg	if (bo == NULL || bo_gem->included_in_check_aperture)
178522944501Smrg		return 0;
178622944501Smrg
178722944501Smrg	total += bo->size;
178822944501Smrg	bo_gem->included_in_check_aperture = 1;
178922944501Smrg
179022944501Smrg	for (i = 0; i < bo_gem->reloc_count; i++)
179122944501Smrg		total +=
179222944501Smrg		    drm_intel_gem_bo_get_aperture_space(bo_gem->
179322944501Smrg							reloc_target_info[i].bo);
179422944501Smrg
179522944501Smrg	return total;
179622944501Smrg}
179722944501Smrg
179822944501Smrg/**
179922944501Smrg * Count the number of buffers in this list that need a fence reg
180022944501Smrg *
180122944501Smrg * If the count is greater than the number of available regs, we'll have
180222944501Smrg * to ask the caller to resubmit a batch with fewer tiled buffers.
180322944501Smrg *
180422944501Smrg * This function over-counts if the same buffer is used multiple times.
180522944501Smrg */
180622944501Smrgstatic unsigned int
180722944501Smrgdrm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count)
180822944501Smrg{
180922944501Smrg	int i;
181022944501Smrg	unsigned int total = 0;
181122944501Smrg
181222944501Smrg	for (i = 0; i < count; i++) {
181322944501Smrg		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
181422944501Smrg
181522944501Smrg		if (bo_gem == NULL)
181622944501Smrg			continue;
181722944501Smrg
181822944501Smrg		total += bo_gem->reloc_tree_fences;
181922944501Smrg	}
182022944501Smrg	return total;
182122944501Smrg}
182222944501Smrg
182322944501Smrg/**
182422944501Smrg * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready
182522944501Smrg * for the next drm_intel_bufmgr_check_aperture_space() call.
182622944501Smrg */
182722944501Smrgstatic void
182822944501Smrgdrm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
182922944501Smrg{
183022944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
183122944501Smrg	int i;
183222944501Smrg
183322944501Smrg	if (bo == NULL || !bo_gem->included_in_check_aperture)
183422944501Smrg		return;
183522944501Smrg
183622944501Smrg	bo_gem->included_in_check_aperture = 0;
183722944501Smrg
183822944501Smrg	for (i = 0; i < bo_gem->reloc_count; i++)
183922944501Smrg		drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->
184022944501Smrg							   reloc_target_info[i].bo);
184122944501Smrg}
184222944501Smrg
184322944501Smrg/**
184422944501Smrg * Return a conservative estimate for the amount of aperture required
184522944501Smrg * for a collection of buffers. This may double-count some buffers.
184622944501Smrg */
184722944501Smrgstatic unsigned int
184822944501Smrgdrm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count)
184922944501Smrg{
185022944501Smrg	int i;
185122944501Smrg	unsigned int total = 0;
185222944501Smrg
185322944501Smrg	for (i = 0; i < count; i++) {
185422944501Smrg		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
185522944501Smrg		if (bo_gem != NULL)
185622944501Smrg			total += bo_gem->reloc_tree_size;
185722944501Smrg	}
185822944501Smrg	return total;
185922944501Smrg}
186022944501Smrg
186122944501Smrg/**
186222944501Smrg * Return the amount of aperture needed for a collection of buffers.
186322944501Smrg * This avoids double counting any buffers, at the cost of looking
186422944501Smrg * at every buffer in the set.
186522944501Smrg */
186622944501Smrgstatic unsigned int
186722944501Smrgdrm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count)
186822944501Smrg{
186922944501Smrg	int i;
187022944501Smrg	unsigned int total = 0;
187122944501Smrg
187222944501Smrg	for (i = 0; i < count; i++) {
187322944501Smrg		total += drm_intel_gem_bo_get_aperture_space(bo_array[i]);
187422944501Smrg		/* For the first buffer object in the array, we get an
187522944501Smrg		 * accurate count back for its reloc_tree size (since nothing
187622944501Smrg		 * had been flagged as being counted yet).  We can save that
187722944501Smrg		 * value out as a more conservative reloc_tree_size that
187822944501Smrg		 * avoids double-counting target buffers.  Since the first
187922944501Smrg		 * buffer happens to usually be the batch buffer in our
188022944501Smrg		 * callers, this can pull us back from doing the tree
188122944501Smrg		 * walk on every new batch emit.
188222944501Smrg		 */
188322944501Smrg		if (i == 0) {
188422944501Smrg			drm_intel_bo_gem *bo_gem =
188522944501Smrg			    (drm_intel_bo_gem *) bo_array[i];
188622944501Smrg			bo_gem->reloc_tree_size = total;
188722944501Smrg		}
188822944501Smrg	}
188922944501Smrg
189022944501Smrg	for (i = 0; i < count; i++)
189122944501Smrg		drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]);
189222944501Smrg	return total;
189322944501Smrg}
189422944501Smrg
 * Return -ENOSPC if the batchbuffer should be flushed before attempting to
 * emit rendering referencing the buffers pointed to by bo_array.
189722944501Smrg * emit rendering referencing the buffers pointed to by bo_array.
189822944501Smrg *
189922944501Smrg * This is required because if we try to emit a batchbuffer with relocations
190022944501Smrg * to a tree of buffers that won't simultaneously fit in the aperture,
190122944501Smrg * the rendering will return an error at a point where the software is not
190222944501Smrg * prepared to recover from it.
190322944501Smrg *
 * However, we also want to emit the batchbuffer significantly before we reach
 * the limit, as a series of batchbuffers each of which references buffers
 * covering almost all of the aperture means that at each emit we end up
 * waiting to evict a buffer from the last rendering, making execution
 * effectively synchronous.  By emitting smaller batchbuffers, we eat some CPU
 * overhead to get better parallelism.
191022944501Smrg */
191122944501Smrgstatic int
191222944501Smrgdrm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
191322944501Smrg{
191422944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem =
191522944501Smrg	    (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr;
191622944501Smrg	unsigned int total = 0;
191722944501Smrg	unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
191822944501Smrg	int total_fences;
191922944501Smrg
192022944501Smrg	/* Check for fence reg constraints if necessary */
192122944501Smrg	if (bufmgr_gem->available_fences) {
192222944501Smrg		total_fences = drm_intel_gem_total_fences(bo_array, count);
192322944501Smrg		if (total_fences > bufmgr_gem->available_fences)
192422944501Smrg			return -ENOSPC;
192522944501Smrg	}
192622944501Smrg
192722944501Smrg	total = drm_intel_gem_estimate_batch_space(bo_array, count);
192822944501Smrg
192922944501Smrg	if (total > threshold)
193022944501Smrg		total = drm_intel_gem_compute_batch_space(bo_array, count);
193122944501Smrg
193222944501Smrg	if (total > threshold) {
193322944501Smrg		DBG("check_space: overflowed available aperture, "
193422944501Smrg		    "%dkb vs %dkb\n",
193522944501Smrg		    total / 1024, (int)bufmgr_gem->gtt_size / 1024);
193622944501Smrg		return -ENOSPC;
193722944501Smrg	} else {
193822944501Smrg		DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024,
193922944501Smrg		    (int)bufmgr_gem->gtt_size / 1024);
194022944501Smrg		return 0;
194122944501Smrg	}
194222944501Smrg}
194322944501Smrg
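/*
 * Flush-early sketch (hedged; "bos" and "n" come from the caller's
 * batch state): aggregate the BOs the next batch will reference and
 * flush when this check fails, instead of letting execbuf hit -ENOSPC:
 *
 *	if (drm_intel_bufmgr_check_aperture_space(bos, n) != 0)
 *		... flush the current batch, then build a smaller one ...
 */
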
194422944501Smrg/*
194522944501Smrg * Disable buffer reuse for objects which are shared with the kernel
194622944501Smrg * as scanout buffers
194722944501Smrg */
194822944501Smrgstatic int
194922944501Smrgdrm_intel_gem_bo_disable_reuse(drm_intel_bo *bo)
195022944501Smrg{
195122944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
195222944501Smrg
195322944501Smrg	bo_gem->reusable = 0;
195422944501Smrg	return 0;
195522944501Smrg}
195622944501Smrg
1957aaba2545Smrgstatic int
1958aaba2545Smrgdrm_intel_gem_bo_is_reusable(drm_intel_bo *bo)
1959aaba2545Smrg{
1960aaba2545Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1961aaba2545Smrg
1962aaba2545Smrg	return bo_gem->reusable;
1963aaba2545Smrg}
1964aaba2545Smrg
196522944501Smrgstatic int
196622944501Smrg_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
196722944501Smrg{
196822944501Smrg	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
196922944501Smrg	int i;
197022944501Smrg
197122944501Smrg	for (i = 0; i < bo_gem->reloc_count; i++) {
197222944501Smrg		if (bo_gem->reloc_target_info[i].bo == target_bo)
197322944501Smrg			return 1;
1974aaba2545Smrg		if (bo == bo_gem->reloc_target_info[i].bo)
1975aaba2545Smrg			continue;
197622944501Smrg		if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo,
197722944501Smrg						target_bo))
197822944501Smrg			return 1;
197922944501Smrg	}
198022944501Smrg
198122944501Smrg	return 0;
198222944501Smrg}
198322944501Smrg
198422944501Smrg/** Return true if target_bo is referenced by bo's relocation tree. */
198522944501Smrgstatic int
198622944501Smrgdrm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
198722944501Smrg{
198822944501Smrg	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
198922944501Smrg
199022944501Smrg	if (bo == NULL || target_bo == NULL)
199122944501Smrg		return 0;
199222944501Smrg	if (target_bo_gem->used_as_reloc_target)
199322944501Smrg		return _drm_intel_gem_bo_references(bo, target_bo);
199422944501Smrg	return 0;
199522944501Smrg}
199622944501Smrg
1997aaba2545Smrgstatic void
1998aaba2545Smrgadd_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
1999aaba2545Smrg{
2000aaba2545Smrg	unsigned int i = bufmgr_gem->num_buckets;
2001aaba2545Smrg
2002aaba2545Smrg	assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));
2003aaba2545Smrg
2004aaba2545Smrg	DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
2005aaba2545Smrg	bufmgr_gem->cache_bucket[i].size = size;
2006aaba2545Smrg	bufmgr_gem->num_buckets++;
2007aaba2545Smrg}
2008aaba2545Smrg
2009aaba2545Smrgstatic void
2010aaba2545Smrginit_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
2011aaba2545Smrg{
2012aaba2545Smrg	unsigned long size, cache_max_size = 64 * 1024 * 1024;
2013aaba2545Smrg
2014aaba2545Smrg	/* OK, so power of two buckets was too wasteful of memory.
2015aaba2545Smrg	 * Give 3 other sizes between each power of two, to hopefully
2016aaba2545Smrg	 * cover things accurately enough.  (The alternative is
2017aaba2545Smrg	 * probably to just go for exact matching of sizes, and assume
2018aaba2545Smrg	 * that for things like composited window resize the tiled
2019aaba2545Smrg	 * width/height alignment and rounding of sizes to pages will
	 * get us useful cache hit rates anyway.)
2021aaba2545Smrg	 */
2022aaba2545Smrg	add_bucket(bufmgr_gem, 4096);
2023aaba2545Smrg	add_bucket(bufmgr_gem, 4096 * 2);
2024aaba2545Smrg	add_bucket(bufmgr_gem, 4096 * 3);
2025aaba2545Smrg
2026aaba2545Smrg	/* Initialize the linked lists for BO reuse cache. */
2027aaba2545Smrg	for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
2028aaba2545Smrg		add_bucket(bufmgr_gem, size);
2029aaba2545Smrg
2030aaba2545Smrg		add_bucket(bufmgr_gem, size + size * 1 / 4);
2031aaba2545Smrg		add_bucket(bufmgr_gem, size + size * 2 / 4);
2032aaba2545Smrg		add_bucket(bufmgr_gem, size + size * 3 / 4);
2033aaba2545Smrg	}
2034aaba2545Smrg}
2035aaba2545Smrg
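/*
 * For reference, the bucket sizes this produces, in pages, are:
 * 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, ...
 * i.e. every size up to 8 pages, then three intermediate steps
 * between each power of two up to cache_max_size (64MB).
 */
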
203622944501Smrg/**
203722944501Smrg * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
 * and manage buffer objects.
203922944501Smrg *
 * \param fd File descriptor of the opened DRM device.
 * \param batch_size Expected batchbuffer size in bytes; used to size the
 *	  per-BO relocation lists.
 */
204222944501Smrgdrm_intel_bufmgr *
204322944501Smrgdrm_intel_bufmgr_gem_init(int fd, int batch_size)
204422944501Smrg{
204522944501Smrg	drm_intel_bufmgr_gem *bufmgr_gem;
204622944501Smrg	struct drm_i915_gem_get_aperture aperture;
204722944501Smrg	drm_i915_getparam_t gp;
	int ret;
	int exec2 = 0, has_bsd = 0;
	int tmp;
205022944501Smrg
205122944501Smrg	bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
205222944501Smrg	if (bufmgr_gem == NULL)
205322944501Smrg		return NULL;
205422944501Smrg
205522944501Smrg	bufmgr_gem->fd = fd;
205622944501Smrg
205722944501Smrg	if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
205822944501Smrg		free(bufmgr_gem);
205922944501Smrg		return NULL;
206022944501Smrg	}
206122944501Smrg
206222944501Smrg	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
206322944501Smrg
206422944501Smrg	if (ret == 0)
206522944501Smrg		bufmgr_gem->gtt_size = aperture.aper_available_size;
206622944501Smrg	else {
		fprintf(stderr, "DRM_IOCTL_I915_GEM_GET_APERTURE failed: %s\n",
206822944501Smrg			strerror(errno));
206922944501Smrg		bufmgr_gem->gtt_size = 128 * 1024 * 1024;
207022944501Smrg		fprintf(stderr, "Assuming %dkB available aperture size.\n"
207122944501Smrg			"May lead to reduced performance or incorrect "
207222944501Smrg			"rendering.\n",
207322944501Smrg			(int)bufmgr_gem->gtt_size / 1024);
207422944501Smrg	}
207522944501Smrg
207622944501Smrg	gp.param = I915_PARAM_CHIPSET_ID;
207722944501Smrg	gp.value = &bufmgr_gem->pci_device;
207822944501Smrg	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
207922944501Smrg	if (ret) {
208022944501Smrg		fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
208122944501Smrg		fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
208222944501Smrg	}
208322944501Smrg
208422944501Smrg	if (IS_GEN2(bufmgr_gem))
208522944501Smrg		bufmgr_gem->gen = 2;
208622944501Smrg	else if (IS_GEN3(bufmgr_gem))
208722944501Smrg		bufmgr_gem->gen = 3;
208822944501Smrg	else if (IS_GEN4(bufmgr_gem))
208922944501Smrg		bufmgr_gem->gen = 4;
209022944501Smrg	else
209122944501Smrg		bufmgr_gem->gen = 6;
209222944501Smrg
	/* Point gp.value at a scratch variable for the capability checks
	 * below, so the kernel's reply doesn't overwrite the chipset id
	 * we just read.
	 */
	gp.value = &tmp;

	gp.param = I915_PARAM_HAS_EXECBUF2;
	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	if (!ret)
		exec2 = 1;
209722944501Smrg
2098aaba2545Smrg	gp.param = I915_PARAM_HAS_BSD;
2099aaba2545Smrg	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
2100aaba2545Smrg	if (!ret)
2101aaba2545Smrg		has_bsd = 1;
2102aaba2545Smrg
210322944501Smrg	if (bufmgr_gem->gen < 4) {
210422944501Smrg		gp.param = I915_PARAM_NUM_FENCES_AVAIL;
210522944501Smrg		gp.value = &bufmgr_gem->available_fences;
210622944501Smrg		ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
210722944501Smrg		if (ret) {
210822944501Smrg			fprintf(stderr, "get fences failed: %d [%d]\n", ret,
210922944501Smrg				errno);
211022944501Smrg			fprintf(stderr, "param: %d, val: %d\n", gp.param,
211122944501Smrg				*gp.value);
211222944501Smrg			bufmgr_gem->available_fences = 0;
211322944501Smrg		} else {
211422944501Smrg			/* XXX The kernel reports the total number of fences,
211522944501Smrg			 * including any that may be pinned.
211622944501Smrg			 *
211722944501Smrg			 * We presume that there will be at least one pinned
211822944501Smrg			 * fence for the scanout buffer, but there may be more
211922944501Smrg			 * than one scanout and the user may be manually
212022944501Smrg			 * pinning buffers. Let's move to execbuffer2 and
212122944501Smrg			 * thereby forget the insanity of using fences...
212222944501Smrg			 */
212322944501Smrg			bufmgr_gem->available_fences -= 2;
212422944501Smrg			if (bufmgr_gem->available_fences < 0)
212522944501Smrg				bufmgr_gem->available_fences = 0;
212622944501Smrg		}
212722944501Smrg	}
212822944501Smrg
212922944501Smrg	/* Let's go with one relocation per every 2 dwords (but round down a bit
213022944501Smrg	 * since a power of two will mean an extra page allocation for the reloc
213122944501Smrg	 * buffer).
213222944501Smrg	 *
213322944501Smrg	 * Every 4 was too few for the blender benchmark.
213422944501Smrg	 */
213522944501Smrg	bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
213622944501Smrg
213722944501Smrg	bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
213822944501Smrg	bufmgr_gem->bufmgr.bo_alloc_for_render =
213922944501Smrg	    drm_intel_gem_bo_alloc_for_render;
214022944501Smrg	bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
214122944501Smrg	bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
214222944501Smrg	bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
214322944501Smrg	bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
214422944501Smrg	bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
214522944501Smrg	bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
214622944501Smrg	bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
214722944501Smrg	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
214822944501Smrg	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
214922944501Smrg	bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
215022944501Smrg	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
215122944501Smrg	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
215222944501Smrg	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
215322944501Smrg	bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
215422944501Smrg	bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
215522944501Smrg	/* Use the new one if available */
2156aaba2545Smrg	if (exec2) {
215722944501Smrg		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2;
2158aaba2545Smrg		if (has_bsd)
2159aaba2545Smrg			bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2;
2160aaba2545Smrg	} else
216122944501Smrg		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
216222944501Smrg	bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy;
216322944501Smrg	bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise;
216422944501Smrg	bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy;
216522944501Smrg	bufmgr_gem->bufmgr.debug = 0;
216622944501Smrg	bufmgr_gem->bufmgr.check_aperture_space =
216722944501Smrg	    drm_intel_gem_check_aperture_space;
216822944501Smrg	bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
2169aaba2545Smrg	bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable;
217022944501Smrg	bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
217122944501Smrg	    drm_intel_gem_get_pipe_from_crtc_id;
217222944501Smrg	bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;
217322944501Smrg
2174aaba2545Smrg	init_cache_buckets(bufmgr_gem);
217522944501Smrg
217622944501Smrg	return &bufmgr_gem->bufmgr;
217722944501Smrg}