intel_bufmgr_gem.c revision aaba2545
/**************************************************************************
 *
 * Copyright © 2007 Red Hat Inc.
 * Copyright © 2007 Intel Corporation
 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 *
 **************************************************************************/
/*
 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
 *          Eric Anholt <eric@anholt.net>
 *          Dave Airlie <airlied@linux.ie>
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <xf86drm.h>
#include <xf86atomic.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <stddef.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>

#include "errno.h"
#include "libdrm_lists.h"
#include "intel_bufmgr.h"
#include "intel_bufmgr_priv.h"
#include "intel_chipset.h"
#include "string.h"

#include "i915_drm.h"

#define DBG(...) do {                                   \
        if (bufmgr_gem->bufmgr.debug)                   \
                fprintf(stderr, __VA_ARGS__);           \
} while (0)

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

typedef struct _drm_intel_bo_gem drm_intel_bo_gem;

struct drm_intel_gem_bo_bucket {
        drmMMListHead head;
        unsigned long size;
};

typedef struct _drm_intel_bufmgr_gem {
        drm_intel_bufmgr bufmgr;

        int fd;

        int max_relocs;

        pthread_mutex_t lock;

        struct drm_i915_gem_exec_object *exec_objects;
        struct drm_i915_gem_exec_object2 *exec2_objects;
        drm_intel_bo **exec_bos;
        int exec_size;
        int exec_count;

        /** Array of lists of cached gem objects of power-of-two sizes */
        struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
        int num_buckets;

        uint64_t gtt_size;
        int available_fences;
        int pci_device;
        int gen;
        char bo_reuse;
        char fenced_relocs;
} drm_intel_bufmgr_gem;

#define DRM_INTEL_RELOC_FENCE (1<<0)

typedef struct _drm_intel_reloc_target_info {
        drm_intel_bo *bo;
        int flags;
} drm_intel_reloc_target;

struct _drm_intel_bo_gem {
        drm_intel_bo bo;

        atomic_t refcount;
        uint32_t gem_handle;
        const char *name;

        /**
         * Kernel-assigned global name for this object
         */
        unsigned int global_name;

        /**
         * Index of the buffer within the validation list while preparing a
         * batchbuffer execution.
         */
        int validate_index;

        /**
         * Current tiling mode
         */
        uint32_t tiling_mode;
        uint32_t swizzle_mode;

        time_t free_time;

        /** Array passed to the DRM containing relocation information. */
        struct drm_i915_gem_relocation_entry *relocs;
        /**
         * Array of info structs corresponding to relocs[i].target_handle etc
         */
        drm_intel_reloc_target *reloc_target_info;
        /** Number of entries in relocs */
        int reloc_count;
        /** Mapped address for the buffer, saved across map/unmap cycles */
        void *mem_virtual;
        /** GTT virtual address for the buffer, saved across map/unmap cycles */
        void *gtt_virtual;

        /** BO cache list */
        drmMMListHead head;

        /**
         * Boolean of whether this BO and its children have been included in
         * the current drm_intel_bufmgr_check_aperture_space() total.
         */
        char included_in_check_aperture;

        /**
         * Boolean of whether this buffer has been used as a relocation
         * target and had its size accounted for, and thus can't have any
         * further relocations added to it.
         */
        char used_as_reloc_target;

        /**
         * Boolean of whether we have encountered an error whilst building
         * the relocation tree.
         */
        char has_error;

        /**
         * Boolean of whether this buffer can be re-used
         */
        char reusable;

        /**
         * Size in bytes of this buffer and its relocation descendents.
         *
         * Used to avoid costly tree walking in
         * drm_intel_bufmgr_check_aperture in the common case.
         */
        int reloc_tree_size;

        /**
         * Number of potential fence registers required by this buffer and its
         * relocations.
         */
        int reloc_tree_fences;
};

static unsigned int
drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);

static unsigned int
drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count);

static int
drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
                            uint32_t * swizzle_mode);

static int
drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
                            uint32_t stride);

static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
                                                      time_t time);

static void drm_intel_gem_bo_unreference(drm_intel_bo *bo);

static void drm_intel_gem_bo_free(drm_intel_bo *bo);
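
/**
 * Returns the actual allocation size needed for a tiled buffer: page
 * aligned on gen4+, rounded up to a power-of-two block on older chips,
 * and falling back to untiled when the object is too large to tile.
 */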
static unsigned long
drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
                           uint32_t *tiling_mode)
{
        unsigned long min_size, max_size;
        unsigned long i;

        if (*tiling_mode == I915_TILING_NONE)
                return size;

        /* 965+ just need multiples of page size for tiling */
        if (bufmgr_gem->gen >= 4)
                return ROUND_UP_TO(size, 4096);

        /* Older chips need powers of two, of at least 512k or 1M */
        if (bufmgr_gem->gen == 3) {
                min_size = 1024*1024;
                max_size = 128*1024*1024;
        } else {
                min_size = 512*1024;
                max_size = 64*1024*1024;
        }

        if (size > max_size) {
                *tiling_mode = I915_TILING_NONE;
                return size;
        }

        for (i = min_size; i < size; i <<= 1)
                ;

        return i;
}

/*
 * Round a given pitch up to the minimum required for X tiling on a
 * given chip.  We use 512 as the minimum to allow for a later tiling
 * change.
 */
static unsigned long
drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
                            unsigned long pitch, uint32_t tiling_mode)
{
        unsigned long tile_width;
        unsigned long i;

        /* If untiled, then just align it so that we can do rendering
         * to it with the 3D engine.
         */
        if (tiling_mode == I915_TILING_NONE)
                return ALIGN(pitch, 64);

        if (tiling_mode == I915_TILING_X)
                tile_width = 512;
        else
                tile_width = 128;

        /* 965 is flexible */
        if (bufmgr_gem->gen >= 4)
                return ROUND_UP_TO(pitch, tile_width);

        /* Pre-965 needs power of two tile width */
        for (i = tile_width; i < pitch; i <<= 1)
                ;

        return i;
}
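
/**
 * Returns the smallest cache bucket that holds objects of at least
 * @size bytes, or NULL if the size exceeds every bucket.
 */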
static struct drm_intel_gem_bo_bucket *
drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
                                 unsigned long size)
{
        int i;

        for (i = 0; i < bufmgr_gem->num_buckets; i++) {
                struct drm_intel_gem_bo_bucket *bucket =
                    &bufmgr_gem->cache_bucket[i];
                if (bucket->size >= size) {
                        return bucket;
                }
        }

        return NULL;
}

static void
drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
{
        int i, j;

        for (i = 0; i < bufmgr_gem->exec_count; i++) {
                drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
                drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

                if (bo_gem->relocs == NULL) {
                        DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle,
                            bo_gem->name);
                        continue;
                }

                for (j = 0; j < bo_gem->reloc_count; j++) {
                        drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo;
                        drm_intel_bo_gem *target_gem =
                            (drm_intel_bo_gem *) target_bo;

                        DBG("%2d: %d (%s)@0x%08llx -> "
                            "%d (%s)@0x%08lx + 0x%08x\n",
                            i,
                            bo_gem->gem_handle, bo_gem->name,
                            (unsigned long long)bo_gem->relocs[j].offset,
                            target_gem->gem_handle,
                            target_gem->name,
                            target_bo->offset,
                            bo_gem->relocs[j].delta);
                }
        }
}

static inline void
drm_intel_gem_bo_reference(drm_intel_bo *bo)
{
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

        assert(atomic_read(&bo_gem->refcount) > 0);
        atomic_inc(&bo_gem->refcount);
}

/**
 * Adds the given buffer to the list of buffers to be validated (moved into the
 * appropriate memory type) with the next batch submission.
 *
 * If a buffer is validated multiple times in a batch submission, it ends up
 * with the intersection of the memory type flags and the union of the
 * access flags.
 */
static void
drm_intel_add_validate_buffer(drm_intel_bo *bo)
{
        drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
        int index;

        if (bo_gem->validate_index != -1)
                return;

        /* Extend the array of validation entries as necessary. */
        if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
                int new_size = bufmgr_gem->exec_size * 2;

                if (new_size == 0)
                        new_size = 5;

                bufmgr_gem->exec_objects =
                    realloc(bufmgr_gem->exec_objects,
                            sizeof(*bufmgr_gem->exec_objects) * new_size);
                bufmgr_gem->exec_bos =
                    realloc(bufmgr_gem->exec_bos,
                            sizeof(*bufmgr_gem->exec_bos) * new_size);
                bufmgr_gem->exec_size = new_size;
        }

        index = bufmgr_gem->exec_count;
        bo_gem->validate_index = index;
        /* Fill in array entry */
        bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
        bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
        bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs;
        bufmgr_gem->exec_objects[index].alignment = 0;
        bufmgr_gem->exec_objects[index].offset = 0;
        bufmgr_gem->exec_bos[index] = bo;
        bufmgr_gem->exec_count++;
}
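
/**
 * Execbuffer2 counterpart of drm_intel_add_validate_buffer() that also
 * tracks whether the object needs a fence register via
 * EXEC_OBJECT_NEEDS_FENCE.
 */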
static void
drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
{
        drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
        int index;

        if (bo_gem->validate_index != -1) {
                if (need_fence)
                        bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
                                EXEC_OBJECT_NEEDS_FENCE;
                return;
        }

        /* Extend the array of validation entries as necessary. */
        if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
                int new_size = bufmgr_gem->exec_size * 2;

                if (new_size == 0)
                        new_size = 5;

                bufmgr_gem->exec2_objects =
                    realloc(bufmgr_gem->exec2_objects,
                            sizeof(*bufmgr_gem->exec2_objects) * new_size);
                bufmgr_gem->exec_bos =
                    realloc(bufmgr_gem->exec_bos,
                            sizeof(*bufmgr_gem->exec_bos) * new_size);
                bufmgr_gem->exec_size = new_size;
        }

        index = bufmgr_gem->exec_count;
        bo_gem->validate_index = index;
        /* Fill in array entry */
        bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
        bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
        bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
        bufmgr_gem->exec2_objects[index].alignment = 0;
        bufmgr_gem->exec2_objects[index].offset = 0;
        bufmgr_gem->exec_bos[index] = bo;
        bufmgr_gem->exec2_objects[index].flags = 0;
        bufmgr_gem->exec2_objects[index].rsvd1 = 0;
        bufmgr_gem->exec2_objects[index].rsvd2 = 0;
        if (need_fence) {
                bufmgr_gem->exec2_objects[index].flags |=
                        EXEC_OBJECT_NEEDS_FENCE;
        }
        bufmgr_gem->exec_count++;
}

#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
        sizeof(uint32_t))

static void
drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem,
                                      drm_intel_bo_gem *bo_gem)
{
        int size;

        assert(!bo_gem->used_as_reloc_target);

        /* The older chipsets are far less flexible in terms of tiling,
         * and require tiled buffers to be size aligned in the aperture.
         * This means that in the worst possible case we will need a hole
         * twice as large as the object in order for it to fit into the
         * aperture.  Optimal packing is for wimps.
         */
        size = bo_gem->bo.size;
        if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE)
                size *= 2;

        bo_gem->reloc_tree_size = size;
}
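
/**
 * Allocates the relocation and target-info arrays for a buffer, capped
 * at one relocation per 4 bytes of buffer.  Returns nonzero and flags
 * the buffer as errored if allocation fails.
 */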
static int
drm_intel_setup_reloc_list(drm_intel_bo *bo)
{
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
        drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
        unsigned int max_relocs = bufmgr_gem->max_relocs;

        if (bo->size / 4 < max_relocs)
                max_relocs = bo->size / 4;

        bo_gem->relocs = malloc(max_relocs *
                                sizeof(struct drm_i915_gem_relocation_entry));
        bo_gem->reloc_target_info = malloc(max_relocs *
                                           sizeof(drm_intel_reloc_target));
        if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) {
                bo_gem->has_error = 1;

                free(bo_gem->relocs);
                bo_gem->relocs = NULL;

                free(bo_gem->reloc_target_info);
                bo_gem->reloc_target_info = NULL;

                return 1;
        }

        return 0;
}

static int
drm_intel_gem_bo_busy(drm_intel_bo *bo)
{
        drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
        struct drm_i915_gem_busy busy;
        int ret;

        memset(&busy, 0, sizeof(busy));
        busy.handle = bo_gem->gem_handle;

        do {
                ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
        } while (ret == -1 && errno == EINTR);

        return (ret == 0 && busy.busy);
}

static int
drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem,
                                  drm_intel_bo_gem *bo_gem, int state)
{
        struct drm_i915_gem_madvise madv;

        madv.handle = bo_gem->gem_handle;
        madv.madv = state;
        madv.retained = 1;
        ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);

        return madv.retained;
}

static int
drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv)
{
        return drm_intel_gem_bo_madvise_internal
                ((drm_intel_bufmgr_gem *) bo->bufmgr,
                 (drm_intel_bo_gem *) bo,
                 madv);
}

/* drop the oldest entries that have been purged by the kernel */
static void
drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
                                    struct drm_intel_gem_bo_bucket *bucket)
{
        while (!DRMLISTEMPTY(&bucket->head)) {
                drm_intel_bo_gem *bo_gem;

                bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
                                      bucket->head.next, head);
                if (drm_intel_gem_bo_madvise_internal
                    (bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
                        break;

                DRMLISTDEL(&bo_gem->head);
                drm_intel_gem_bo_free(&bo_gem->bo);
        }
}
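
/**
 * Common allocation path: reuses a buffer from the matching cache
 * bucket when possible (the MRU entry for render targets, the LRU
 * entry if idle otherwise), and otherwise creates a new object with
 * DRM_IOCTL_I915_GEM_CREATE.
 */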
static drm_intel_bo *
drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
                                const char *name,
                                unsigned long size,
                                unsigned long flags)
{
        drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
        drm_intel_bo_gem *bo_gem;
        unsigned int page_size = getpagesize();
        int ret;
        struct drm_intel_gem_bo_bucket *bucket;
        int alloc_from_cache;
        unsigned long bo_size;
        int for_render = 0;

        if (flags & BO_ALLOC_FOR_RENDER)
                for_render = 1;

        /* Round the allocated size up to a power of two number of pages. */
        bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);

        /* If we don't have caching at this size, don't actually round the
         * allocation up.
         */
        if (bucket == NULL) {
                bo_size = size;
                if (bo_size < page_size)
                        bo_size = page_size;
        } else {
                bo_size = bucket->size;
        }

        pthread_mutex_lock(&bufmgr_gem->lock);
        /* Get a buffer out of the cache if available */
retry:
        alloc_from_cache = 0;
        if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
                if (for_render) {
                        /* Allocate new render-target BOs from the tail (MRU)
                         * of the list, as it will likely be hot in the GPU
                         * cache and in the aperture for us.
                         */
                        bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
                                              bucket->head.prev, head);
                        DRMLISTDEL(&bo_gem->head);
                        alloc_from_cache = 1;
                } else {
                        /* For non-render-target BOs (where we're probably
                         * going to map it first thing in order to fill it
                         * with data), check if the last BO in the cache is
                         * unbusy, and only reuse in that case.  Otherwise,
                         * allocating a new buffer is probably faster than
                         * waiting for the GPU to finish.
                         */
                        bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
                                              bucket->head.next, head);
                        if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
                                alloc_from_cache = 1;
                                DRMLISTDEL(&bo_gem->head);
                        }
                }

                if (alloc_from_cache) {
                        if (!drm_intel_gem_bo_madvise_internal
                            (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
                                drm_intel_gem_bo_free(&bo_gem->bo);
                                drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
                                                                    bucket);
                                goto retry;
                        }
                }
        }
        pthread_mutex_unlock(&bufmgr_gem->lock);

        if (!alloc_from_cache) {
                struct drm_i915_gem_create create;

                bo_gem = calloc(1, sizeof(*bo_gem));
                if (!bo_gem)
                        return NULL;

                bo_gem->bo.size = bo_size;
                memset(&create, 0, sizeof(create));
                create.size = bo_size;

                do {
                        ret = ioctl(bufmgr_gem->fd,
                                    DRM_IOCTL_I915_GEM_CREATE,
                                    &create);
                } while (ret == -1 && errno == EINTR);
                bo_gem->gem_handle = create.handle;
                bo_gem->bo.handle = bo_gem->gem_handle;
                if (ret != 0) {
                        free(bo_gem);
                        return NULL;
                }
                bo_gem->bo.bufmgr = bufmgr;
        }

        bo_gem->name = name;
        atomic_set(&bo_gem->refcount, 1);
        bo_gem->validate_index = -1;
        bo_gem->reloc_tree_fences = 0;
        bo_gem->used_as_reloc_target = 0;
        bo_gem->has_error = 0;
        bo_gem->tiling_mode = I915_TILING_NONE;
        bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
        bo_gem->reusable = 1;

        drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);

        DBG("bo_create: buf %d (%s) %ldb\n",
            bo_gem->gem_handle, bo_gem->name, size);

        return &bo_gem->bo;
}

static drm_intel_bo *
drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
                                  const char *name,
                                  unsigned long size,
                                  unsigned int alignment)
{
        return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
                                               BO_ALLOC_FOR_RENDER);
}

static drm_intel_bo *
drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr,
                       const char *name,
                       unsigned long size,
                       unsigned int alignment)
{
        return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0);
}
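
/**
 * Allocates a 2D buffer, iterating until the computed size and the
 * tiling mode reported by drm_intel_gem_bo_tile_size() agree, and
 * returns the resulting pitch through @pitch.
 */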
static drm_intel_bo *
drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
                             int x, int y, int cpp, uint32_t *tiling_mode,
                             unsigned long *pitch, unsigned long flags)
{
        drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
        drm_intel_bo *bo;
        unsigned long size, stride;
        uint32_t tiling;
        int ret;

        do {
                unsigned long aligned_y;

                tiling = *tiling_mode;

                /* If we're tiled, our allocations are in 8 or 32-row blocks,
                 * so failure to align our height means that we won't allocate
                 * enough pages.
                 *
                 * If we're untiled, we still have to align to 2 rows high
                 * because the data port accesses 2x2 blocks even if the
                 * bottom row isn't to be rendered, so failure to align means
                 * we could walk off the end of the GTT and fault.  This is
                 * documented on 965, and may be the case on older chipsets
                 * too so we try to be careful.
                 */
                aligned_y = y;
                if (tiling == I915_TILING_NONE)
                        aligned_y = ALIGN(y, 2);
                else if (tiling == I915_TILING_X)
                        aligned_y = ALIGN(y, 8);
                else if (tiling == I915_TILING_Y)
                        aligned_y = ALIGN(y, 32);

                stride = x * cpp;
                stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling);
                size = stride * aligned_y;
                size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
        } while (*tiling_mode != tiling);

        bo = drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags);
        if (!bo)
                return NULL;

        ret = drm_intel_gem_bo_set_tiling(bo, tiling_mode, stride);
        if (ret != 0) {
                drm_intel_gem_bo_unreference(bo);
                return NULL;
        }

        *pitch = stride;

        return bo;
}

/**
 * Returns a drm_intel_bo wrapping the given buffer object handle.
 *
 * This can be used when one application needs to pass a buffer object
 * to another.
 */
drm_intel_bo *
drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr,
                                  const char *name,
                                  unsigned int handle)
{
        drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
        drm_intel_bo_gem *bo_gem;
        int ret;
        struct drm_gem_open open_arg;
        struct drm_i915_gem_get_tiling get_tiling;

        bo_gem = calloc(1, sizeof(*bo_gem));
        if (!bo_gem)
                return NULL;

        memset(&open_arg, 0, sizeof(open_arg));
        open_arg.name = handle;
        do {
                ret = ioctl(bufmgr_gem->fd,
                            DRM_IOCTL_GEM_OPEN,
                            &open_arg);
        } while (ret == -1 && errno == EINTR);
        if (ret != 0) {
                fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n",
                        name, handle, strerror(errno));
                free(bo_gem);
                return NULL;
        }
        bo_gem->bo.size = open_arg.size;
        bo_gem->bo.offset = 0;
        bo_gem->bo.virtual = NULL;
        bo_gem->bo.bufmgr = bufmgr;
        bo_gem->name = name;
        atomic_set(&bo_gem->refcount, 1);
        bo_gem->validate_index = -1;
        bo_gem->gem_handle = open_arg.handle;
        bo_gem->global_name = handle;
        bo_gem->reusable = 0;

        memset(&get_tiling, 0, sizeof(get_tiling));
        get_tiling.handle = bo_gem->gem_handle;
        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling);
        if (ret != 0) {
                drm_intel_gem_bo_unreference(&bo_gem->bo);
                return NULL;
        }
        bo_gem->tiling_mode = get_tiling.tiling_mode;
        bo_gem->swizzle_mode = get_tiling.swizzle_mode;
        drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);

        DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);

        return &bo_gem->bo;
}
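
/**
 * Releases any cached CPU and GTT mappings, closes the GEM handle and
 * frees the wrapper structure.
 */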
static void
drm_intel_gem_bo_free(drm_intel_bo *bo)
{
        drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
        struct drm_gem_close close;
        int ret;

        if (bo_gem->mem_virtual)
                munmap(bo_gem->mem_virtual, bo_gem->bo.size);
        if (bo_gem->gtt_virtual)
                munmap(bo_gem->gtt_virtual, bo_gem->bo.size);

        /* Close this object */
        memset(&close, 0, sizeof(close));
        close.handle = bo_gem->gem_handle;
        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close);
        if (ret != 0) {
                fprintf(stderr,
                        "DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
                        bo_gem->gem_handle, bo_gem->name, strerror(errno));
        }
        free(bo);
}

/** Frees all cached buffers significantly older than @time. */
static void
drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
{
        int i;

        for (i = 0; i < bufmgr_gem->num_buckets; i++) {
                struct drm_intel_gem_bo_bucket *bucket =
                    &bufmgr_gem->cache_bucket[i];

                while (!DRMLISTEMPTY(&bucket->head)) {
                        drm_intel_bo_gem *bo_gem;

                        bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
                                              bucket->head.next, head);
                        if (time - bo_gem->free_time <= 1)
                                break;

                        DRMLISTDEL(&bo_gem->head);

                        drm_intel_gem_bo_free(&bo_gem->bo);
                }
        }
}
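
/**
 * Called when the last reference is dropped: unreferences all
 * relocation targets, then either returns the buffer (untiled and
 * marked purgeable) to its cache bucket or frees it.
 */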
static void
drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
{
        drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
        struct drm_intel_gem_bo_bucket *bucket;
        uint32_t tiling_mode;
        int i;

        /* Unreference all the target buffers */
        for (i = 0; i < bo_gem->reloc_count; i++) {
                if (bo_gem->reloc_target_info[i].bo != bo) {
                        drm_intel_gem_bo_unreference_locked_timed(bo_gem->
                                                                  reloc_target_info[i].bo,
                                                                  time);
                }
        }
        bo_gem->reloc_count = 0;
        bo_gem->used_as_reloc_target = 0;

        DBG("bo_unreference final: %d (%s)\n",
            bo_gem->gem_handle, bo_gem->name);

        /* release memory associated with this object */
        if (bo_gem->reloc_target_info) {
                free(bo_gem->reloc_target_info);
                bo_gem->reloc_target_info = NULL;
        }
        if (bo_gem->relocs) {
                free(bo_gem->relocs);
                bo_gem->relocs = NULL;
        }

        bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
        /* Put the buffer into our internal cache for reuse if we can. */
        tiling_mode = I915_TILING_NONE;
        if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
            drm_intel_gem_bo_set_tiling(bo, &tiling_mode, 0) == 0 &&
            drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
                                              I915_MADV_DONTNEED)) {
                bo_gem->free_time = time;

                bo_gem->name = NULL;
                bo_gem->validate_index = -1;

                DRMLISTADDTAIL(&bo_gem->head, &bucket->head);

                drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time);
        } else {
                drm_intel_gem_bo_free(bo);
        }
}

static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
                                                      time_t time)
{
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

        assert(atomic_read(&bo_gem->refcount) > 0);
        if (atomic_dec_and_test(&bo_gem->refcount))
                drm_intel_gem_bo_unreference_final(bo, time);
}

static void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
{
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

        assert(atomic_read(&bo_gem->refcount) > 0);
        if (atomic_dec_and_test(&bo_gem->refcount)) {
                drm_intel_bufmgr_gem *bufmgr_gem =
                    (drm_intel_bufmgr_gem *) bo->bufmgr;
                struct timespec time;

                clock_gettime(CLOCK_MONOTONIC, &time);

                pthread_mutex_lock(&bufmgr_gem->lock);
                drm_intel_gem_bo_unreference_final(bo, time.tv_sec);
                pthread_mutex_unlock(&bufmgr_gem->lock);
        }
}
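
/**
 * Maps the buffer for CPU access via DRM_IOCTL_I915_GEM_MMAP, reusing
 * any cached mapping, and moves the object to the CPU domain (as the
 * write domain too if write_enable is set).
 */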
static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
{
        drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
        struct drm_i915_gem_set_domain set_domain;
        int ret;

        pthread_mutex_lock(&bufmgr_gem->lock);

        /* Allow recursive mapping.  Mesa may recursively map buffers with
         * nested display loops.
         */
        if (!bo_gem->mem_virtual) {
                struct drm_i915_gem_mmap mmap_arg;

                DBG("bo_map: %d (%s)\n", bo_gem->gem_handle, bo_gem->name);

                memset(&mmap_arg, 0, sizeof(mmap_arg));
                mmap_arg.handle = bo_gem->gem_handle;
                mmap_arg.offset = 0;
                mmap_arg.size = bo->size;
                do {
                        ret = ioctl(bufmgr_gem->fd,
                                    DRM_IOCTL_I915_GEM_MMAP,
                                    &mmap_arg);
                } while (ret == -1 && errno == EINTR);
                if (ret != 0) {
                        ret = -errno;
                        fprintf(stderr,
                                "%s:%d: Error mapping buffer %d (%s): %s .\n",
                                __FILE__, __LINE__, bo_gem->gem_handle,
                                bo_gem->name, strerror(errno));
                        pthread_mutex_unlock(&bufmgr_gem->lock);
                        return ret;
                }
                bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
        }
        DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
            bo_gem->mem_virtual);
        bo->virtual = bo_gem->mem_virtual;

        set_domain.handle = bo_gem->gem_handle;
        set_domain.read_domains = I915_GEM_DOMAIN_CPU;
        if (write_enable)
                set_domain.write_domain = I915_GEM_DOMAIN_CPU;
        else
                set_domain.write_domain = 0;
        do {
                ret = ioctl(bufmgr_gem->fd,
                            DRM_IOCTL_I915_GEM_SET_DOMAIN,
                            &set_domain);
        } while (ret == -1 && errno == EINTR);
        if (ret != 0) {
                ret = -errno;
                fprintf(stderr, "%s:%d: Error setting to CPU domain %d: %s\n",
                        __FILE__, __LINE__, bo_gem->gem_handle,
                        strerror(errno));
                pthread_mutex_unlock(&bufmgr_gem->lock);
                return ret;
        }

        pthread_mutex_unlock(&bufmgr_gem->lock);

        return 0;
}
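
/**
 * Maps the buffer through the GTT: obtains the fake mmap offset with
 * DRM_IOCTL_I915_GEM_MMAP_GTT, mmaps it, and moves the object to the
 * GTT domain so that CPU caches get flushed.
 */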
int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
{
        drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
        struct drm_i915_gem_set_domain set_domain;
        int ret;

        pthread_mutex_lock(&bufmgr_gem->lock);

        /* Get a mapping of the buffer if we haven't before. */
        if (bo_gem->gtt_virtual == NULL) {
                struct drm_i915_gem_mmap_gtt mmap_arg;

                DBG("bo_map_gtt: mmap %d (%s)\n", bo_gem->gem_handle,
                    bo_gem->name);

                memset(&mmap_arg, 0, sizeof(mmap_arg));
                mmap_arg.handle = bo_gem->gem_handle;

                /* Get the fake offset back... */
                do {
                        ret = ioctl(bufmgr_gem->fd,
                                    DRM_IOCTL_I915_GEM_MMAP_GTT,
                                    &mmap_arg);
                } while (ret == -1 && errno == EINTR);
                if (ret != 0) {
                        ret = -errno;
                        fprintf(stderr,
                                "%s:%d: Error preparing buffer map %d (%s): %s .\n",
                                __FILE__, __LINE__,
                                bo_gem->gem_handle, bo_gem->name,
                                strerror(errno));
                        pthread_mutex_unlock(&bufmgr_gem->lock);
                        return ret;
                }

                /* and mmap it */
                bo_gem->gtt_virtual = mmap(0, bo->size, PROT_READ | PROT_WRITE,
                                           MAP_SHARED, bufmgr_gem->fd,
                                           mmap_arg.offset);
                if (bo_gem->gtt_virtual == MAP_FAILED) {
                        bo_gem->gtt_virtual = NULL;
                        ret = -errno;
                        fprintf(stderr,
                                "%s:%d: Error mapping buffer %d (%s): %s .\n",
                                __FILE__, __LINE__,
                                bo_gem->gem_handle, bo_gem->name,
                                strerror(errno));
                        pthread_mutex_unlock(&bufmgr_gem->lock);
                        return ret;
                }
        }

        bo->virtual = bo_gem->gtt_virtual;

        DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
            bo_gem->gtt_virtual);

        /* Now move it to the GTT domain so that the CPU caches are flushed */
        set_domain.handle = bo_gem->gem_handle;
        set_domain.read_domains = I915_GEM_DOMAIN_GTT;
        set_domain.write_domain = I915_GEM_DOMAIN_GTT;
        do {
                ret = ioctl(bufmgr_gem->fd,
                            DRM_IOCTL_I915_GEM_SET_DOMAIN,
                            &set_domain);
        } while (ret == -1 && errno == EINTR);

        if (ret != 0) {
                ret = -errno;
                fprintf(stderr, "%s:%d: Error setting domain %d: %s\n",
                        __FILE__, __LINE__, bo_gem->gem_handle,
                        strerror(errno));
        }

        pthread_mutex_unlock(&bufmgr_gem->lock);

        return ret;
}

int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
{
        drm_intel_bufmgr_gem *bufmgr_gem;
        drm_intel_bo_gem *bo_gem;
        int ret = 0;

        /* Check for NULL before touching bo, so the early-out actually
         * guards the dereferences below.
         */
        if (bo == NULL)
                return 0;

        bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
        bo_gem = (drm_intel_bo_gem *) bo;

        assert(bo_gem->gtt_virtual != NULL);

        pthread_mutex_lock(&bufmgr_gem->lock);
        bo->virtual = NULL;
        pthread_mutex_unlock(&bufmgr_gem->lock);

        return ret;
}
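
/**
 * Finishes a CPU map cycle.  The cached mapping is kept for reuse;
 * DRM_IOCTL_I915_GEM_SW_FINISH tells the kernel to flush the buffer if
 * it is pinned for scanout.
 */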
static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
{
        drm_intel_bufmgr_gem *bufmgr_gem;
        drm_intel_bo_gem *bo_gem;
        struct drm_i915_gem_sw_finish sw_finish;
        int ret;

        /* As above, check for NULL before dereferencing bo. */
        if (bo == NULL)
                return 0;

        bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
        bo_gem = (drm_intel_bo_gem *) bo;

        assert(bo_gem->mem_virtual != NULL);

        pthread_mutex_lock(&bufmgr_gem->lock);

        /* Cause a flush to happen if the buffer's pinned for scanout, so the
         * results show up in a timely manner.
         */
        sw_finish.handle = bo_gem->gem_handle;
        do {
                ret = ioctl(bufmgr_gem->fd,
                            DRM_IOCTL_I915_GEM_SW_FINISH,
                            &sw_finish);
        } while (ret == -1 && errno == EINTR);
        ret = ret == -1 ? -errno : 0;

        bo->virtual = NULL;
        pthread_mutex_unlock(&bufmgr_gem->lock);

        return ret;
}

static int
drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
                         unsigned long size, const void *data)
{
        drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
        struct drm_i915_gem_pwrite pwrite;
        int ret;

        memset(&pwrite, 0, sizeof(pwrite));
        pwrite.handle = bo_gem->gem_handle;
        pwrite.offset = offset;
        pwrite.size = size;
        pwrite.data_ptr = (uint64_t) (uintptr_t) data;
        do {
                ret = ioctl(bufmgr_gem->fd,
                            DRM_IOCTL_I915_GEM_PWRITE,
                            &pwrite);
        } while (ret == -1 && errno == EINTR);
        if (ret != 0) {
                ret = -errno;
                fprintf(stderr,
                        "%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
                        __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
                        (int)size, strerror(errno));
        }

        return ret;
}
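
/**
 * Asks the kernel which pipe drives the given CRTC, returning -1 when
 * the information is unavailable.
 */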
static int
drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id)
{
        drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
        struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id;
        int ret;

        get_pipe_from_crtc_id.crtc_id = crtc_id;
        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
                    &get_pipe_from_crtc_id);
        if (ret != 0) {
                /* We return -1 here to signal that we don't
                 * know which pipe is associated with this crtc.
                 * This lets the caller know that this information
                 * isn't available; using the wrong pipe for
                 * vblank waiting can cause the chipset to lock up.
                 */
                return -1;
        }

        return get_pipe_from_crtc_id.pipe;
}

static int
drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
                             unsigned long size, void *data)
{
        drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
        struct drm_i915_gem_pread pread;
        int ret;

        memset(&pread, 0, sizeof(pread));
        pread.handle = bo_gem->gem_handle;
        pread.offset = offset;
        pread.size = size;
        pread.data_ptr = (uint64_t) (uintptr_t) data;
        do {
                ret = ioctl(bufmgr_gem->fd,
                            DRM_IOCTL_I915_GEM_PREAD,
                            &pread);
        } while (ret == -1 && errno == EINTR);
        if (ret != 0) {
                ret = -errno;
                fprintf(stderr,
                        "%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
                        __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
                        (int)size, strerror(errno));
        }

        return ret;
}

/** Waits for all GPU rendering to the object to have completed. */
static void
drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
{
        drm_intel_gem_bo_start_gtt_access(bo, 0);
}

/**
 * Sets the object to the GTT read and possibly write domain, used by the X
 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
 *
 * In combination with drm_intel_gem_bo_pin() and manual fence management, we
 * can do tiled pixmaps this way.
 */
void
drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
{
        drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
        struct drm_i915_gem_set_domain set_domain;
        int ret;

        set_domain.handle = bo_gem->gem_handle;
        set_domain.read_domains = I915_GEM_DOMAIN_GTT;
        set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
        do {
                ret = ioctl(bufmgr_gem->fd,
                            DRM_IOCTL_I915_GEM_SET_DOMAIN,
                            &set_domain);
        } while (ret == -1 && errno == EINTR);
        if (ret != 0) {
                fprintf(stderr,
                        "%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
                        __FILE__, __LINE__, bo_gem->gem_handle,
                        set_domain.read_domains, set_domain.write_domain,
                        strerror(errno));
        }
}
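
/**
 * Tears down the buffer manager: frees the execbuffer arrays, destroys
 * the mutex and releases every buffer still held in the reuse cache.
 */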
static void
drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
{
        drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
        int i;

        free(bufmgr_gem->exec2_objects);
        free(bufmgr_gem->exec_objects);
        free(bufmgr_gem->exec_bos);

        pthread_mutex_destroy(&bufmgr_gem->lock);

        /* Free any cached buffer objects we were going to reuse */
        for (i = 0; i < bufmgr_gem->num_buckets; i++) {
                struct drm_intel_gem_bo_bucket *bucket =
                    &bufmgr_gem->cache_bucket[i];
                drm_intel_bo_gem *bo_gem;

                while (!DRMLISTEMPTY(&bucket->head)) {
                        bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
                                              bucket->head.next, head);
                        DRMLISTDEL(&bo_gem->head);

                        drm_intel_gem_bo_free(&bo_gem->bo);
                }
        }

        free(bufmgr);
}

/**
 * Adds the target buffer to the validation list and adds the relocation
 * to the reloc_buffer's relocation list.
 *
 * The relocation entry at the given offset must already contain the
 * precomputed relocation value, because the kernel will optimize out
 * the relocation entry write when the buffer hasn't moved from the
 * last known offset in target_bo.
 */
static int
do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
                 drm_intel_bo *target_bo, uint32_t target_offset,
                 uint32_t read_domains, uint32_t write_domain,
                 int need_fence)
{
        drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
        drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;

        if (bo_gem->has_error)
                return -ENOMEM;

        if (target_bo_gem->has_error) {
                bo_gem->has_error = 1;
                return -ENOMEM;
        }

        if (target_bo_gem->tiling_mode == I915_TILING_NONE)
                need_fence = 0;

        /* We never use HW fences for rendering on 965+ */
        if (bufmgr_gem->gen >= 4)
                need_fence = 0;

        /* Create a new relocation list if needed */
        if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo))
                return -ENOMEM;

        /* Check overflow */
        assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);

        /* Check args */
        assert(offset <= bo->size - 4);
        assert((write_domain & (write_domain - 1)) == 0);

        /* Make sure that we're not adding a reloc to something whose size has
         * already been accounted for.
         */
        assert(!bo_gem->used_as_reloc_target);
        if (target_bo_gem != bo_gem) {
                /* Flag the target to disallow further relocations in it. */
                target_bo_gem->used_as_reloc_target = 1;
                bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
        }

        /* An object needing a fence is a tiled buffer, so it won't have
         * relocs to other buffers.
         */
        if (need_fence)
                target_bo_gem->reloc_tree_fences = 1;
        bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;

        bo_gem->relocs[bo_gem->reloc_count].offset = offset;
        bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
        bo_gem->relocs[bo_gem->reloc_count].target_handle =
            target_bo_gem->gem_handle;
        bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
        bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
        bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset;

        bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
        if (target_bo != bo)
                drm_intel_gem_bo_reference(target_bo);
        if (need_fence)
                bo_gem->reloc_target_info[bo_gem->reloc_count].flags =
                        DRM_INTEL_RELOC_FENCE;
        else
                bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;

        bo_gem->reloc_count++;

        return 0;
}

static int
drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
                            drm_intel_bo *target_bo, uint32_t target_offset,
                            uint32_t read_domains, uint32_t write_domain)
{
        drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;

        return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
                                read_domains, write_domain,
                                !bufmgr_gem->fenced_relocs);
}

static int
drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
                                  drm_intel_bo *target_bo,
                                  uint32_t target_offset,
                                  uint32_t read_domains, uint32_t write_domain)
{
        return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
                                read_domains, write_domain, 1);
}

/**
 * Walk the tree of relocations rooted at BO and accumulate the list of
 * validations to be performed and update the relocation buffers with
 * index values into the validation list.
 */
static void
drm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
{
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
        int i;

        if (bo_gem->relocs == NULL)
                return;

        for (i = 0; i < bo_gem->reloc_count; i++) {
                drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;

                if (target_bo == bo)
                        continue;

                /* Continue walking the tree depth-first. */
                drm_intel_gem_bo_process_reloc(target_bo);

                /* Add the target to the validate list */
                drm_intel_add_validate_buffer(target_bo);
        }
}
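
/**
 * Execbuffer2 variant of drm_intel_gem_bo_process_reloc() that also
 * propagates each target's fence requirement to its validation entry.
 */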
static void
drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
{
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
        int i;

        if (bo_gem->relocs == NULL)
                return;

        for (i = 0; i < bo_gem->reloc_count; i++) {
                drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
                int need_fence;

                if (target_bo == bo)
                        continue;

                /* Continue walking the tree depth-first. */
                drm_intel_gem_bo_process_reloc2(target_bo);

                need_fence = (bo_gem->reloc_target_info[i].flags &
                              DRM_INTEL_RELOC_FENCE);

                /* Add the target to the validate list */
                drm_intel_add_validate_buffer2(target_bo, need_fence);
        }
}

static void
drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
{
        int i;

        for (i = 0; i < bufmgr_gem->exec_count; i++) {
                drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
                drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

                /* Update the buffer offset */
                if (bufmgr_gem->exec_objects[i].offset != bo->offset) {
                        DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
                            bo_gem->gem_handle, bo_gem->name, bo->offset,
                            (unsigned long long)bufmgr_gem->exec_objects[i].offset);
                        bo->offset = bufmgr_gem->exec_objects[i].offset;
                }
        }
}
static void
drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
{
        int i;

        for (i = 0; i < bufmgr_gem->exec_count; i++) {
                drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
                drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;

                /* Update the buffer offset */
                if (bufmgr_gem->exec2_objects[i].offset != bo->offset) {
                        DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
                            bo_gem->gem_handle, bo_gem->name, bo->offset,
                            (unsigned long long)bufmgr_gem->exec2_objects[i].offset);
                        bo->offset = bufmgr_gem->exec2_objects[i].offset;
                }
        }
}

static int
drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
                      drm_clip_rect_t * cliprects, int num_cliprects, int DR4)
{
        drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
        struct drm_i915_gem_execbuffer execbuf;
        int ret, i;

        if (bo_gem->has_error)
                return -ENOMEM;

        pthread_mutex_lock(&bufmgr_gem->lock);
        /* Update indices and set up the validate list. */
        drm_intel_gem_bo_process_reloc(bo);

        /* Add the batch buffer to the validation list.  There are no
         * relocations pointing to it.
         */
        drm_intel_add_validate_buffer(bo);

        execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects;
        execbuf.buffer_count = bufmgr_gem->exec_count;
        execbuf.batch_start_offset = 0;
        execbuf.batch_len = used;
        execbuf.cliprects_ptr = (uintptr_t) cliprects;
        execbuf.num_cliprects = num_cliprects;
        execbuf.DR1 = 0;
        execbuf.DR4 = DR4;

        do {
                ret = ioctl(bufmgr_gem->fd,
                            DRM_IOCTL_I915_GEM_EXECBUFFER,
                            &execbuf);
        } while (ret != 0 && errno == EINTR);

        if (ret != 0) {
                ret = -errno;
                if (errno == ENOSPC) {
                        fprintf(stderr,
                                "Execbuffer fails to pin. "
                                "Estimate: %u. Actual: %u. Available: %u\n",
                                drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
                                                                   bufmgr_gem->exec_count),
                                drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
                                                                  bufmgr_gem->exec_count),
                                (unsigned int)bufmgr_gem->gtt_size);
                }
        }
        drm_intel_update_buffer_offsets(bufmgr_gem);

        if (bufmgr_gem->bufmgr.debug)
                drm_intel_gem_dump_validation_list(bufmgr_gem);

        for (i = 0; i < bufmgr_gem->exec_count; i++) {
                drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
                drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

                /* Disconnect the buffer from the validate list */
                bo_gem->validate_index = -1;
                bufmgr_gem->exec_bos[i] = NULL;
        }
        bufmgr_gem->exec_count = 0;
        pthread_mutex_unlock(&bufmgr_gem->lock);

        return ret;
}
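
/*
 * Illustrative usage (a sketch, not part of this file): submitting a
 * hypothetical batchbuffer through the legacy execbuf path wrapped above.
 * 'used' is how many bytes of the batch were actually filled in, and the
 * commands must end with MI_BATCH_BUFFER_END.
 *
 *        drm_intel_bo_subdata(batch, 0, used, batch_commands);
 *        ret = drm_intel_bo_exec(batch, used, NULL, 0, 0);
 *        if (ret != 0)
 *                fprintf(stderr, "execbuf failed: %s\n", strerror(-ret));
 */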
static int
drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
                           drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
                           int ring_flag)
{
        drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
        struct drm_i915_gem_execbuffer2 execbuf;
        int ret, i;

        if ((ring_flag != I915_EXEC_RENDER) && (ring_flag != I915_EXEC_BSD))
                return -EINVAL;

        pthread_mutex_lock(&bufmgr_gem->lock);
        /* Update indices and set up the validate list. */
        drm_intel_gem_bo_process_reloc2(bo);

        /* Add the batch buffer to the validation list.  There are no
         * relocations pointing to it.
         */
        drm_intel_add_validate_buffer2(bo, 0);

        execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
        execbuf.buffer_count = bufmgr_gem->exec_count;
        execbuf.batch_start_offset = 0;
        execbuf.batch_len = used;
        execbuf.cliprects_ptr = (uintptr_t)cliprects;
        execbuf.num_cliprects = num_cliprects;
        execbuf.DR1 = 0;
        execbuf.DR4 = DR4;
        execbuf.flags = ring_flag;
        execbuf.rsvd1 = 0;
        execbuf.rsvd2 = 0;

        do {
                ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2,
                            &execbuf);
        } while (ret != 0 && errno == EINTR);

        if (ret != 0) {
                ret = -errno;
                if (ret == -ENOMEM) {
                        fprintf(stderr,
                                "Execbuffer fails to pin. "
                                "Estimate: %u. Actual: %u. Available: %u\n",
                                drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
                                                                   bufmgr_gem->exec_count),
                                drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
                                                                  bufmgr_gem->exec_count),
                                (unsigned int) bufmgr_gem->gtt_size);
                }
        }
        drm_intel_update_buffer_offsets2(bufmgr_gem);

        if (bufmgr_gem->bufmgr.debug)
                drm_intel_gem_dump_validation_list(bufmgr_gem);

        for (i = 0; i < bufmgr_gem->exec_count; i++) {
                drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
                drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;

                /* Disconnect the buffer from the validate list */
                bo_gem->validate_index = -1;
                bufmgr_gem->exec_bos[i] = NULL;
        }
        bufmgr_gem->exec_count = 0;
        pthread_mutex_unlock(&bufmgr_gem->lock);

        return ret;
}
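
/*
 * Illustrative usage (a sketch): dispatching a hypothetical video-decode
 * batch on the BSD ring, assuming the drm_intel_bo_mrb_exec() wrapper in
 * intel_bufmgr.h routes to the bo_mrb_exec hook above; this only works when
 * drm_intel_bufmgr_gem_init() detected both execbuf2 and BSD support.
 *
 *        ret = drm_intel_bo_mrb_exec(bsd_batch, used, NULL, 0, 0,
 *                                    I915_EXEC_BSD);
 *        if (ret != 0)
 *                ; // e.g. ring unavailable -- fall back to the render ring
 */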
static int
drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used,
                       drm_clip_rect_t *cliprects, int num_cliprects,
                       int DR4)
{
        return drm_intel_gem_bo_mrb_exec2(bo, used,
                                          cliprects, num_cliprects, DR4,
                                          I915_EXEC_RENDER);
}

static int
drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
{
        drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
        struct drm_i915_gem_pin pin;
        int ret;

        memset(&pin, 0, sizeof(pin));
        pin.handle = bo_gem->gem_handle;
        pin.alignment = alignment;

        do {
                ret = ioctl(bufmgr_gem->fd,
                            DRM_IOCTL_I915_GEM_PIN,
                            &pin);
        } while (ret == -1 && errno == EINTR);

        if (ret != 0)
                return -errno;

        bo->offset = pin.offset;
        return 0;
}

static int
drm_intel_gem_bo_unpin(drm_intel_bo *bo)
{
        drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
        struct drm_i915_gem_unpin unpin;
        int ret;

        memset(&unpin, 0, sizeof(unpin));
        unpin.handle = bo_gem->gem_handle;

        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
        if (ret != 0)
                return -errno;

        return 0;
}
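
/*
 * Illustrative usage (a sketch): pinning a hypothetical scanout buffer so
 * the display engine can read it at a stable GTT offset.  Pinning requires
 * DRM master privileges, so ordinary rendering clients never call this.
 *
 *        ret = drm_intel_bo_pin(scanout_bo, 4096);
 *        if (ret == 0) {
 *                program_crtc_base(scanout_bo->offset); // hypothetical helper
 *                ...
 *                drm_intel_bo_unpin(scanout_bo);
 *        }
 */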
static int
drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
                            uint32_t stride)
{
        drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
        struct drm_i915_gem_set_tiling set_tiling;
        int ret;

        if (bo_gem->global_name == 0 && *tiling_mode == bo_gem->tiling_mode)
                return 0;

        memset(&set_tiling, 0, sizeof(set_tiling));
        set_tiling.handle = bo_gem->gem_handle;

        do {
                set_tiling.tiling_mode = *tiling_mode;
                set_tiling.stride = stride;

                ret = ioctl(bufmgr_gem->fd,
                            DRM_IOCTL_I915_GEM_SET_TILING,
                            &set_tiling);
        } while (ret == -1 && errno == EINTR);
        if (ret == 0) {
                bo_gem->tiling_mode = set_tiling.tiling_mode;
                bo_gem->swizzle_mode = set_tiling.swizzle_mode;
                drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
        } else
                ret = -errno;

        *tiling_mode = bo_gem->tiling_mode;
        return ret;
}

static int
drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
                            uint32_t * swizzle_mode)
{
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

        *tiling_mode = bo_gem->tiling_mode;
        *swizzle_mode = bo_gem->swizzle_mode;
        return 0;
}

static int
drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name)
{
        drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
        struct drm_gem_flink flink;
        int ret;

        if (!bo_gem->global_name) {
                memset(&flink, 0, sizeof(flink));
                flink.handle = bo_gem->gem_handle;

                ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink);
                if (ret != 0)
                        return -errno;
                bo_gem->global_name = flink.name;
                bo_gem->reusable = 0;
        }

        *name = bo_gem->global_name;
        return 0;
}

/**
 * Enables unlimited caching of buffer objects for reuse.
 *
 * This is potentially very memory expensive, as the cache at each bucket
 * size is only bounded by how many buffers of that size we've managed to have
 * in flight at once.
 */
void
drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr)
{
        drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;

        bufmgr_gem->bo_reuse = 1;
}

/**
 * Enable use of fenced reloc type.
 *
 * New code should enable this to avoid unnecessary fence register
 * allocation.  If this option is not enabled, all relocs will have a fence
 * register allocated.
 */
void
drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr)
{
        drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;

        if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2)
                bufmgr_gem->fenced_relocs = 1;
}
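
/*
 * Illustrative usage (a sketch): exporting a hypothetical buffer to another
 * process via its flink name, which the peer then opens with
 * drm_intel_bo_gem_create_from_name().  Note that flinking marks the buffer
 * non-reusable, keeping it out of the cache buckets below.
 *
 *        uint32_t name;
 *        if (drm_intel_bo_flink(shared_bo, &name) == 0)
 *                send_name_to_peer(name);            // hypothetical IPC
 *
 *        // In the peer process, on its own bufmgr:
 *        drm_intel_bo *bo =
 *                drm_intel_bo_gem_create_from_name(bufmgr, "shared", name);
 */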
176322944501Smrg */ 176422944501Smrgvoid 176522944501Smrgdrm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr) 176622944501Smrg{ 176722944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 176822944501Smrg 176922944501Smrg if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2) 177022944501Smrg bufmgr_gem->fenced_relocs = 1; 177122944501Smrg} 177222944501Smrg 177322944501Smrg/** 177422944501Smrg * Return the additional aperture space required by the tree of buffer objects 177522944501Smrg * rooted at bo. 177622944501Smrg */ 177722944501Smrgstatic int 177822944501Smrgdrm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo) 177922944501Smrg{ 178022944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 178122944501Smrg int i; 178222944501Smrg int total = 0; 178322944501Smrg 178422944501Smrg if (bo == NULL || bo_gem->included_in_check_aperture) 178522944501Smrg return 0; 178622944501Smrg 178722944501Smrg total += bo->size; 178822944501Smrg bo_gem->included_in_check_aperture = 1; 178922944501Smrg 179022944501Smrg for (i = 0; i < bo_gem->reloc_count; i++) 179122944501Smrg total += 179222944501Smrg drm_intel_gem_bo_get_aperture_space(bo_gem-> 179322944501Smrg reloc_target_info[i].bo); 179422944501Smrg 179522944501Smrg return total; 179622944501Smrg} 179722944501Smrg 179822944501Smrg/** 179922944501Smrg * Count the number of buffers in this list that need a fence reg 180022944501Smrg * 180122944501Smrg * If the count is greater than the number of available regs, we'll have 180222944501Smrg * to ask the caller to resubmit a batch with fewer tiled buffers. 180322944501Smrg * 180422944501Smrg * This function over-counts if the same buffer is used multiple times. 180522944501Smrg */ 180622944501Smrgstatic unsigned int 180722944501Smrgdrm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count) 180822944501Smrg{ 180922944501Smrg int i; 181022944501Smrg unsigned int total = 0; 181122944501Smrg 181222944501Smrg for (i = 0; i < count; i++) { 181322944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 181422944501Smrg 181522944501Smrg if (bo_gem == NULL) 181622944501Smrg continue; 181722944501Smrg 181822944501Smrg total += bo_gem->reloc_tree_fences; 181922944501Smrg } 182022944501Smrg return total; 182122944501Smrg} 182222944501Smrg 182322944501Smrg/** 182422944501Smrg * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready 182522944501Smrg * for the next drm_intel_bufmgr_check_aperture_space() call. 182622944501Smrg */ 182722944501Smrgstatic void 182822944501Smrgdrm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo) 182922944501Smrg{ 183022944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 183122944501Smrg int i; 183222944501Smrg 183322944501Smrg if (bo == NULL || !bo_gem->included_in_check_aperture) 183422944501Smrg return; 183522944501Smrg 183622944501Smrg bo_gem->included_in_check_aperture = 0; 183722944501Smrg 183822944501Smrg for (i = 0; i < bo_gem->reloc_count; i++) 183922944501Smrg drm_intel_gem_bo_clear_aperture_space_flag(bo_gem-> 184022944501Smrg reloc_target_info[i].bo); 184122944501Smrg} 184222944501Smrg 184322944501Smrg/** 184422944501Smrg * Return a conservative estimate for the amount of aperture required 184522944501Smrg * for a collection of buffers. This may double-count some buffers. 
184622944501Smrg */ 184722944501Smrgstatic unsigned int 184822944501Smrgdrm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count) 184922944501Smrg{ 185022944501Smrg int i; 185122944501Smrg unsigned int total = 0; 185222944501Smrg 185322944501Smrg for (i = 0; i < count; i++) { 185422944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 185522944501Smrg if (bo_gem != NULL) 185622944501Smrg total += bo_gem->reloc_tree_size; 185722944501Smrg } 185822944501Smrg return total; 185922944501Smrg} 186022944501Smrg 186122944501Smrg/** 186222944501Smrg * Return the amount of aperture needed for a collection of buffers. 186322944501Smrg * This avoids double counting any buffers, at the cost of looking 186422944501Smrg * at every buffer in the set. 186522944501Smrg */ 186622944501Smrgstatic unsigned int 186722944501Smrgdrm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count) 186822944501Smrg{ 186922944501Smrg int i; 187022944501Smrg unsigned int total = 0; 187122944501Smrg 187222944501Smrg for (i = 0; i < count; i++) { 187322944501Smrg total += drm_intel_gem_bo_get_aperture_space(bo_array[i]); 187422944501Smrg /* For the first buffer object in the array, we get an 187522944501Smrg * accurate count back for its reloc_tree size (since nothing 187622944501Smrg * had been flagged as being counted yet). We can save that 187722944501Smrg * value out as a more conservative reloc_tree_size that 187822944501Smrg * avoids double-counting target buffers. Since the first 187922944501Smrg * buffer happens to usually be the batch buffer in our 188022944501Smrg * callers, this can pull us back from doing the tree 188122944501Smrg * walk on every new batch emit. 188222944501Smrg */ 188322944501Smrg if (i == 0) { 188422944501Smrg drm_intel_bo_gem *bo_gem = 188522944501Smrg (drm_intel_bo_gem *) bo_array[i]; 188622944501Smrg bo_gem->reloc_tree_size = total; 188722944501Smrg } 188822944501Smrg } 188922944501Smrg 189022944501Smrg for (i = 0; i < count; i++) 189122944501Smrg drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]); 189222944501Smrg return total; 189322944501Smrg} 189422944501Smrg 189522944501Smrg/** 189622944501Smrg * Return -1 if the batchbuffer should be flushed before attempting to 189722944501Smrg * emit rendering referencing the buffers pointed to by bo_array. 189822944501Smrg * 189922944501Smrg * This is required because if we try to emit a batchbuffer with relocations 190022944501Smrg * to a tree of buffers that won't simultaneously fit in the aperture, 190122944501Smrg * the rendering will return an error at a point where the software is not 190222944501Smrg * prepared to recover from it. 190322944501Smrg * 190422944501Smrg * However, we also want to emit the batchbuffer significantly before we reach 190522944501Smrg * the limit, as a series of batchbuffers each of which references buffers 190622944501Smrg * covering almost all of the aperture means that at each emit we end up 190722944501Smrg * waiting to evict a buffer from the last rendering, and we get synchronous 190822944501Smrg * performance. By emitting smaller batchbuffers, we eat some CPU overhead to 190922944501Smrg * get better parallelism. 
191022944501Smrg */ 191122944501Smrgstatic int 191222944501Smrgdrm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count) 191322944501Smrg{ 191422944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = 191522944501Smrg (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr; 191622944501Smrg unsigned int total = 0; 191722944501Smrg unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4; 191822944501Smrg int total_fences; 191922944501Smrg 192022944501Smrg /* Check for fence reg constraints if necessary */ 192122944501Smrg if (bufmgr_gem->available_fences) { 192222944501Smrg total_fences = drm_intel_gem_total_fences(bo_array, count); 192322944501Smrg if (total_fences > bufmgr_gem->available_fences) 192422944501Smrg return -ENOSPC; 192522944501Smrg } 192622944501Smrg 192722944501Smrg total = drm_intel_gem_estimate_batch_space(bo_array, count); 192822944501Smrg 192922944501Smrg if (total > threshold) 193022944501Smrg total = drm_intel_gem_compute_batch_space(bo_array, count); 193122944501Smrg 193222944501Smrg if (total > threshold) { 193322944501Smrg DBG("check_space: overflowed available aperture, " 193422944501Smrg "%dkb vs %dkb\n", 193522944501Smrg total / 1024, (int)bufmgr_gem->gtt_size / 1024); 193622944501Smrg return -ENOSPC; 193722944501Smrg } else { 193822944501Smrg DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024, 193922944501Smrg (int)bufmgr_gem->gtt_size / 1024); 194022944501Smrg return 0; 194122944501Smrg } 194222944501Smrg} 194322944501Smrg 194422944501Smrg/* 194522944501Smrg * Disable buffer reuse for objects which are shared with the kernel 194622944501Smrg * as scanout buffers 194722944501Smrg */ 194822944501Smrgstatic int 194922944501Smrgdrm_intel_gem_bo_disable_reuse(drm_intel_bo *bo) 195022944501Smrg{ 195122944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 195222944501Smrg 195322944501Smrg bo_gem->reusable = 0; 195422944501Smrg return 0; 195522944501Smrg} 195622944501Smrg 1957aaba2545Smrgstatic int 1958aaba2545Smrgdrm_intel_gem_bo_is_reusable(drm_intel_bo *bo) 1959aaba2545Smrg{ 1960aaba2545Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1961aaba2545Smrg 1962aaba2545Smrg return bo_gem->reusable; 1963aaba2545Smrg} 1964aaba2545Smrg 196522944501Smrgstatic int 196622944501Smrg_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) 196722944501Smrg{ 196822944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 196922944501Smrg int i; 197022944501Smrg 197122944501Smrg for (i = 0; i < bo_gem->reloc_count; i++) { 197222944501Smrg if (bo_gem->reloc_target_info[i].bo == target_bo) 197322944501Smrg return 1; 1974aaba2545Smrg if (bo == bo_gem->reloc_target_info[i].bo) 1975aaba2545Smrg continue; 197622944501Smrg if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo, 197722944501Smrg target_bo)) 197822944501Smrg return 1; 197922944501Smrg } 198022944501Smrg 198122944501Smrg return 0; 198222944501Smrg} 198322944501Smrg 198422944501Smrg/** Return true if target_bo is referenced by bo's relocation tree. 
static void
add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
{
        unsigned int i = bufmgr_gem->num_buckets;

        assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));

        DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
        bufmgr_gem->cache_bucket[i].size = size;
        bufmgr_gem->num_buckets++;
}

static void
init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
{
        unsigned long size, cache_max_size = 64 * 1024 * 1024;

        /* OK, so power of two buckets was too wasteful of memory.
         * Give 3 other sizes between each power of two, to hopefully
         * cover things accurately enough.  (The alternative is
         * probably to just go for exact matching of sizes, and assume
         * that for things like composited window resize the tiled
         * width/height alignment and rounding of sizes to pages will
         * get us useful cache hit rates anyway)
         */
        add_bucket(bufmgr_gem, 4096);
        add_bucket(bufmgr_gem, 4096 * 2);
        add_bucket(bufmgr_gem, 4096 * 3);

        /* Initialize the linked lists for BO reuse cache. */
        for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
                add_bucket(bufmgr_gem, size);

                add_bucket(bufmgr_gem, size + size * 1 / 4);
                add_bucket(bufmgr_gem, size + size * 2 / 4);
                add_bucket(bufmgr_gem, size + size * 3 / 4);
        }
}
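
/*
 * For reference, the bucket sizes the arithmetic above generates (in KB):
 * 4, 8, 12, then four per power-of-two step -- 16, 20, 24, 28; 32, 40, 48,
 * 56; 64, 80, 96, 112; ... up through the 64 MB step.  That is 3 + 13 * 4 =
 * 55 buckets, within the 14 * 4 = 56 entries reserved in cache_bucket[].
 */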
/**
 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
 * and manage buffer objects.
 *
 * \param fd File descriptor of the opened DRM device.
 * \param batch_size Size in bytes of the batchbuffers the client will use,
 *        which bounds how many relocation entries get preallocated.
 */
drm_intel_bufmgr *
drm_intel_bufmgr_gem_init(int fd, int batch_size)
{
        drm_intel_bufmgr_gem *bufmgr_gem;
        struct drm_i915_gem_get_aperture aperture;
        drm_i915_getparam_t gp;
        int ret;
        int exec2 = 0, has_bsd = 0;
        int tmp;

        bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
        if (bufmgr_gem == NULL)
                return NULL;

        bufmgr_gem->fd = fd;

        if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
                free(bufmgr_gem);
                return NULL;
        }

        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);

        if (ret == 0)
                bufmgr_gem->gtt_size = aperture.aper_available_size;
        else {
                fprintf(stderr, "DRM_IOCTL_I915_GEM_GET_APERTURE failed: %s\n",
                        strerror(errno));
                bufmgr_gem->gtt_size = 128 * 1024 * 1024;
                fprintf(stderr, "Assuming %dkB available aperture size.\n"
                        "May lead to reduced performance or incorrect "
                        "rendering.\n",
                        (int)bufmgr_gem->gtt_size / 1024);
        }

        gp.param = I915_PARAM_CHIPSET_ID;
        gp.value = &bufmgr_gem->pci_device;
        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
        if (ret) {
                fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
                fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
        }

        if (IS_GEN2(bufmgr_gem))
                bufmgr_gem->gen = 2;
        else if (IS_GEN3(bufmgr_gem))
                bufmgr_gem->gen = 3;
        else if (IS_GEN4(bufmgr_gem))
                bufmgr_gem->gen = 4;
        else
                bufmgr_gem->gen = 6;

        /* Answer the yes/no feature queries into scratch storage, so the
         * kernel's reply doesn't clobber pci_device, which gp.value would
         * otherwise still point at.
         */
        gp.value = &tmp;

        gp.param = I915_PARAM_HAS_EXECBUF2;
        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
        if (!ret)
                exec2 = 1;

        gp.param = I915_PARAM_HAS_BSD;
        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
        if (!ret)
                has_bsd = 1;

        if (bufmgr_gem->gen < 4) {
                gp.param = I915_PARAM_NUM_FENCES_AVAIL;
                gp.value = &bufmgr_gem->available_fences;
                ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
                if (ret) {
                        fprintf(stderr, "get fences failed: %d [%d]\n", ret,
                                errno);
                        fprintf(stderr, "param: %d, val: %d\n", gp.param,
                                *gp.value);
                        bufmgr_gem->available_fences = 0;
                } else {
                        /* XXX The kernel reports the total number of fences,
                         * including any that may be pinned.
                         *
                         * We presume that there will be at least one pinned
                         * fence for the scanout buffer, but there may be more
                         * than one scanout and the user may be manually
                         * pinning buffers.
                         * Let's move to execbuffer2 and
                         * thereby forget the insanity of using fences...
                         */
                        bufmgr_gem->available_fences -= 2;
                        if (bufmgr_gem->available_fences < 0)
                                bufmgr_gem->available_fences = 0;
                }
        }

        /* Let's go with one relocation per every 2 dwords (but round down a bit
         * since a power of two will mean an extra page allocation for the reloc
         * buffer).
         *
         * Every 4 was too few for the blender benchmark.
         */
        bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;

        bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
        bufmgr_gem->bufmgr.bo_alloc_for_render =
            drm_intel_gem_bo_alloc_for_render;
        bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
        bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
        bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
        bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
        bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
        bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
        bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
        bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
        bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
        bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
        bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
        bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
        bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
        bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
        bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
        /* Use the new one if available */
        if (exec2) {
                bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2;
                if (has_bsd)
                        bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2;
        } else
                bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
        bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy;
        bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise;
        bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy;
        bufmgr_gem->bufmgr.debug = 0;
        bufmgr_gem->bufmgr.check_aperture_space =
            drm_intel_gem_check_aperture_space;
        bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
        bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable;
        bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
            drm_intel_gem_get_pipe_from_crtc_id;
        bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;

        init_cache_buckets(bufmgr_gem);

        return &bufmgr_gem->bufmgr;
}
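
/*
 * Illustrative usage (a sketch, not part of this file): bringing up the
 * bufmgr on an already-opened DRM fd and round-tripping some data.  The
 * 4096-byte batch size feeds the max_relocs sizing above; error handling
 * is elided.
 *
 *        drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 *        drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 *
 *        drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "scratch", 8192, 4096);
 *        drm_intel_bo_map(bo, 1);                  // writable CPU mapping
 *        memset(bo->virtual, 0, 8192);
 *        drm_intel_bo_unmap(bo);
 *        drm_intel_bo_unreference(bo);             // may land in a cache bucket
 *
 *        drm_intel_bufmgr_destroy(bufmgr);
 */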