/**************************************************************************
 *
 * Copyright © 2007 Red Hat Inc.
 * Copyright © 2007 Intel Corporation
 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 *
 **************************************************************************/
/*
 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
 *          Eric Anholt <eric@anholt.net>
 *          Dave Airlie <airlied@linux.ie>
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <xf86drm.h>
#include <xf86atomic.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <stddef.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>

#include "errno.h"
#include "libdrm_lists.h"
#include "intel_bufmgr.h"
#include "intel_bufmgr_priv.h"
#include "intel_chipset.h"
#include "string.h"

#include "i915_drm.h"

#define DBG(...) do {					\
	if (bufmgr_gem->bufmgr.debug)			\
		fprintf(stderr, __VA_ARGS__);		\
} while (0)

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

typedef struct _drm_intel_bo_gem drm_intel_bo_gem;

struct drm_intel_gem_bo_bucket {
	drmMMListHead head;
	unsigned long size;
};

typedef struct _drm_intel_bufmgr_gem {
	drm_intel_bufmgr bufmgr;

	int fd;

	int max_relocs;

	pthread_mutex_t lock;

	struct drm_i915_gem_exec_object *exec_objects;
	struct drm_i915_gem_exec_object2 *exec2_objects;
	drm_intel_bo **exec_bos;
	int exec_size;
	int exec_count;

	/** Array of lists of cached gem objects of power-of-two sizes */
	struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
	int num_buckets;
	time_t time;

	uint64_t gtt_size;
	int available_fences;
	int pci_device;
	int gen;
	char bo_reuse;
	char fenced_relocs;
} drm_intel_bufmgr_gem;

#define DRM_INTEL_RELOC_FENCE (1<<0)

typedef struct _drm_intel_reloc_target_info {
	drm_intel_bo *bo;
	int flags;
} drm_intel_reloc_target;

struct _drm_intel_bo_gem {
	drm_intel_bo bo;

	atomic_t refcount;
	uint32_t gem_handle;
	const char *name;

	/**
	 * Kernel-assigned global name for this object
	 */
	unsigned int global_name;

	/**
	 * Index of the buffer within the validation list while preparing a
	 * batchbuffer execution.
	 */
	int validate_index;

	/**
	 * Current tiling mode
	 */
	uint32_t tiling_mode;
	uint32_t swizzle_mode;
	unsigned long stride;

	time_t free_time;

	/** Array passed to the DRM containing relocation information. */
	struct drm_i915_gem_relocation_entry *relocs;
	/**
	 * Array of info structs corresponding to relocs[i].target_handle etc
	 */
	drm_intel_reloc_target *reloc_target_info;
	/** Number of entries in relocs */
	int reloc_count;
	/** Mapped address for the buffer, saved across map/unmap cycles */
	void *mem_virtual;
	/** GTT virtual address for the buffer, saved across map/unmap cycles */
	void *gtt_virtual;

	/** BO cache list */
	drmMMListHead head;

	/**
	 * Boolean of whether this BO and its children have been included in
	 * the current drm_intel_bufmgr_check_aperture_space() total.
	 */
	char included_in_check_aperture;

	/**
	 * Boolean of whether this buffer has been used as a relocation
	 * target and had its size accounted for, and thus can't have any
	 * further relocations added to it.
	 */
	char used_as_reloc_target;

	/**
	 * Boolean of whether we have encountered an error whilst building
	 * the relocation tree.
	 */
	char has_error;

	/**
	 * Boolean of whether this buffer can be re-used
	 */
	char reusable;

	/**
	 * Size in bytes of this buffer and its relocation descendants.
	 *
	 * Used to avoid costly tree walking in
	 * drm_intel_bufmgr_check_aperture in the common case.
	 */
	int reloc_tree_size;

	/**
	 * Number of potential fence registers required by this buffer and its
	 * relocations.
	 */
	int reloc_tree_fences;
};

static unsigned int
drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);

static unsigned int
drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count);

static int
drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
			    uint32_t * swizzle_mode);

static int
drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
				     uint32_t tiling_mode,
				     uint32_t stride);

static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
						      time_t time);

static void drm_intel_gem_bo_unreference(drm_intel_bo *bo);

static void drm_intel_gem_bo_free(drm_intel_bo *bo);

static unsigned long
drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
			   uint32_t *tiling_mode)
{
	unsigned long min_size, max_size;
	unsigned long i;

	if (*tiling_mode == I915_TILING_NONE)
		return size;

	/* 965+ just need multiples of page size for tiling */
	if (bufmgr_gem->gen >= 4)
		return ROUND_UP_TO(size, 4096);

	/* Older chips need powers of two, of at least 512k or 1M */
	if (bufmgr_gem->gen == 3) {
		min_size = 1024*1024;
		max_size = 128*1024*1024;
	} else {
		min_size = 512*1024;
		max_size = 64*1024*1024;
	}

	if (size > max_size) {
		*tiling_mode = I915_TILING_NONE;
		return size;
	}

	for (i = min_size; i < size; i <<= 1)
		;

	return i;
}
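
/*
 * Illustrative sketch, not part of the original file: on a gen-3 part an
 * X-tiled request is bounded below by the 1MB minimum and then rounded up
 * by the power-of-two loop above, so a 300KB request becomes 1MB and a
 * 3MB request becomes 4MB:
 *
 *	uint32_t tiling = I915_TILING_X;
 *
 *	drm_intel_gem_bo_tile_size(bufmgr_gem, 300 * 1024, &tiling);
 *		returns 1024 * 1024
 *	drm_intel_gem_bo_tile_size(bufmgr_gem, 3 * 1024 * 1024, &tiling);
 *		returns 4 * 1024 * 1024
 */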
/*
 * Round a given pitch up to the minimum required for X tiling on a
 * given chip.  We use 512 as the minimum to allow for a later tiling
 * change.
 */
static unsigned long
drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
			    unsigned long pitch, uint32_t *tiling_mode)
{
	unsigned long tile_width;
	unsigned long i;

	/* If untiled, then just align it so that we can do rendering
	 * to it with the 3D engine.
	 */
	if (*tiling_mode == I915_TILING_NONE)
		return ALIGN(pitch, 64);

	if (*tiling_mode == I915_TILING_X)
		tile_width = 512;
	else
		tile_width = 128;

	/* 965 is flexible */
	if (bufmgr_gem->gen >= 4)
		return ROUND_UP_TO(pitch, tile_width);

	/* The older hardware has a maximum pitch of 8192 with tiled
	 * surfaces, so fallback to untiled if it's too large.
	 */
	if (pitch > 8192) {
		*tiling_mode = I915_TILING_NONE;
		return ALIGN(pitch, 64);
	}

	/* Pre-965 needs power of two tile width */
	for (i = tile_width; i < pitch; i <<= 1)
		;

	return i;
}

static struct drm_intel_gem_bo_bucket *
drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
				 unsigned long size)
{
	int i;

	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
		struct drm_intel_gem_bo_bucket *bucket =
		    &bufmgr_gem->cache_bucket[i];
		if (bucket->size >= size) {
			return bucket;
		}
	}

	return NULL;
}

static void
drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
{
	int i, j;

	for (i = 0; i < bufmgr_gem->exec_count; i++) {
		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

		if (bo_gem->relocs == NULL) {
			DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle,
			    bo_gem->name);
			continue;
		}

		for (j = 0; j < bo_gem->reloc_count; j++) {
			drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo;
			drm_intel_bo_gem *target_gem =
			    (drm_intel_bo_gem *) target_bo;

			DBG("%2d: %d (%s)@0x%08llx -> "
			    "%d (%s)@0x%08lx + 0x%08x\n",
			    i,
			    bo_gem->gem_handle, bo_gem->name,
			    (unsigned long long)bo_gem->relocs[j].offset,
			    target_gem->gem_handle,
			    target_gem->name,
			    target_bo->offset,
			    bo_gem->relocs[j].delta);
		}
	}
}

static inline void
drm_intel_gem_bo_reference(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	assert(atomic_read(&bo_gem->refcount) > 0);
	atomic_inc(&bo_gem->refcount);
}

/**
 * Adds the given buffer to the list of buffers to be validated (moved into the
 * appropriate memory type) with the next batch submission.
 *
 * If a buffer is validated multiple times in a batch submission, it ends up
 * with the intersection of the memory type flags and the union of the
 * access flags.
 */
static void
drm_intel_add_validate_buffer(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int index;

	if (bo_gem->validate_index != -1)
		return;

	/* Extend the array of validation entries as necessary. */
	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
		int new_size = bufmgr_gem->exec_size * 2;

		if (new_size == 0)
			new_size = 5;

		bufmgr_gem->exec_objects =
		    realloc(bufmgr_gem->exec_objects,
			    sizeof(*bufmgr_gem->exec_objects) * new_size);
		bufmgr_gem->exec_bos =
		    realloc(bufmgr_gem->exec_bos,
			    sizeof(*bufmgr_gem->exec_bos) * new_size);
		bufmgr_gem->exec_size = new_size;
	}

	index = bufmgr_gem->exec_count;
	bo_gem->validate_index = index;
	/* Fill in array entry */
	bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
	bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
	bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs;
	bufmgr_gem->exec_objects[index].alignment = 0;
	bufmgr_gem->exec_objects[index].offset = 0;
	bufmgr_gem->exec_bos[index] = bo;
	bufmgr_gem->exec_count++;
}

static void
drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
	int index;

	if (bo_gem->validate_index != -1) {
		if (need_fence)
			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
				EXEC_OBJECT_NEEDS_FENCE;
		return;
	}

	/* Extend the array of validation entries as necessary. */
	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
		int new_size = bufmgr_gem->exec_size * 2;

		if (new_size == 0)
			new_size = 5;

		bufmgr_gem->exec2_objects =
			realloc(bufmgr_gem->exec2_objects,
				sizeof(*bufmgr_gem->exec2_objects) * new_size);
		bufmgr_gem->exec_bos =
			realloc(bufmgr_gem->exec_bos,
				sizeof(*bufmgr_gem->exec_bos) * new_size);
		bufmgr_gem->exec_size = new_size;
	}

	index = bufmgr_gem->exec_count;
	bo_gem->validate_index = index;
	/* Fill in array entry */
	bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
	bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
	bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
	bufmgr_gem->exec2_objects[index].alignment = 0;
	bufmgr_gem->exec2_objects[index].offset = 0;
	bufmgr_gem->exec_bos[index] = bo;
	bufmgr_gem->exec2_objects[index].flags = 0;
	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
	if (need_fence) {
		bufmgr_gem->exec2_objects[index].flags |=
			EXEC_OBJECT_NEEDS_FENCE;
	}
	bufmgr_gem->exec_count++;
}

#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
	sizeof(uint32_t))

static void
drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem,
				      drm_intel_bo_gem *bo_gem)
{
	int size;

	assert(!bo_gem->used_as_reloc_target);

	/* The older chipsets are far less flexible in terms of tiling,
	 * and require tiled buffers to be size aligned in the aperture.
	 * This means that in the worst possible case we will need a hole
	 * twice as large as the object in order for it to fit into the
	 * aperture.  Optimal packing is for wimps.
	 */
	size = bo_gem->bo.size;
	if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE)
		size *= 2;

	bo_gem->reloc_tree_size = size;
}

static int
drm_intel_setup_reloc_list(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	unsigned int max_relocs = bufmgr_gem->max_relocs;

	if (bo->size / 4 < max_relocs)
		max_relocs = bo->size / 4;

	bo_gem->relocs = malloc(max_relocs *
				sizeof(struct drm_i915_gem_relocation_entry));
	bo_gem->reloc_target_info = malloc(max_relocs *
					   sizeof(drm_intel_reloc_target));
	if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) {
		bo_gem->has_error = 1;

		free (bo_gem->relocs);
		bo_gem->relocs = NULL;

		free (bo_gem->reloc_target_info);
		bo_gem->reloc_target_info = NULL;

		return 1;
	}

	return 0;
}

static int
drm_intel_gem_bo_busy(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_busy busy;
	int ret;

	memset(&busy, 0, sizeof(busy));
	busy.handle = bo_gem->gem_handle;

	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);

	return (ret == 0 && busy.busy);
}

static int
drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem,
				  drm_intel_bo_gem *bo_gem, int state)
{
	struct drm_i915_gem_madvise madv;

	madv.handle = bo_gem->gem_handle;
	madv.madv = state;
	madv.retained = 1;
	drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);

	return madv.retained;
}

static int
drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv)
{
	return drm_intel_gem_bo_madvise_internal
		((drm_intel_bufmgr_gem *) bo->bufmgr,
		 (drm_intel_bo_gem *) bo,
		 madv);
}
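
/*
 * Illustrative usage sketch (assumed caller, not code from this file): the
 * public drm_intel_bo_madvise() wrapper above lets a client mark an idle
 * scratch BO purgeable and ask for it back later; a zero return from the
 * WILLNEED call means the kernel discarded the pages, so the contents must
 * be regenerated (regenerate_contents() is a hypothetical helper):
 *
 *	drm_intel_bo_madvise(scratch_bo, I915_MADV_DONTNEED);
 *	...
 *	if (drm_intel_bo_madvise(scratch_bo, I915_MADV_WILLNEED) == 0)
 *		regenerate_contents(scratch_bo);
 */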
/* drop the oldest entries that have been purged by the kernel */
static void
drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
				    struct drm_intel_gem_bo_bucket *bucket)
{
	while (!DRMLISTEMPTY(&bucket->head)) {
		drm_intel_bo_gem *bo_gem;

		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
				      bucket->head.next, head);
		if (drm_intel_gem_bo_madvise_internal
		    (bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
			break;

		DRMLISTDEL(&bo_gem->head);
		drm_intel_gem_bo_free(&bo_gem->bo);
	}
}

static drm_intel_bo *
drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
				const char *name,
				unsigned long size,
				unsigned long flags,
				uint32_t tiling_mode,
				unsigned long stride)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
	drm_intel_bo_gem *bo_gem;
	unsigned int page_size = getpagesize();
	int ret;
	struct drm_intel_gem_bo_bucket *bucket;
	int alloc_from_cache;
	unsigned long bo_size;
	int for_render = 0;

	if (flags & BO_ALLOC_FOR_RENDER)
		for_render = 1;

	/* Round the allocated size up to a power of two number of pages. */
	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);

	/* If we don't have caching at this size, don't actually round the
	 * allocation up.
	 */
	if (bucket == NULL) {
		bo_size = size;
		if (bo_size < page_size)
			bo_size = page_size;
	} else {
		bo_size = bucket->size;
	}

	pthread_mutex_lock(&bufmgr_gem->lock);
	/* Get a buffer out of the cache if available */
retry:
	alloc_from_cache = 0;
	if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
		if (for_render) {
			/* Allocate new render-target BOs from the tail (MRU)
			 * of the list, as it will likely be hot in the GPU
			 * cache and in the aperture for us.
			 */
			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
					      bucket->head.prev, head);
			DRMLISTDEL(&bo_gem->head);
			alloc_from_cache = 1;
		} else {
			/* For non-render-target BOs (where we're probably
			 * going to map it first thing in order to fill it
			 * with data), check if the last BO in the cache is
			 * unbusy, and only reuse in that case. Otherwise,
			 * allocating a new buffer is probably faster than
			 * waiting for the GPU to finish.
			 */
			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
					      bucket->head.next, head);
			if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
				alloc_from_cache = 1;
				DRMLISTDEL(&bo_gem->head);
			}
		}

		if (alloc_from_cache) {
			if (!drm_intel_gem_bo_madvise_internal
			    (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
				drm_intel_gem_bo_free(&bo_gem->bo);
				drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
								    bucket);
				goto retry;
			}

			if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
								 tiling_mode,
								 stride)) {
				drm_intel_gem_bo_free(&bo_gem->bo);
				goto retry;
			}
		}
	}
	pthread_mutex_unlock(&bufmgr_gem->lock);

	if (!alloc_from_cache) {
		struct drm_i915_gem_create create;

		bo_gem = calloc(1, sizeof(*bo_gem));
		if (!bo_gem)
			return NULL;

		bo_gem->bo.size = bo_size;
		memset(&create, 0, sizeof(create));
		create.size = bo_size;

		ret = drmIoctl(bufmgr_gem->fd,
			       DRM_IOCTL_I915_GEM_CREATE,
			       &create);
		bo_gem->gem_handle = create.handle;
		bo_gem->bo.handle = bo_gem->gem_handle;
		if (ret != 0) {
			free(bo_gem);
			return NULL;
		}
		bo_gem->bo.bufmgr = bufmgr;

		bo_gem->tiling_mode = I915_TILING_NONE;
		bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
		bo_gem->stride = 0;

		if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
							 tiling_mode,
							 stride)) {
			drm_intel_gem_bo_free(&bo_gem->bo);
			return NULL;
		}
	}

	bo_gem->name = name;
	atomic_set(&bo_gem->refcount, 1);
	bo_gem->validate_index = -1;
	bo_gem->reloc_tree_fences = 0;
	bo_gem->used_as_reloc_target = 0;
	bo_gem->has_error = 0;
	bo_gem->reusable = 1;

	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);

	DBG("bo_create: buf %d (%s) %ldb\n",
	    bo_gem->gem_handle, bo_gem->name, size);

	return &bo_gem->bo;
}

static drm_intel_bo *
drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
				  const char *name,
				  unsigned long size,
				  unsigned int alignment)
{
	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
					       BO_ALLOC_FOR_RENDER,
					       I915_TILING_NONE, 0);
}

static drm_intel_bo *
drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr,
		       const char *name,
		       unsigned long size,
		       unsigned int alignment)
{
	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0,
					       I915_TILING_NONE, 0);
}

static drm_intel_bo *
drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
			     int x, int y, int cpp, uint32_t *tiling_mode,
			     unsigned long *pitch, unsigned long flags)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
	unsigned long size, stride;
	uint32_t tiling;

	do {
		unsigned long aligned_y;

		tiling = *tiling_mode;

		/* If we're tiled, our allocations are in 8 or 32-row blocks,
		 * so failure to align our height means that we won't allocate
		 * enough pages.
		 *
		 * If we're untiled, we still have to align to 2 rows high
		 * because the data port accesses 2x2 blocks even if the
		 * bottom row isn't to be rendered, so failure to align means
		 * we could walk off the end of the GTT and fault.  This is
		 * documented on 965, and may be the case on older chipsets
		 * too so we try to be careful.
		 */
		aligned_y = y;
		if (tiling == I915_TILING_NONE)
			aligned_y = ALIGN(y, 2);
		else if (tiling == I915_TILING_X)
			aligned_y = ALIGN(y, 8);
		else if (tiling == I915_TILING_Y)
			aligned_y = ALIGN(y, 32);

		stride = x * cpp;
		stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode);
		size = stride * aligned_y;
		size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
	} while (*tiling_mode != tiling);
	*pitch = stride;

	if (tiling == I915_TILING_NONE)
		stride = 0;

	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags,
					       tiling, stride);
}
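
/*
 * Illustrative caller sketch (assumed usage, not code from this file):
 * tiling_mode is an in/out parameter because the helpers above may fall
 * back to I915_TILING_NONE, and the pitch actually allocated is returned
 * to the caller:
 *
 *	uint32_t tiling = I915_TILING_X;
 *	unsigned long pitch;
 *	drm_intel_bo *bo =
 *		drm_intel_bo_alloc_tiled(bufmgr, "pixmap", 1024, 768, 4,
 *					 &tiling, &pitch, 0);
 *	if (bo != NULL && tiling == I915_TILING_NONE)
 *		... fell back to untiled; pitch is still the one to use ...
 */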
/**
 * Returns a drm_intel_bo wrapping the given buffer object handle.
 *
 * This can be used when one application needs to pass a buffer object
 * to another.
 */
drm_intel_bo *
drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr,
				  const char *name,
				  unsigned int handle)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
	drm_intel_bo_gem *bo_gem;
	int ret;
	struct drm_gem_open open_arg;
	struct drm_i915_gem_get_tiling get_tiling;

	bo_gem = calloc(1, sizeof(*bo_gem));
	if (!bo_gem)
		return NULL;

	memset(&open_arg, 0, sizeof(open_arg));
	open_arg.name = handle;
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_GEM_OPEN,
		       &open_arg);
	if (ret != 0) {
		fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n",
			name, handle, strerror(errno));
		free(bo_gem);
		return NULL;
	}
	bo_gem->bo.size = open_arg.size;
	bo_gem->bo.offset = 0;
	bo_gem->bo.virtual = NULL;
	bo_gem->bo.bufmgr = bufmgr;
	bo_gem->name = name;
	atomic_set(&bo_gem->refcount, 1);
	bo_gem->validate_index = -1;
	bo_gem->gem_handle = open_arg.handle;
	bo_gem->global_name = handle;
	bo_gem->reusable = 0;

	memset(&get_tiling, 0, sizeof(get_tiling));
	get_tiling.handle = bo_gem->gem_handle;
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_GET_TILING,
		       &get_tiling);
	if (ret != 0) {
		drm_intel_gem_bo_unreference(&bo_gem->bo);
		return NULL;
	}
	bo_gem->tiling_mode = get_tiling.tiling_mode;
	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
	/* XXX stride is unknown */
	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);

	DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);

	return &bo_gem->bo;
}

static void
drm_intel_gem_bo_free(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_gem_close close;
	int ret;

	if (bo_gem->mem_virtual)
		munmap(bo_gem->mem_virtual, bo_gem->bo.size);
	if (bo_gem->gtt_virtual)
		munmap(bo_gem->gtt_virtual, bo_gem->bo.size);

	/* Close this object */
	memset(&close, 0, sizeof(close));
	close.handle = bo_gem->gem_handle;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close);
	if (ret != 0) {
		fprintf(stderr,
			"DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
			bo_gem->gem_handle, bo_gem->name, strerror(errno));
	}
	free(bo);
}
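
/*
 * Illustrative sharing sketch (assumed usage): the exporting process
 * publishes a global name with drm_intel_bo_flink(), hands it to another
 * process over IPC, and the importer wraps it with the constructor above:
 *
 *	uint32_t global_name;
 *	drm_intel_bo_flink(src_bo, &global_name);	(exporter)
 *	...
 *	drm_intel_bo *shared =
 *		drm_intel_bo_gem_create_from_name(bufmgr, "shared",
 *						  global_name);	(importer)
 */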
/** Frees all cached buffers significantly older than @time. */
static void
drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
{
	int i;

	if (bufmgr_gem->time == time)
		return;

	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
		struct drm_intel_gem_bo_bucket *bucket =
		    &bufmgr_gem->cache_bucket[i];

		while (!DRMLISTEMPTY(&bucket->head)) {
			drm_intel_bo_gem *bo_gem;

			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
					      bucket->head.next, head);
			if (time - bo_gem->free_time <= 1)
				break;

			DRMLISTDEL(&bo_gem->head);

			drm_intel_gem_bo_free(&bo_gem->bo);
		}
	}

	bufmgr_gem->time = time;
}

static void
drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_intel_gem_bo_bucket *bucket;
	int i;

	/* Unreference all the target buffers */
	for (i = 0; i < bo_gem->reloc_count; i++) {
		if (bo_gem->reloc_target_info[i].bo != bo) {
			drm_intel_gem_bo_unreference_locked_timed(bo_gem->
								  reloc_target_info[i].bo,
								  time);
		}
	}
	bo_gem->reloc_count = 0;
	bo_gem->used_as_reloc_target = 0;

	DBG("bo_unreference final: %d (%s)\n",
	    bo_gem->gem_handle, bo_gem->name);

	/* release memory associated with this object */
	if (bo_gem->reloc_target_info) {
		free(bo_gem->reloc_target_info);
		bo_gem->reloc_target_info = NULL;
	}
	if (bo_gem->relocs) {
		free(bo_gem->relocs);
		bo_gem->relocs = NULL;
	}

	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
	/* Put the buffer into our internal cache for reuse if we can. */
	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
	    drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
					      I915_MADV_DONTNEED)) {
		bo_gem->free_time = time;

		bo_gem->name = NULL;
		bo_gem->validate_index = -1;

		DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
	} else {
		drm_intel_gem_bo_free(bo);
	}
}

static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
						      time_t time)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	assert(atomic_read(&bo_gem->refcount) > 0);
	if (atomic_dec_and_test(&bo_gem->refcount))
		drm_intel_gem_bo_unreference_final(bo, time);
}

static void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	assert(atomic_read(&bo_gem->refcount) > 0);
	if (atomic_dec_and_test(&bo_gem->refcount)) {
		drm_intel_bufmgr_gem *bufmgr_gem =
		    (drm_intel_bufmgr_gem *) bo->bufmgr;
		struct timespec time;

		clock_gettime(CLOCK_MONOTONIC, &time);

		pthread_mutex_lock(&bufmgr_gem->lock);
		drm_intel_gem_bo_unreference_final(bo, time.tv_sec);
		drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec);
		pthread_mutex_unlock(&bufmgr_gem->lock);
	}
}

static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_set_domain set_domain;
	int ret;

	pthread_mutex_lock(&bufmgr_gem->lock);

	/* Allow recursive mapping.  Mesa may recursively map buffers with
	 * nested display loops.
	 */
96422944501Smrg */ 96522944501Smrg if (!bo_gem->mem_virtual) { 96622944501Smrg struct drm_i915_gem_mmap mmap_arg; 96722944501Smrg 96822944501Smrg DBG("bo_map: %d (%s)\n", bo_gem->gem_handle, bo_gem->name); 96922944501Smrg 97022944501Smrg memset(&mmap_arg, 0, sizeof(mmap_arg)); 97122944501Smrg mmap_arg.handle = bo_gem->gem_handle; 97222944501Smrg mmap_arg.offset = 0; 97322944501Smrg mmap_arg.size = bo->size; 9746d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, 9756d98c517Smrg DRM_IOCTL_I915_GEM_MMAP, 9766d98c517Smrg &mmap_arg); 97722944501Smrg if (ret != 0) { 97822944501Smrg ret = -errno; 97922944501Smrg fprintf(stderr, 98022944501Smrg "%s:%d: Error mapping buffer %d (%s): %s .\n", 98122944501Smrg __FILE__, __LINE__, bo_gem->gem_handle, 98222944501Smrg bo_gem->name, strerror(errno)); 98322944501Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 98422944501Smrg return ret; 98522944501Smrg } 98622944501Smrg bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; 98722944501Smrg } 98822944501Smrg DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 98922944501Smrg bo_gem->mem_virtual); 99022944501Smrg bo->virtual = bo_gem->mem_virtual; 99122944501Smrg 99222944501Smrg set_domain.handle = bo_gem->gem_handle; 99322944501Smrg set_domain.read_domains = I915_GEM_DOMAIN_CPU; 99422944501Smrg if (write_enable) 99522944501Smrg set_domain.write_domain = I915_GEM_DOMAIN_CPU; 99622944501Smrg else 99722944501Smrg set_domain.write_domain = 0; 9986d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, 9996d98c517Smrg DRM_IOCTL_I915_GEM_SET_DOMAIN, 10006d98c517Smrg &set_domain); 100122944501Smrg if (ret != 0) { 100222944501Smrg fprintf(stderr, "%s:%d: Error setting to CPU domain %d: %s\n", 100322944501Smrg __FILE__, __LINE__, bo_gem->gem_handle, 100422944501Smrg strerror(errno)); 100522944501Smrg } 100622944501Smrg 100722944501Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 100822944501Smrg 100922944501Smrg return 0; 101022944501Smrg} 101122944501Smrg 101222944501Smrgint drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) 101322944501Smrg{ 101422944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 101522944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 101622944501Smrg struct drm_i915_gem_set_domain set_domain; 101722944501Smrg int ret; 101822944501Smrg 101922944501Smrg pthread_mutex_lock(&bufmgr_gem->lock); 102022944501Smrg 102122944501Smrg /* Get a mapping of the buffer if we haven't before. */ 102222944501Smrg if (bo_gem->gtt_virtual == NULL) { 102322944501Smrg struct drm_i915_gem_mmap_gtt mmap_arg; 102422944501Smrg 102522944501Smrg DBG("bo_map_gtt: mmap %d (%s)\n", bo_gem->gem_handle, 102622944501Smrg bo_gem->name); 102722944501Smrg 102822944501Smrg memset(&mmap_arg, 0, sizeof(mmap_arg)); 102922944501Smrg mmap_arg.handle = bo_gem->gem_handle; 103022944501Smrg 103122944501Smrg /* Get the fake offset back... 
int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_set_domain set_domain;
	int ret;

	pthread_mutex_lock(&bufmgr_gem->lock);

	/* Get a mapping of the buffer if we haven't before. */
	if (bo_gem->gtt_virtual == NULL) {
		struct drm_i915_gem_mmap_gtt mmap_arg;

		DBG("bo_map_gtt: mmap %d (%s)\n", bo_gem->gem_handle,
		    bo_gem->name);

		memset(&mmap_arg, 0, sizeof(mmap_arg));
		mmap_arg.handle = bo_gem->gem_handle;

		/* Get the fake offset back... */
		ret = drmIoctl(bufmgr_gem->fd,
			       DRM_IOCTL_I915_GEM_MMAP_GTT,
			       &mmap_arg);
		if (ret != 0) {
			ret = -errno;
			fprintf(stderr,
				"%s:%d: Error preparing buffer map %d (%s): %s .\n",
				__FILE__, __LINE__,
				bo_gem->gem_handle, bo_gem->name,
				strerror(errno));
			pthread_mutex_unlock(&bufmgr_gem->lock);
			return ret;
		}

		/* and mmap it */
		bo_gem->gtt_virtual = mmap(0, bo->size, PROT_READ | PROT_WRITE,
					   MAP_SHARED, bufmgr_gem->fd,
					   mmap_arg.offset);
		if (bo_gem->gtt_virtual == MAP_FAILED) {
			bo_gem->gtt_virtual = NULL;
			ret = -errno;
			fprintf(stderr,
				"%s:%d: Error mapping buffer %d (%s): %s .\n",
				__FILE__, __LINE__,
				bo_gem->gem_handle, bo_gem->name,
				strerror(errno));
			pthread_mutex_unlock(&bufmgr_gem->lock);
			return ret;
		}
	}

	bo->virtual = bo_gem->gtt_virtual;

	DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
	    bo_gem->gtt_virtual);

	/* Now move it to the GTT domain so that the CPU caches are flushed */
	set_domain.handle = bo_gem->gem_handle;
	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
	set_domain.write_domain = I915_GEM_DOMAIN_GTT;
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
		       &set_domain);
	if (ret != 0) {
		fprintf(stderr, "%s:%d: Error setting domain %d: %s\n",
			__FILE__, __LINE__, bo_gem->gem_handle,
			strerror(errno));
	}

	pthread_mutex_unlock(&bufmgr_gem->lock);

	return 0;
}

int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int ret = 0;

	if (bo == NULL)
		return 0;

	assert(bo_gem->gtt_virtual != NULL);

	pthread_mutex_lock(&bufmgr_gem->lock);
	bo->virtual = NULL;
	pthread_mutex_unlock(&bufmgr_gem->lock);

	return ret;
}

static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_sw_finish sw_finish;
	int ret;

	if (bo == NULL)
		return 0;

	assert(bo_gem->mem_virtual != NULL);

	pthread_mutex_lock(&bufmgr_gem->lock);

	/* Cause a flush to happen if the buffer's pinned for scanout, so the
	 * results show up in a timely manner.
	 */
	sw_finish.handle = bo_gem->gem_handle;
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_SW_FINISH,
		       &sw_finish);
	ret = ret == -1 ? -errno : 0;

	bo->virtual = NULL;
	pthread_mutex_unlock(&bufmgr_gem->lock);

	return ret;
}

static int
drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
			 unsigned long size, const void *data)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_pwrite pwrite;
	int ret;

	memset(&pwrite, 0, sizeof(pwrite));
	pwrite.handle = bo_gem->gem_handle;
	pwrite.offset = offset;
	pwrite.size = size;
	pwrite.data_ptr = (uint64_t) (uintptr_t) data;
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_PWRITE,
		       &pwrite);
	if (ret != 0) {
		ret = -errno;
		fprintf(stderr,
			"%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
			__FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
			(int)size, strerror(errno));
	}

	return ret;
}
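
/*
 * Illustrative usage sketch (assumed caller): drm_intel_bo_subdata() is
 * the public entry point for the pwrite path above, uploading data
 * without keeping a long-lived CPU mapping of the BO:
 *
 *	static const uint32_t quad[] = { 0, 1, 2, 3 };
 *	drm_intel_bo_subdata(vbo, 0, sizeof(quad), quad);
 */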
static int
drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
	struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id;
	int ret;

	get_pipe_from_crtc_id.crtc_id = crtc_id;
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
		       &get_pipe_from_crtc_id);
	if (ret != 0) {
		/* We return -1 here to signal that we don't
		 * know which pipe is associated with this crtc.
		 * This lets the caller know that this information
		 * isn't available; using the wrong pipe for
		 * vblank waiting can cause the chipset to lock up.
		 */
		return -1;
	}

	return get_pipe_from_crtc_id.pipe;
}

static int
drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
			     unsigned long size, void *data)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_pread pread;
	int ret;

	memset(&pread, 0, sizeof(pread));
	pread.handle = bo_gem->gem_handle;
	pread.offset = offset;
	pread.size = size;
	pread.data_ptr = (uint64_t) (uintptr_t) data;
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_PREAD,
		       &pread);
	if (ret != 0) {
		ret = -errno;
		fprintf(stderr,
			"%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
			__FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
			(int)size, strerror(errno));
	}

	return ret;
}

/** Waits for all GPU rendering to the object to have completed. */
static void
drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
{
	drm_intel_gem_bo_start_gtt_access(bo, 0);
}

/**
 * Sets the object to the GTT read and possibly write domain, used by the X
 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
 *
 * In combination with drm_intel_gem_bo_pin() and manual fence management, we
 * can do tiled pixmaps this way.
 */
void
drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_set_domain set_domain;
	int ret;

	set_domain.handle = bo_gem->gem_handle;
	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
	set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
		       &set_domain);
	if (ret != 0) {
		fprintf(stderr,
			"%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
			__FILE__, __LINE__, bo_gem->gem_handle,
			set_domain.read_domains, set_domain.write_domain,
			strerror(errno));
	}
}

static void
drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
	int i;

	free(bufmgr_gem->exec2_objects);
	free(bufmgr_gem->exec_objects);
	free(bufmgr_gem->exec_bos);

	pthread_mutex_destroy(&bufmgr_gem->lock);

	/* Free any cached buffer objects we were going to reuse */
	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
		struct drm_intel_gem_bo_bucket *bucket =
		    &bufmgr_gem->cache_bucket[i];
		drm_intel_bo_gem *bo_gem;

		while (!DRMLISTEMPTY(&bucket->head)) {
			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
					      bucket->head.next, head);
			DRMLISTDEL(&bo_gem->head);

			drm_intel_gem_bo_free(&bo_gem->bo);
		}
	}

	free(bufmgr);
}

/**
 * Adds the target buffer to the validation list and adds the relocation
 * to the reloc_buffer's relocation list.
 *
 * The relocation entry at the given offset must already contain the
 * precomputed relocation value, because the kernel will optimize out
 * the relocation entry write when the buffer hasn't moved from the
 * last known offset in target_bo.
 */
static int
do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
		 drm_intel_bo *target_bo, uint32_t target_offset,
		 uint32_t read_domains, uint32_t write_domain,
		 int need_fence)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;

	if (bo_gem->has_error)
		return -ENOMEM;

	if (target_bo_gem->has_error) {
		bo_gem->has_error = 1;
		return -ENOMEM;
	}

	if (target_bo_gem->tiling_mode == I915_TILING_NONE)
		need_fence = 0;

	/* We never use HW fences for rendering on 965+ */
	if (bufmgr_gem->gen >= 4)
		need_fence = 0;

	/* Create a new relocation list if needed */
	if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo))
		return -ENOMEM;

	/* Check overflow */
	assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);

	/* Check args */
	assert(offset <= bo->size - 4);
	assert((write_domain & (write_domain - 1)) == 0);

	/* Make sure that we're not adding a reloc to something whose size has
	 * already been accounted for.
	 */
	assert(!bo_gem->used_as_reloc_target);
	if (target_bo_gem != bo_gem) {
		target_bo_gem->used_as_reloc_target = 1;
		bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
	}
	/* An object needing a fence is a tiled buffer, so it won't have
	 * relocs to other buffers.
	 */
	if (need_fence)
		target_bo_gem->reloc_tree_fences = 1;
	bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;

	/* Flag the target to disallow further relocations in it. */

	bo_gem->relocs[bo_gem->reloc_count].offset = offset;
	bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
	bo_gem->relocs[bo_gem->reloc_count].target_handle =
	    target_bo_gem->gem_handle;
	bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
	bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
	bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset;

	bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
	if (target_bo != bo)
		drm_intel_gem_bo_reference(target_bo);
	if (need_fence)
		bo_gem->reloc_target_info[bo_gem->reloc_count].flags =
			DRM_INTEL_RELOC_FENCE;
	else
		bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;

	bo_gem->reloc_count++;

	return 0;
}

static int
drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
			    drm_intel_bo *target_bo, uint32_t target_offset,
			    uint32_t read_domains, uint32_t write_domain)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;

	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
				read_domains, write_domain,
				!bufmgr_gem->fenced_relocs);
}

static int
drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
				  drm_intel_bo *target_bo,
				  uint32_t target_offset,
				  uint32_t read_domains, uint32_t write_domain)
{
	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
				read_domains, write_domain, 1);
}
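
/*
 * Illustrative caller sketch (assumed usage): a driver building a batch
 * writes the target's presumed address into the batch and then records a
 * relocation for that same dword via the public drm_intel_bo_emit_reloc(),
 * so the kernel can patch it if the target has moved:
 *
 *	batch[n] = target->offset + delta;
 *	drm_intel_bo_emit_reloc(batch_bo, n * 4,
 *				target, delta,
 *				I915_GEM_DOMAIN_RENDER,
 *				I915_GEM_DOMAIN_RENDER);
 */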
/**
 * Walk the tree of relocations rooted at BO and accumulate the list of
 * validations to be performed and update the relocation buffers with
 * index values into the validation list.
 */
static void
drm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int i;

	if (bo_gem->relocs == NULL)
		return;

	for (i = 0; i < bo_gem->reloc_count; i++) {
		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;

		if (target_bo == bo)
			continue;

		/* Continue walking the tree depth-first. */
		drm_intel_gem_bo_process_reloc(target_bo);

		/* Add the target to the validate list */
		drm_intel_add_validate_buffer(target_bo);
	}
}

static void
drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
	int i;

	if (bo_gem->relocs == NULL)
		return;

	for (i = 0; i < bo_gem->reloc_count; i++) {
		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
		int need_fence;

		if (target_bo == bo)
			continue;

		/* Continue walking the tree depth-first. */
		drm_intel_gem_bo_process_reloc2(target_bo);

		need_fence = (bo_gem->reloc_target_info[i].flags &
			      DRM_INTEL_RELOC_FENCE);

		/* Add the target to the validate list */
		drm_intel_add_validate_buffer2(target_bo, need_fence);
	}
}

static void
drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
{
	int i;

	for (i = 0; i < bufmgr_gem->exec_count; i++) {
		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

		/* Update the buffer offset */
		if (bufmgr_gem->exec_objects[i].offset != bo->offset) {
			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
			    bo_gem->gem_handle, bo_gem->name, bo->offset,
			    (unsigned long long)bufmgr_gem->exec_objects[i].offset);
			bo->offset = bufmgr_gem->exec_objects[i].offset;
		}
	}
}

static void
drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
{
	int i;

	for (i = 0; i < bufmgr_gem->exec_count; i++) {
		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;

		/* Update the buffer offset */
		if (bufmgr_gem->exec2_objects[i].offset != bo->offset) {
			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
			    bo_gem->gem_handle, bo_gem->name, bo->offset,
			    (unsigned long long)bufmgr_gem->exec2_objects[i].offset);
			bo->offset = bufmgr_gem->exec2_objects[i].offset;
		}
	}
}

static int
drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
		      drm_clip_rect_t * cliprects, int num_cliprects, int DR4)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_execbuffer execbuf;
	int ret, i;

	if (bo_gem->has_error)
		return -ENOMEM;

	pthread_mutex_lock(&bufmgr_gem->lock);
	/* Update indices and set up the validate list. */
	drm_intel_gem_bo_process_reloc(bo);

	/* Add the batch buffer to the validation list.  There are no
	 * relocations pointing to it.
	 */
	drm_intel_add_validate_buffer(bo);

	execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects;
	execbuf.buffer_count = bufmgr_gem->exec_count;
	execbuf.batch_start_offset = 0;
	execbuf.batch_len = used;
	execbuf.cliprects_ptr = (uintptr_t) cliprects;
	execbuf.num_cliprects = num_cliprects;
	execbuf.DR1 = 0;
	execbuf.DR4 = DR4;

	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_EXECBUFFER,
		       &execbuf);
	if (ret != 0) {
		ret = -errno;
		if (errno == ENOSPC) {
			fprintf(stderr,
				"Execbuffer fails to pin. "
				"Estimate: %u. Actual: %u. Available: %u\n",
				drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
								   bufmgr_gem->exec_count),
				drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
								  bufmgr_gem->exec_count),
				(unsigned int)bufmgr_gem->gtt_size);
		}
	}
	drm_intel_update_buffer_offsets(bufmgr_gem);

	if (bufmgr_gem->bufmgr.debug)
		drm_intel_gem_dump_validation_list(bufmgr_gem);

	for (i = 0; i < bufmgr_gem->exec_count; i++) {
		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

		/* Disconnect the buffer from the validate list */
		bo_gem->validate_index = -1;
		bufmgr_gem->exec_bos[i] = NULL;
	}
	bufmgr_gem->exec_count = 0;
	pthread_mutex_unlock(&bufmgr_gem->lock);

	return ret;
}

static int
drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
			   drm_clip_rect_t *cliprects, int num_cliprects,
			   int DR4, int ring_flag)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
	struct drm_i915_gem_execbuffer2 execbuf;
	int ret, i;

	if ((ring_flag != I915_EXEC_RENDER) && (ring_flag != I915_EXEC_BSD))
		return -EINVAL;

	pthread_mutex_lock(&bufmgr_gem->lock);
	/* Update indices and set up the validate list. */
	drm_intel_gem_bo_process_reloc2(bo);

	/* Add the batch buffer to the validation list.  There are no
	 * relocations pointing to it.
	 */
	drm_intel_add_validate_buffer2(bo, 0);

	execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
	execbuf.buffer_count = bufmgr_gem->exec_count;
	execbuf.batch_start_offset = 0;
	execbuf.batch_len = used;
	execbuf.cliprects_ptr = (uintptr_t)cliprects;
	execbuf.num_cliprects = num_cliprects;
	execbuf.DR1 = 0;
	execbuf.DR4 = DR4;
	execbuf.flags = ring_flag;
	execbuf.rsvd1 = 0;
	execbuf.rsvd2 = 0;

	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_EXECBUFFER2,
		       &execbuf);
	if (ret != 0) {
		ret = -errno;
		if (ret == -ENOSPC) {
			fprintf(stderr,
				"Execbuffer fails to pin. "
				"Estimate: %u. Actual: %u. Available: %u\n",
				drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
								   bufmgr_gem->exec_count),
				drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
								  bufmgr_gem->exec_count),
				(unsigned int) bufmgr_gem->gtt_size);
		}
	}
	drm_intel_update_buffer_offsets2(bufmgr_gem);

	if (bufmgr_gem->bufmgr.debug)
		drm_intel_gem_dump_validation_list(bufmgr_gem);

	for (i = 0; i < bufmgr_gem->exec_count; i++) {
		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;

		/* Disconnect the buffer from the validate list */
		bo_gem->validate_index = -1;
		bufmgr_gem->exec_bos[i] = NULL;
	}
	bufmgr_gem->exec_count = 0;
	pthread_mutex_unlock(&bufmgr_gem->lock);

	return ret;
}

static int
drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used,
		       drm_clip_rect_t *cliprects, int num_cliprects,
		       int DR4)
{
	return drm_intel_gem_bo_mrb_exec2(bo, used,
					  cliprects, num_cliprects, DR4,
					  I915_EXEC_RENDER);
}
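/*
 * Usage sketch (illustrative; "batch" and "used_bytes" are hypothetical):
 * most callers submit through drm_intel_bo_exec(), which lands here when
 * the kernel supports execbuffer2.  Callers targeting the BSD (video) ring
 * can go through the multi-ring entry point instead:
 *
 *	ret = drm_intel_bo_mrb_exec(batch, used_bytes, NULL, 0, 0,
 *				    I915_EXEC_BSD);
 *
 * Anything other than I915_EXEC_RENDER or I915_EXEC_BSD is rejected with
 * -EINVAL above.
 */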
static int
drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_pin pin;
	int ret;

	memset(&pin, 0, sizeof(pin));
	pin.handle = bo_gem->gem_handle;
	pin.alignment = alignment;

	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_PIN,
		       &pin);
	if (ret != 0)
		return -errno;

	bo->offset = pin.offset;
	return 0;
}

static int
drm_intel_gem_bo_unpin(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_unpin unpin;
	int ret;

	memset(&unpin, 0, sizeof(unpin));
	unpin.handle = bo_gem->gem_handle;

	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
	if (ret != 0)
		return -errno;

	return 0;
}

static int
drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
				     uint32_t tiling_mode,
				     uint32_t stride)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_set_tiling set_tiling;
	int ret;

	if (bo_gem->global_name == 0 &&
	    tiling_mode == bo_gem->tiling_mode &&
	    stride == bo_gem->stride)
		return 0;

	memset(&set_tiling, 0, sizeof(set_tiling));
	do {
		/* set_tiling is slightly broken and overwrites the
		 * input on the error path, so we have to open code
		 * drmIoctl.
		 */
		set_tiling.handle = bo_gem->gem_handle;
		set_tiling.tiling_mode = tiling_mode;
		set_tiling.stride = stride;

		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_SET_TILING,
			    &set_tiling);
	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
	if (ret == -1)
		return -errno;

	bo_gem->tiling_mode = set_tiling.tiling_mode;
	bo_gem->swizzle_mode = set_tiling.swizzle_mode;
	bo_gem->stride = set_tiling.stride;
	return 0;
}

static int
drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
			    uint32_t stride)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int ret;

	/* Linear buffers have no stride.  By ensuring that we only ever use
	 * stride 0 with linear buffers, we simplify our code.
	 */
	if (*tiling_mode == I915_TILING_NONE)
		stride = 0;

	ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride);
	if (ret == 0)
		drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);

	*tiling_mode = bo_gem->tiling_mode;
	return ret;
}
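/*
 * Usage sketch (illustrative; "bo" and "pitch" are hypothetical): callers
 * pass the tiling mode by reference because the kernel may refuse or
 * downgrade the request, and the mode actually in effect is written back:
 *
 *	uint32_t tiling = I915_TILING_X;
 *	ret = drm_intel_bo_set_tiling(bo, &tiling, pitch);
 *	if (ret == 0 && tiling != I915_TILING_X) {
 *		... fall back to the linear layout ...
 *	}
 */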
static int
drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
			    uint32_t * swizzle_mode)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	*tiling_mode = bo_gem->tiling_mode;
	*swizzle_mode = bo_gem->swizzle_mode;
	return 0;
}

static int
drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_gem_flink flink;
	int ret;

	if (!bo_gem->global_name) {
		memset(&flink, 0, sizeof(flink));
		flink.handle = bo_gem->gem_handle;

		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink);
		if (ret != 0)
			return -errno;
		bo_gem->global_name = flink.name;
		bo_gem->reusable = 0;
	}

	*name = bo_gem->global_name;
	return 0;
}
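/*
 * Usage sketch (illustrative): flink publishes a global name that another
 * process can open.  Note that a flinked buffer is dropped from the reuse
 * cache, since the other process may still be using it after we
 * unreference it:
 *
 *	uint32_t name;
 *	if (drm_intel_bo_flink(bo, &name) == 0)
 *		send_name_to_client(name);	(hypothetical IPC)
 *	... in the other process ...
 *	shared = drm_intel_bo_gem_create_from_name(bufmgr, "shared", name);
 */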
184422944501Smrg */ 184522944501Smrgstatic void 184622944501Smrgdrm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo) 184722944501Smrg{ 184822944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 184922944501Smrg int i; 185022944501Smrg 185122944501Smrg if (bo == NULL || !bo_gem->included_in_check_aperture) 185222944501Smrg return; 185322944501Smrg 185422944501Smrg bo_gem->included_in_check_aperture = 0; 185522944501Smrg 185622944501Smrg for (i = 0; i < bo_gem->reloc_count; i++) 185722944501Smrg drm_intel_gem_bo_clear_aperture_space_flag(bo_gem-> 185822944501Smrg reloc_target_info[i].bo); 185922944501Smrg} 186022944501Smrg 186122944501Smrg/** 186222944501Smrg * Return a conservative estimate for the amount of aperture required 186322944501Smrg * for a collection of buffers. This may double-count some buffers. 186422944501Smrg */ 186522944501Smrgstatic unsigned int 186622944501Smrgdrm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count) 186722944501Smrg{ 186822944501Smrg int i; 186922944501Smrg unsigned int total = 0; 187022944501Smrg 187122944501Smrg for (i = 0; i < count; i++) { 187222944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 187322944501Smrg if (bo_gem != NULL) 187422944501Smrg total += bo_gem->reloc_tree_size; 187522944501Smrg } 187622944501Smrg return total; 187722944501Smrg} 187822944501Smrg 187922944501Smrg/** 188022944501Smrg * Return the amount of aperture needed for a collection of buffers. 188122944501Smrg * This avoids double counting any buffers, at the cost of looking 188222944501Smrg * at every buffer in the set. 188322944501Smrg */ 188422944501Smrgstatic unsigned int 188522944501Smrgdrm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count) 188622944501Smrg{ 188722944501Smrg int i; 188822944501Smrg unsigned int total = 0; 188922944501Smrg 189022944501Smrg for (i = 0; i < count; i++) { 189122944501Smrg total += drm_intel_gem_bo_get_aperture_space(bo_array[i]); 189222944501Smrg /* For the first buffer object in the array, we get an 189322944501Smrg * accurate count back for its reloc_tree size (since nothing 189422944501Smrg * had been flagged as being counted yet). We can save that 189522944501Smrg * value out as a more conservative reloc_tree_size that 189622944501Smrg * avoids double-counting target buffers. Since the first 189722944501Smrg * buffer happens to usually be the batch buffer in our 189822944501Smrg * callers, this can pull us back from doing the tree 189922944501Smrg * walk on every new batch emit. 190022944501Smrg */ 190122944501Smrg if (i == 0) { 190222944501Smrg drm_intel_bo_gem *bo_gem = 190322944501Smrg (drm_intel_bo_gem *) bo_array[i]; 190422944501Smrg bo_gem->reloc_tree_size = total; 190522944501Smrg } 190622944501Smrg } 190722944501Smrg 190822944501Smrg for (i = 0; i < count; i++) 190922944501Smrg drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]); 191022944501Smrg return total; 191122944501Smrg} 191222944501Smrg 191322944501Smrg/** 191422944501Smrg * Return -1 if the batchbuffer should be flushed before attempting to 191522944501Smrg * emit rendering referencing the buffers pointed to by bo_array. 191622944501Smrg * 191722944501Smrg * This is required because if we try to emit a batchbuffer with relocations 191822944501Smrg * to a tree of buffers that won't simultaneously fit in the aperture, 191922944501Smrg * the rendering will return an error at a point where the software is not 192022944501Smrg * prepared to recover from it. 
/**
 * Return the additional aperture space required by the tree of buffer
 * objects rooted at bo.
 */
static int
drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int i;
	int total = 0;

	if (bo == NULL || bo_gem->included_in_check_aperture)
		return 0;

	total += bo->size;
	bo_gem->included_in_check_aperture = 1;

	for (i = 0; i < bo_gem->reloc_count; i++)
		total +=
		    drm_intel_gem_bo_get_aperture_space(bo_gem->
							reloc_target_info[i].bo);

	return total;
}

/**
 * Count the number of buffers in this list that need a fence reg.
 *
 * If the count is greater than the number of available regs, we'll have
 * to ask the caller to resubmit a batch with fewer tiled buffers.
 *
 * This function over-counts if the same buffer is used multiple times.
 */
static unsigned int
drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count)
{
	int i;
	unsigned int total = 0;

	for (i = 0; i < count; i++) {
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];

		if (bo_gem == NULL)
			continue;

		total += bo_gem->reloc_tree_fences;
	}
	return total;
}

/**
 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready
 * for the next drm_intel_bufmgr_check_aperture_space() call.
 */
static void
drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int i;

	if (bo == NULL || !bo_gem->included_in_check_aperture)
		return;

	bo_gem->included_in_check_aperture = 0;

	for (i = 0; i < bo_gem->reloc_count; i++)
		drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->
							   reloc_target_info[i].bo);
}

/**
 * Return a conservative estimate for the amount of aperture required
 * for a collection of buffers.  This may double-count some buffers.
 */
static unsigned int
drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count)
{
	int i;
	unsigned int total = 0;

	for (i = 0; i < count; i++) {
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
		if (bo_gem != NULL)
			total += bo_gem->reloc_tree_size;
	}
	return total;
}

/**
 * Return the amount of aperture needed for a collection of buffers.
 * This avoids double counting any buffers, at the cost of looking
 * at every buffer in the set.
 */
static unsigned int
drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count)
{
	int i;
	unsigned int total = 0;

	for (i = 0; i < count; i++) {
		total += drm_intel_gem_bo_get_aperture_space(bo_array[i]);
		/* For the first buffer object in the array, we get an
		 * accurate count back for its reloc_tree size (since nothing
		 * had been flagged as being counted yet).  We can save that
		 * value out as a more conservative reloc_tree_size that
		 * avoids double-counting target buffers.  Since the first
		 * buffer happens to usually be the batch buffer in our
		 * callers, this can pull us back from doing the tree
		 * walk on every new batch emit.
		 */
		if (i == 0) {
			drm_intel_bo_gem *bo_gem =
			    (drm_intel_bo_gem *) bo_array[i];
			bo_gem->reloc_tree_size = total;
		}
	}

	for (i = 0; i < count; i++)
		drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]);
	return total;
}

/**
 * Return -ENOSPC if the batchbuffer should be flushed before attempting to
 * emit rendering referencing the buffers pointed to by bo_array.
 *
 * This is required because if we try to emit a batchbuffer with relocations
 * to a tree of buffers that won't simultaneously fit in the aperture,
 * the rendering will return an error at a point where the software is not
 * prepared to recover from it.
 *
 * However, we also want to emit the batchbuffer significantly before we reach
 * the limit, as a series of batchbuffers each of which references buffers
 * covering almost all of the aperture means that at each emit we end up
 * waiting to evict a buffer from the last rendering, and we get synchronous
 * performance.  By emitting smaller batchbuffers, we eat some CPU overhead to
 * get better parallelism.
 */
static int
drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
{
	drm_intel_bufmgr_gem *bufmgr_gem =
	    (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr;
	unsigned int total = 0;
	unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
	int total_fences;

	/* Check for fence reg constraints if necessary */
	if (bufmgr_gem->available_fences) {
		total_fences = drm_intel_gem_total_fences(bo_array, count);
		if (total_fences > bufmgr_gem->available_fences)
			return -ENOSPC;
	}

	total = drm_intel_gem_estimate_batch_space(bo_array, count);

	if (total > threshold)
		total = drm_intel_gem_compute_batch_space(bo_array, count);

	if (total > threshold) {
		DBG("check_space: overflowed available aperture, "
		    "%dkb vs %dkb\n",
		    total / 1024, (int)bufmgr_gem->gtt_size / 1024);
		return -ENOSPC;
	} else {
		DBG("drm_check_space: total %dkb vs bufmgr %dkb\n",
		    total / 1024, (int)bufmgr_gem->gtt_size / 1024);
		return 0;
	}
}
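/*
 * Usage sketch (illustrative; "bos" holds the batch plus every buffer its
 * relocation tree references): callers test aperture fit before queuing
 * more rendering into the current batch, and flush when the check fails:
 *
 *	if (drm_intel_bufmgr_check_aperture_space(bos, n_bos) != 0) {
 *		flush_batch();	(hypothetical: submit and start a new batch)
 *	}
 */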
/*
 * Disable buffer reuse for objects which are shared with the kernel
 * as scanout buffers
 */
static int
drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	bo_gem->reusable = 0;
	return 0;
}

static int
drm_intel_gem_bo_is_reusable(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	return bo_gem->reusable;
}

static int
_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int i;

	for (i = 0; i < bo_gem->reloc_count; i++) {
		if (bo_gem->reloc_target_info[i].bo == target_bo)
			return 1;
		if (bo == bo_gem->reloc_target_info[i].bo)
			continue;
		if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo,
						 target_bo))
			return 1;
	}

	return 0;
}

/** Return true if target_bo is referenced by bo's relocation tree. */
static int
drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
{
	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;

	if (bo == NULL || target_bo == NULL)
		return 0;
	if (target_bo_gem->used_as_reloc_target)
		return _drm_intel_gem_bo_references(bo, target_bo);
	return 0;
}

static void
add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
{
	unsigned int i = bufmgr_gem->num_buckets;

	assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));

	DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
	bufmgr_gem->cache_bucket[i].size = size;
	bufmgr_gem->num_buckets++;
}

static void
init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
{
	unsigned long size, cache_max_size = 64 * 1024 * 1024;

	/* OK, so power of two buckets was too wasteful of memory.
	 * Give 3 other sizes between each power of two, to hopefully
	 * cover things accurately enough.  (The alternative is
	 * probably to just go for exact matching of sizes, and assume
	 * that for things like composited window resize the tiled
	 * width/height alignment and rounding of sizes to pages will
	 * get us useful cache hit rates anyway)
	 */
	add_bucket(bufmgr_gem, 4096);
	add_bucket(bufmgr_gem, 4096 * 2);
	add_bucket(bufmgr_gem, 4096 * 3);

	/* Initialize the linked lists for BO reuse cache. */
	for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
		add_bucket(bufmgr_gem, size);

		add_bucket(bufmgr_gem, size + size * 1 / 4);
		add_bucket(bufmgr_gem, size + size * 2 / 4);
		add_bucket(bufmgr_gem, size + size * 3 / 4);
	}
}
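/*
 * Worked out, the loop above yields bucket sizes (in 4KB pages) of
 * 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, ...: every
 * power of two from 16KB up to 64MB plus three evenly spaced steps
 * between each pair.  That is 3 + 13 * 4 = 55 buckets in all, which fits
 * in the 14 * 4 entries reserved in cache_bucket[].
 */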
/**
 * Initializes the GEM buffer manager, which uses the kernel to allocate,
 * map, and manage buffer objects.
 *
 * \param fd File descriptor of the opened DRM device.
 */
drm_intel_bufmgr *
drm_intel_bufmgr_gem_init(int fd, int batch_size)
{
	drm_intel_bufmgr_gem *bufmgr_gem;
	struct drm_i915_gem_get_aperture aperture;
	drm_i915_getparam_t gp;
	int ret;
	int exec2 = 0, has_bsd = 0;

	bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
	if (bufmgr_gem == NULL)
		return NULL;

	bufmgr_gem->fd = fd;

	if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
		free(bufmgr_gem);
		return NULL;
	}

	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_GET_APERTURE,
		       &aperture);

	if (ret == 0)
		bufmgr_gem->gtt_size = aperture.aper_available_size;
	else {
		fprintf(stderr, "DRM_IOCTL_I915_GEM_GET_APERTURE failed: %s\n",
			strerror(errno));
		bufmgr_gem->gtt_size = 128 * 1024 * 1024;
		fprintf(stderr, "Assuming %dkB available aperture size.\n"
			"May lead to reduced performance or incorrect "
			"rendering.\n",
			(int)bufmgr_gem->gtt_size / 1024);
	}

	gp.param = I915_PARAM_CHIPSET_ID;
	gp.value = &bufmgr_gem->pci_device;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	if (ret) {
		fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
		fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
	}

	if (IS_GEN2(bufmgr_gem))
		bufmgr_gem->gen = 2;
	else if (IS_GEN3(bufmgr_gem))
		bufmgr_gem->gen = 3;
	else if (IS_GEN4(bufmgr_gem))
		bufmgr_gem->gen = 4;
	else
		bufmgr_gem->gen = 6;

	gp.param = I915_PARAM_HAS_EXECBUF2;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	if (!ret)
		exec2 = 1;

	gp.param = I915_PARAM_HAS_BSD;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	if (!ret)
		has_bsd = 1;

	if (bufmgr_gem->gen < 4) {
		gp.param = I915_PARAM_NUM_FENCES_AVAIL;
		gp.value = &bufmgr_gem->available_fences;
		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
		if (ret) {
			fprintf(stderr, "get fences failed: %d [%d]\n", ret,
				errno);
			fprintf(stderr, "param: %d, val: %d\n", gp.param,
				*gp.value);
			bufmgr_gem->available_fences = 0;
		} else {
			/* XXX The kernel reports the total number of fences,
			 * including any that may be pinned.
			 *
			 * We presume that there will be at least one pinned
			 * fence for the scanout buffer, but there may be more
			 * than one scanout and the user may be manually
			 * pinning buffers.  Let's move to execbuffer2 and
			 * thereby forget the insanity of using fences...
			 */
			bufmgr_gem->available_fences -= 2;
			if (bufmgr_gem->available_fences < 0)
				bufmgr_gem->available_fences = 0;
		}
	}

	/* Let's go with one relocation per every 2 dwords (but round down a
	 * bit since a power of two will mean an extra page allocation for the
	 * reloc buffer).
	 *
	 * Every 4 was too few for the blender benchmark.
	 */
	bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;

	bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
	bufmgr_gem->bufmgr.bo_alloc_for_render =
	    drm_intel_gem_bo_alloc_for_render;
	bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
	bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
	bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
	bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
	bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
	bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
	bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
	bufmgr_gem->bufmgr.bo_emit_reloc_fence =
	    drm_intel_gem_bo_emit_reloc_fence;
	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
	bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
	bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
	/* Use the new one if available */
	if (exec2) {
		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2;
		if (has_bsd)
			bufmgr_gem->bufmgr.bo_mrb_exec =
			    drm_intel_gem_bo_mrb_exec2;
	} else
		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
	bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy;
	bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise;
	bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy;
	bufmgr_gem->bufmgr.debug = 0;
	bufmgr_gem->bufmgr.check_aperture_space =
	    drm_intel_gem_check_aperture_space;
	bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
	bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable;
	bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
	    drm_intel_gem_get_pipe_from_crtc_id;
	bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;

	init_cache_buckets(bufmgr_gem);

	return &bufmgr_gem->bufmgr;
}
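/*
 * Usage sketch (illustrative; error handling omitted): a minimal client
 * brings up the buffer manager, touches a buffer, and tears down:
 *
 *	int fd = drmOpen("i915", NULL);		(or an already-open DRM fd)
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 *	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 *	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "scratch",
 *					      16384, 4096);
 *	drm_intel_bo_map(bo, 1);		(1 = map for writing)
 *	memset(bo->virtual, 0, 16384);
 *	drm_intel_bo_unmap(bo);
 *	drm_intel_bo_unreference(bo);
 *	drm_intel_bufmgr_destroy(bufmgr);
 */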