intel_bufmgr_gem.c revision 2e6867f6
/**************************************************************************
 *
 * Copyright © 2007 Red Hat Inc.
 * Copyright © 2007 Intel Corporation
 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 *
 **************************************************************************/
/*
 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
 *          Eric Anholt <eric@anholt.net>
 *          Dave Airlie <airlied@linux.ie>
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <xf86drm.h>
#include <xf86atomic.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <stddef.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>

#include "errno.h"
#include "libdrm_lists.h"
#include "intel_bufmgr.h"
#include "intel_bufmgr_priv.h"
#include "intel_chipset.h"
#include "string.h"

#include "i915_drm.h"

#define DBG(...) do {					\
	if (bufmgr_gem->bufmgr.debug)			\
		fprintf(stderr, __VA_ARGS__);		\
} while (0)
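
/*
 * Usage sketch (illustrative, with assumed values): DBG() above only
 * prints when debugging is enabled on the buffer manager.  A caller would
 * typically flip that flag through the public API declared in
 * intel_bufmgr.h; the 16384 batch size below is just a placeholder.
 */
#if 0
static drm_intel_bufmgr *example_init_with_debug(int fd)
{
	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 16384);

	if (bufmgr != NULL)
		drm_intel_bufmgr_set_debug(bufmgr, 1);	/* DBG() now reaches stderr */
	return bufmgr;
}
#endif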

typedef struct _drm_intel_bo_gem drm_intel_bo_gem;

struct drm_intel_gem_bo_bucket {
	drmMMListHead head;
	unsigned long size;
};

/* Only cache objects up to 64MB.  Bigger than that, and the rounding of the
 * size makes many operations fail that wouldn't otherwise.
 */
#define DRM_INTEL_GEM_BO_BUCKETS	14
typedef struct _drm_intel_bufmgr_gem {
	drm_intel_bufmgr bufmgr;

	int fd;

	int max_relocs;

	pthread_mutex_t lock;

	struct drm_i915_gem_exec_object *exec_objects;
	struct drm_i915_gem_exec_object2 *exec2_objects;
	drm_intel_bo **exec_bos;
	int exec_size;
	int exec_count;

	/** Array of lists of cached gem objects of power-of-two sizes */
	struct drm_intel_gem_bo_bucket cache_bucket[DRM_INTEL_GEM_BO_BUCKETS];

	uint64_t gtt_size;
	int available_fences;
	int pci_device;
	int gen;
	char bo_reuse;
	char fenced_relocs;
} drm_intel_bufmgr_gem;

#define DRM_INTEL_RELOC_FENCE (1<<0)

typedef struct _drm_intel_reloc_target_info {
	drm_intel_bo *bo;
	int flags;
} drm_intel_reloc_target;

struct _drm_intel_bo_gem {
	drm_intel_bo bo;

	atomic_t refcount;
	uint32_t gem_handle;
	const char *name;

	/**
	 * Kernel-assigned global name for this object
	 */
	unsigned int global_name;

	/**
	 * Index of the buffer within the validation list while preparing a
	 * batchbuffer execution.
	 */
	int validate_index;

	/**
	 * Current tiling mode
	 */
	uint32_t tiling_mode;
	uint32_t swizzle_mode;

	time_t free_time;

	/** Array passed to the DRM containing relocation information. */
	struct drm_i915_gem_relocation_entry *relocs;
	/**
	 * Array of info structs corresponding to relocs[i].target_handle etc
	 */
	drm_intel_reloc_target *reloc_target_info;
	/** Number of entries in relocs */
	int reloc_count;
	/** Mapped address for the buffer, saved across map/unmap cycles */
	void *mem_virtual;
	/** GTT virtual address for the buffer, saved across map/unmap cycles */
	void *gtt_virtual;

	/** BO cache list */
	drmMMListHead head;

	/**
	 * Boolean of whether this BO and its children have been included in
	 * the current drm_intel_bufmgr_check_aperture_space() total.
	 */
	char included_in_check_aperture;

	/**
	 * Boolean of whether this buffer has been used as a relocation
	 * target and had its size accounted for, and thus can't have any
	 * further relocations added to it.
	 */
	char used_as_reloc_target;

	/**
	 * Boolean of whether we have encountered an error whilst building the
	 * relocation tree.
	 */
	char has_error;

	/**
	 * Boolean of whether this buffer can be re-used
	 */
	char reusable;

	/**
	 * Size in bytes of this buffer and its relocation descendents.
	 *
	 * Used to avoid costly tree walking in
	 * drm_intel_bufmgr_check_aperture in the common case.
	 */
	int reloc_tree_size;

	/**
	 * Number of potential fence registers required by this buffer and its
	 * relocations.
	 */
	int reloc_tree_fences;
};

static unsigned int
drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);

static unsigned int
drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count);

static int
drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
			    uint32_t * swizzle_mode);

static int
drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
			    uint32_t stride);

static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
						      time_t time);

static void drm_intel_gem_bo_unreference(drm_intel_bo *bo);

static void drm_intel_gem_bo_free(drm_intel_bo *bo);

static unsigned long
drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
			   uint32_t *tiling_mode)
{
	unsigned long min_size, max_size;
	unsigned long i;

	if (*tiling_mode == I915_TILING_NONE)
		return size;

	/* 965+ just need multiples of page size for tiling */
	if (bufmgr_gem->gen >= 4)
		return ROUND_UP_TO(size, 4096);

	/* Older chips need powers of two, of at least 512k or 1M */
	if (bufmgr_gem->gen == 3) {
		min_size = 1024*1024;
		max_size = 128*1024*1024;
	} else {
		min_size = 512*1024;
		max_size = 64*1024*1024;
	}

	if (size > max_size) {
		*tiling_mode = I915_TILING_NONE;
		return size;
	}

	for (i = min_size; i < size; i <<= 1)
		;

	return i;
}
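
/*
 * Worked example (illustrative): a 300KB X-tiled request rounds up to the
 * 1MB minimum on gen3 and to the 512KB minimum on gen2, while gen4+ only
 * pads to page granularity, so 300KB stays 300KB.  The assertion assumes
 * bufmgr_gem->gen was filled in by the init code, which is outside this
 * section.
 */
#if 0
static void example_tile_size_rounding(drm_intel_bufmgr_gem *bufmgr_gem)
{
	uint32_t tiling = I915_TILING_X;
	unsigned long sz = drm_intel_gem_bo_tile_size(bufmgr_gem, 300*1024,
						      &tiling);

	/* Page-padded on gen4+, a power of two on gen2/gen3. */
	assert(sz >= 300*1024 &&
	       (bufmgr_gem->gen >= 4 || (sz & (sz - 1)) == 0));
}
#endif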

/*
 * Round a given pitch up to the minimum required for X tiling on a
 * given chip.  We use 512 as the minimum to allow for a later tiling
 * change.
 */
static unsigned long
drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
			    unsigned long pitch, uint32_t tiling_mode)
{
	unsigned long tile_width;
	unsigned long i;

	/* If untiled, then just align it so that we can do rendering
	 * to it with the 3D engine.
	 */
	if (tiling_mode == I915_TILING_NONE)
		return ALIGN(pitch, 64);

	if (tiling_mode == I915_TILING_X)
		tile_width = 512;
	else
		tile_width = 128;

	/* 965 is flexible */
	if (bufmgr_gem->gen >= 4)
		return ROUND_UP_TO(pitch, tile_width);

	/* Pre-965 needs power of two tile width */
	for (i = tile_width; i < pitch; i <<= 1)
		;

	return i;
}

static struct drm_intel_gem_bo_bucket *
drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
				 unsigned long size)
{
	int i;

	for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
		struct drm_intel_gem_bo_bucket *bucket =
		    &bufmgr_gem->cache_bucket[i];
		if (bucket->size >= size) {
			return bucket;
		}
	}

	return NULL;
}

static void
drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
{
	int i, j;

	for (i = 0; i < bufmgr_gem->exec_count; i++) {
		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

		if (bo_gem->relocs == NULL) {
			DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle,
			    bo_gem->name);
			continue;
		}

		for (j = 0; j < bo_gem->reloc_count; j++) {
			drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo;
			drm_intel_bo_gem *target_gem =
			    (drm_intel_bo_gem *) target_bo;

			DBG("%2d: %d (%s)@0x%08llx -> "
			    "%d (%s)@0x%08lx + 0x%08x\n",
			    i,
			    bo_gem->gem_handle, bo_gem->name,
			    (unsigned long long)bo_gem->relocs[j].offset,
			    target_gem->gem_handle,
			    target_gem->name,
			    target_bo->offset,
			    bo_gem->relocs[j].delta);
		}
	}
}

static inline void
drm_intel_gem_bo_reference(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	assert(atomic_read(&bo_gem->refcount) > 0);
	atomic_inc(&bo_gem->refcount);
}

/**
 * Adds the given buffer to the list of buffers to be validated (moved into the
 * appropriate memory type) with the next batch submission.
 *
 * If a buffer is validated multiple times in a batch submission, it ends up
 * with the intersection of the memory type flags and the union of the
 * access flags.
 */
static void
drm_intel_add_validate_buffer(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int index;

	if (bo_gem->validate_index != -1)
		return;

	/* Extend the array of validation entries as necessary. */
	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
		int new_size = bufmgr_gem->exec_size * 2;

		if (new_size == 0)
			new_size = 5;

		bufmgr_gem->exec_objects =
		    realloc(bufmgr_gem->exec_objects,
			    sizeof(*bufmgr_gem->exec_objects) * new_size);
		bufmgr_gem->exec_bos =
		    realloc(bufmgr_gem->exec_bos,
			    sizeof(*bufmgr_gem->exec_bos) * new_size);
		bufmgr_gem->exec_size = new_size;
	}

	index = bufmgr_gem->exec_count;
	bo_gem->validate_index = index;
	/* Fill in array entry */
	bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
	bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
	bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs;
	bufmgr_gem->exec_objects[index].alignment = 0;
	bufmgr_gem->exec_objects[index].offset = 0;
	bufmgr_gem->exec_bos[index] = bo;
	bufmgr_gem->exec_count++;
}

static void
drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
	int index;

	if (bo_gem->validate_index != -1) {
		if (need_fence)
			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
				EXEC_OBJECT_NEEDS_FENCE;
		return;
	}

	/* Extend the array of validation entries as necessary. */
	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
		int new_size = bufmgr_gem->exec_size * 2;

		if (new_size == 0)
			new_size = 5;

		bufmgr_gem->exec2_objects =
			realloc(bufmgr_gem->exec2_objects,
				sizeof(*bufmgr_gem->exec2_objects) * new_size);
		bufmgr_gem->exec_bos =
			realloc(bufmgr_gem->exec_bos,
				sizeof(*bufmgr_gem->exec_bos) * new_size);
		bufmgr_gem->exec_size = new_size;
	}

	index = bufmgr_gem->exec_count;
	bo_gem->validate_index = index;
	/* Fill in array entry */
	bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
	bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
	bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
	bufmgr_gem->exec2_objects[index].alignment = 0;
	bufmgr_gem->exec2_objects[index].offset = 0;
	bufmgr_gem->exec_bos[index] = bo;
	bufmgr_gem->exec2_objects[index].flags = 0;
	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
	if (need_fence) {
		bufmgr_gem->exec2_objects[index].flags |=
			EXEC_OBJECT_NEEDS_FENCE;
	}
	bufmgr_gem->exec_count++;
}

#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
	sizeof(uint32_t))

static void
drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem,
				      drm_intel_bo_gem *bo_gem)
{
	int size;

	assert(!bo_gem->used_as_reloc_target);

	/* The older chipsets are far-less flexible in terms of tiling,
	 * and require tiled buffer to be size aligned in the aperture.
	 * This means that in the worst possible case we will need a hole
	 * twice as large as the object in order for it to fit into the
	 * aperture.  Optimal packing is for wimps.
	 */
	size = bo_gem->bo.size;
	if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE)
		size *= 2;

	bo_gem->reloc_tree_size = size;
}

static int
drm_intel_setup_reloc_list(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	unsigned int max_relocs = bufmgr_gem->max_relocs;

	if (bo->size / 4 < max_relocs)
		max_relocs = bo->size / 4;

	bo_gem->relocs = malloc(max_relocs *
				sizeof(struct drm_i915_gem_relocation_entry));
	/* The array holds drm_intel_reloc_target structs, not pointers, so
	 * allocate element-sized entries.
	 */
	bo_gem->reloc_target_info = malloc(max_relocs *
					   sizeof(drm_intel_reloc_target));
	if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) {
		bo_gem->has_error = 1;

		free (bo_gem->relocs);
		bo_gem->relocs = NULL;

		free (bo_gem->reloc_target_info);
		bo_gem->reloc_target_info = NULL;

		return 1;
	}

	return 0;
}

static int
drm_intel_gem_bo_busy(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_busy busy;
	int ret;

	memset(&busy, 0, sizeof(busy));
	busy.handle = bo_gem->gem_handle;

	do {
		ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
	} while (ret == -1 && errno == EINTR);

	return (ret == 0 && busy.busy);
}

static int
drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem,
				  drm_intel_bo_gem *bo_gem, int state)
{
	struct drm_i915_gem_madvise madv;

	madv.handle = bo_gem->gem_handle;
	madv.madv = state;
	madv.retained = 1;
	ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);

	return madv.retained;
}

static int
drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv)
{
	return drm_intel_gem_bo_madvise_internal
		((drm_intel_bufmgr_gem *) bo->bufmgr,
		 (drm_intel_bo_gem *) bo,
		 madv);
}
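
/*
 * Usage sketch (illustrative): the madvise helpers above back the public
 * drm_intel_bo_madvise() entry point from intel_bufmgr.h.  A caller hints
 * that a buffer's contents are disposable and later checks whether the
 * kernel kept the pages:
 */
#if 0
static int example_purgeable_hint(drm_intel_bo *bo)
{
	/* From here on the kernel may discard the pages under pressure. */
	drm_intel_bo_madvise(bo, I915_MADV_DONTNEED);

	/* Later: a zero return means the contents were purged and must
	 * be regenerated. */
	return drm_intel_bo_madvise(bo, I915_MADV_WILLNEED);
}
#endif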

/* drop the oldest entries that have been purged by the kernel */
static void
drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
				    struct drm_intel_gem_bo_bucket *bucket)
{
	while (!DRMLISTEMPTY(&bucket->head)) {
		drm_intel_bo_gem *bo_gem;

		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
				      bucket->head.next, head);
		if (drm_intel_gem_bo_madvise_internal
		    (bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
			break;

		DRMLISTDEL(&bo_gem->head);
		drm_intel_gem_bo_free(&bo_gem->bo);
	}
}

static drm_intel_bo *
drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
				const char *name,
				unsigned long size,
				unsigned long flags)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
	drm_intel_bo_gem *bo_gem;
	unsigned int page_size = getpagesize();
	int ret;
	struct drm_intel_gem_bo_bucket *bucket;
	int alloc_from_cache;
	unsigned long bo_size;
	int for_render = 0;

	if (flags & BO_ALLOC_FOR_RENDER)
		for_render = 1;

	/* Round the allocated size up to a power of two number of pages. */
	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);

	/* If we don't have caching at this size, don't actually round the
	 * allocation up.
	 */
	if (bucket == NULL) {
		bo_size = size;
		if (bo_size < page_size)
			bo_size = page_size;
	} else {
		bo_size = bucket->size;
	}

	pthread_mutex_lock(&bufmgr_gem->lock);
	/* Get a buffer out of the cache if available */
retry:
	alloc_from_cache = 0;
	if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
		if (for_render) {
			/* Allocate new render-target BOs from the tail (MRU)
			 * of the list, as it will likely be hot in the GPU
			 * cache and in the aperture for us.
			 */
			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
					      bucket->head.prev, head);
			DRMLISTDEL(&bo_gem->head);
			alloc_from_cache = 1;
		} else {
			/* For non-render-target BOs (where we're probably
			 * going to map it first thing in order to fill it
			 * with data), check if the last BO in the cache is
			 * unbusy, and only reuse in that case.  Otherwise,
			 * allocating a new buffer is probably faster than
			 * waiting for the GPU to finish.
			 */
			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
					      bucket->head.next, head);
			if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
				alloc_from_cache = 1;
				DRMLISTDEL(&bo_gem->head);
			}
		}

		if (alloc_from_cache) {
			if (!drm_intel_gem_bo_madvise_internal
			    (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
				drm_intel_gem_bo_free(&bo_gem->bo);
				drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
								    bucket);
				goto retry;
			}
		}
	}
	pthread_mutex_unlock(&bufmgr_gem->lock);

	if (!alloc_from_cache) {
		struct drm_i915_gem_create create;

		bo_gem = calloc(1, sizeof(*bo_gem));
		if (!bo_gem)
			return NULL;

		bo_gem->bo.size = bo_size;
		memset(&create, 0, sizeof(create));
		create.size = bo_size;

		do {
			ret = ioctl(bufmgr_gem->fd,
				    DRM_IOCTL_I915_GEM_CREATE,
				    &create);
		} while (ret == -1 && errno == EINTR);
		bo_gem->gem_handle = create.handle;
		bo_gem->bo.handle = bo_gem->gem_handle;
		if (ret != 0) {
			free(bo_gem);
			return NULL;
		}
		bo_gem->bo.bufmgr = bufmgr;
	}

	bo_gem->name = name;
	atomic_set(&bo_gem->refcount, 1);
	bo_gem->validate_index = -1;
	bo_gem->reloc_tree_fences = 0;
	bo_gem->used_as_reloc_target = 0;
	bo_gem->has_error = 0;
	bo_gem->tiling_mode = I915_TILING_NONE;
	bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
	bo_gem->reusable = 1;

	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);

	DBG("bo_create: buf %d (%s) %ldb\n",
	    bo_gem->gem_handle, bo_gem->name, size);

	return &bo_gem->bo;
}

static drm_intel_bo *
drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
				  const char *name,
				  unsigned long size,
				  unsigned int alignment)
{
	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
					       BO_ALLOC_FOR_RENDER);
}

static drm_intel_bo *
drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr,
		       const char *name,
		       unsigned long size,
		       unsigned int alignment)
{
	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0);
}
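
/*
 * Usage sketch (illustrative): the two allocators above back the public
 * drm_intel_bo_alloc() and drm_intel_bo_alloc_for_render() entry points.
 * Sizes are rounded up to a cache bucket, so the returned object may be
 * larger than requested; "vbo" is only a debug label:
 */
#if 0
static drm_intel_bo *example_alloc_vertex_buffer(drm_intel_bufmgr *bufmgr)
{
	return drm_intel_bo_alloc(bufmgr, "vbo", 4096, 4096);
}
#endif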

static drm_intel_bo *
drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
			     int x, int y, int cpp, uint32_t *tiling_mode,
			     unsigned long *pitch, unsigned long flags)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
	drm_intel_bo *bo;
	unsigned long size, stride, aligned_y = y;
	int ret;

	/* If we're tiled, our allocations are in 8 or 32-row blocks,
	 * so failure to align our height means that we won't allocate
	 * enough pages.
	 *
	 * If we're untiled, we still have to align to 2 rows high
	 * because the data port accesses 2x2 blocks even if the
	 * bottom row isn't to be rendered, so failure to align means
	 * we could walk off the end of the GTT and fault.  This is
	 * documented on 965, and may be the case on older chipsets
	 * too so we try to be careful.
	 */
	if (*tiling_mode == I915_TILING_NONE)
		aligned_y = ALIGN(y, 2);
	else if (*tiling_mode == I915_TILING_X)
		aligned_y = ALIGN(y, 8);
	else if (*tiling_mode == I915_TILING_Y)
		aligned_y = ALIGN(y, 32);

	stride = x * cpp;
	stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, *tiling_mode);
	size = stride * aligned_y;
	size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);

	bo = drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags);
	if (!bo)
		return NULL;

	ret = drm_intel_gem_bo_set_tiling(bo, tiling_mode, stride);
	if (ret != 0) {
		drm_intel_gem_bo_unreference(bo);
		return NULL;
	}

	*pitch = stride;

	return bo;
}
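
/*
 * Usage sketch (illustrative): callers reach the function above through
 * the public drm_intel_bo_alloc_tiled().  The tiling mode is in/out and
 * the pitch is returned, so the caller must use the values written back
 * rather than the ones it asked for:
 */
#if 0
static drm_intel_bo *example_alloc_tiled_surface(drm_intel_bufmgr *bufmgr)
{
	uint32_t tiling = I915_TILING_X;
	unsigned long pitch;
	drm_intel_bo *bo;

	/* 1024x768 at 4 bytes per pixel. */
	bo = drm_intel_bo_alloc_tiled(bufmgr, "surface", 1024, 768, 4,
				      &tiling, &pitch, 0);
	if (bo != NULL && tiling == I915_TILING_NONE) {
		/* Size or hardware constraints disabled tiling. */
	}
	return bo;
}
#endif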

/**
 * Returns a drm_intel_bo wrapping the given buffer object handle.
 *
 * This can be used when one application needs to pass a buffer object
 * to another.
 */
drm_intel_bo *
drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr,
				  const char *name,
				  unsigned int handle)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
	drm_intel_bo_gem *bo_gem;
	int ret;
	struct drm_gem_open open_arg;
	struct drm_i915_gem_get_tiling get_tiling;

	bo_gem = calloc(1, sizeof(*bo_gem));
	if (!bo_gem)
		return NULL;

	memset(&open_arg, 0, sizeof(open_arg));
	open_arg.name = handle;
	do {
		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_GEM_OPEN,
			    &open_arg);
	} while (ret == -1 && errno == EINTR);
	if (ret != 0) {
		fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n",
			name, handle, strerror(errno));
		free(bo_gem);
		return NULL;
	}
	bo_gem->bo.size = open_arg.size;
	bo_gem->bo.offset = 0;
	bo_gem->bo.virtual = NULL;
	bo_gem->bo.bufmgr = bufmgr;
	bo_gem->name = name;
	atomic_set(&bo_gem->refcount, 1);
	bo_gem->validate_index = -1;
	bo_gem->gem_handle = open_arg.handle;
	bo_gem->global_name = handle;
	bo_gem->reusable = 0;

	memset(&get_tiling, 0, sizeof(get_tiling));
	get_tiling.handle = bo_gem->gem_handle;
	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling);
	if (ret != 0) {
		drm_intel_gem_bo_unreference(&bo_gem->bo);
		return NULL;
	}
	bo_gem->tiling_mode = get_tiling.tiling_mode;
	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);

	DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);

	return &bo_gem->bo;
}
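
/*
 * Usage sketch (illustrative): the exporting process publishes a global
 * ("flink") name with drm_intel_bo_flink() from intel_bufmgr.h, passes it
 * over IPC, and the importer wraps it with the function above:
 */
#if 0
static drm_intel_bo *example_import_shared_bo(drm_intel_bufmgr *bufmgr,
					      uint32_t global_name)
{
	/* global_name came from drm_intel_bo_flink(bo, &global_name) in
	 * the exporting process. */
	return drm_intel_bo_gem_create_from_name(bufmgr, "shared",
						 global_name);
}
#endif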

static void
drm_intel_gem_bo_free(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_gem_close close;
	int ret;

	if (bo_gem->mem_virtual)
		munmap(bo_gem->mem_virtual, bo_gem->bo.size);
	if (bo_gem->gtt_virtual)
		munmap(bo_gem->gtt_virtual, bo_gem->bo.size);

	/* Close this object */
	memset(&close, 0, sizeof(close));
	close.handle = bo_gem->gem_handle;
	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close);
	if (ret != 0) {
		fprintf(stderr,
			"DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
			bo_gem->gem_handle, bo_gem->name, strerror(errno));
	}
	free(bo);
}

/** Frees all cached buffers significantly older than @time. */
static void
drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
{
	int i;

	for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
		struct drm_intel_gem_bo_bucket *bucket =
		    &bufmgr_gem->cache_bucket[i];

		while (!DRMLISTEMPTY(&bucket->head)) {
			drm_intel_bo_gem *bo_gem;

			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
					      bucket->head.next, head);
			if (time - bo_gem->free_time <= 1)
				break;

			DRMLISTDEL(&bo_gem->head);

			drm_intel_gem_bo_free(&bo_gem->bo);
		}
	}
}

static void
drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_intel_gem_bo_bucket *bucket;
	uint32_t tiling_mode;
	int i;

	/* Unreference all the target buffers */
	for (i = 0; i < bo_gem->reloc_count; i++) {
		drm_intel_gem_bo_unreference_locked_timed(bo_gem->
							  reloc_target_info[i].bo,
							  time);
	}
	bo_gem->reloc_count = 0;
	bo_gem->used_as_reloc_target = 0;

	DBG("bo_unreference final: %d (%s)\n",
	    bo_gem->gem_handle, bo_gem->name);

	/* release memory associated with this object */
	if (bo_gem->reloc_target_info) {
		free(bo_gem->reloc_target_info);
		bo_gem->reloc_target_info = NULL;
	}
	if (bo_gem->relocs) {
		free(bo_gem->relocs);
		bo_gem->relocs = NULL;
	}

	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
	/* Put the buffer into our internal cache for reuse if we can.
	 */
	tiling_mode = I915_TILING_NONE;
	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
	    drm_intel_gem_bo_set_tiling(bo, &tiling_mode, 0) == 0 &&
	    drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
					      I915_MADV_DONTNEED)) {
		bo_gem->free_time = time;

		bo_gem->name = NULL;
		bo_gem->validate_index = -1;

		DRMLISTADDTAIL(&bo_gem->head, &bucket->head);

		drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time);
	} else {
		drm_intel_gem_bo_free(bo);
	}
}

static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
						      time_t time)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	assert(atomic_read(&bo_gem->refcount) > 0);
	if (atomic_dec_and_test(&bo_gem->refcount))
		drm_intel_gem_bo_unreference_final(bo, time);
}

static void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	assert(atomic_read(&bo_gem->refcount) > 0);
	if (atomic_dec_and_test(&bo_gem->refcount)) {
		drm_intel_bufmgr_gem *bufmgr_gem =
		    (drm_intel_bufmgr_gem *) bo->bufmgr;
		struct timespec time;

		clock_gettime(CLOCK_MONOTONIC, &time);

		pthread_mutex_lock(&bufmgr_gem->lock);
		drm_intel_gem_bo_unreference_final(bo, time.tv_sec);
		pthread_mutex_unlock(&bufmgr_gem->lock);
	}
}

static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_set_domain set_domain;
	int ret;

	pthread_mutex_lock(&bufmgr_gem->lock);

	/* Allow recursive mapping.  Mesa may recursively map buffers with
	 * nested display loops.
	 */
	if (!bo_gem->mem_virtual) {
		struct drm_i915_gem_mmap mmap_arg;

		DBG("bo_map: %d (%s)\n", bo_gem->gem_handle, bo_gem->name);

		memset(&mmap_arg, 0, sizeof(mmap_arg));
		mmap_arg.handle = bo_gem->gem_handle;
		mmap_arg.offset = 0;
		mmap_arg.size = bo->size;
		do {
			ret = ioctl(bufmgr_gem->fd,
				    DRM_IOCTL_I915_GEM_MMAP,
				    &mmap_arg);
		} while (ret == -1 && errno == EINTR);
		if (ret != 0) {
			ret = -errno;
			fprintf(stderr,
				"%s:%d: Error mapping buffer %d (%s): %s .\n",
				__FILE__, __LINE__, bo_gem->gem_handle,
				bo_gem->name, strerror(errno));
			pthread_mutex_unlock(&bufmgr_gem->lock);
			return ret;
		}
		bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
	}
	DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
	    bo_gem->mem_virtual);
	bo->virtual = bo_gem->mem_virtual;

	set_domain.handle = bo_gem->gem_handle;
	set_domain.read_domains = I915_GEM_DOMAIN_CPU;
	if (write_enable)
		set_domain.write_domain = I915_GEM_DOMAIN_CPU;
	else
		set_domain.write_domain = 0;
	do {
		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_SET_DOMAIN,
			    &set_domain);
	} while (ret == -1 && errno == EINTR);
	if (ret != 0) {
		ret = -errno;
		fprintf(stderr, "%s:%d: Error setting to CPU domain %d: %s\n",
			__FILE__, __LINE__, bo_gem->gem_handle,
			strerror(errno));
		pthread_mutex_unlock(&bufmgr_gem->lock);
		return ret;
	}

	pthread_mutex_unlock(&bufmgr_gem->lock);

	return 0;
}
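
/*
 * Usage sketch (illustrative): a typical CPU upload through the public
 * drm_intel_bo_map() wrapper over the function above; len must not
 * exceed bo->size:
 */
#if 0
static int example_cpu_upload(drm_intel_bo *bo, const void *data, size_t len)
{
	int ret = drm_intel_bo_map(bo, 1);	/* 1 = map for writing */
	if (ret != 0)
		return ret;

	memcpy(bo->virtual, data, len);

	return drm_intel_bo_unmap(bo);
}
#endif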

int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_set_domain set_domain;
	int ret;

	pthread_mutex_lock(&bufmgr_gem->lock);

	/* Get a mapping of the buffer if we haven't before. */
	if (bo_gem->gtt_virtual == NULL) {
		struct drm_i915_gem_mmap_gtt mmap_arg;

		DBG("bo_map_gtt: mmap %d (%s)\n", bo_gem->gem_handle,
		    bo_gem->name);

		memset(&mmap_arg, 0, sizeof(mmap_arg));
		mmap_arg.handle = bo_gem->gem_handle;

		/* Get the fake offset back... */
		do {
			ret = ioctl(bufmgr_gem->fd,
				    DRM_IOCTL_I915_GEM_MMAP_GTT,
				    &mmap_arg);
		} while (ret == -1 && errno == EINTR);
		if (ret != 0) {
			ret = -errno;
			fprintf(stderr,
				"%s:%d: Error preparing buffer map %d (%s): %s .\n",
				__FILE__, __LINE__,
				bo_gem->gem_handle, bo_gem->name,
				strerror(errno));
			pthread_mutex_unlock(&bufmgr_gem->lock);
			return ret;
		}

		/* and mmap it */
		bo_gem->gtt_virtual = mmap(0, bo->size, PROT_READ | PROT_WRITE,
					   MAP_SHARED, bufmgr_gem->fd,
					   mmap_arg.offset);
		if (bo_gem->gtt_virtual == MAP_FAILED) {
			bo_gem->gtt_virtual = NULL;
			ret = -errno;
			fprintf(stderr,
				"%s:%d: Error mapping buffer %d (%s): %s .\n",
				__FILE__, __LINE__,
				bo_gem->gem_handle, bo_gem->name,
				strerror(errno));
			pthread_mutex_unlock(&bufmgr_gem->lock);
			return ret;
		}
	}

	bo->virtual = bo_gem->gtt_virtual;

	DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
	    bo_gem->gtt_virtual);

	/* Now move it to the GTT domain so that the CPU caches are flushed */
	set_domain.handle = bo_gem->gem_handle;
	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
	set_domain.write_domain = I915_GEM_DOMAIN_GTT;
	do {
		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_SET_DOMAIN,
			    &set_domain);
	} while (ret == -1 && errno == EINTR);

	if (ret != 0) {
		ret = -errno;
		fprintf(stderr, "%s:%d: Error setting domain %d: %s\n",
			__FILE__, __LINE__, bo_gem->gem_handle,
			strerror(errno));
	}

	pthread_mutex_unlock(&bufmgr_gem->lock);

	return ret;
}

int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int ret = 0;

	if (bo == NULL)
		return 0;

	assert(bo_gem->gtt_virtual != NULL);

	pthread_mutex_lock(&bufmgr_gem->lock);
	bo->virtual = NULL;
	pthread_mutex_unlock(&bufmgr_gem->lock);

	return ret;
}
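
/*
 * Usage sketch (illustrative): the GTT mapping pair above gives
 * write-combined access through the aperture, where the fence registers
 * take care of tiling, which suits filling tiled scanout buffers:
 */
#if 0
static int example_gtt_upload(drm_intel_bo *bo, const void *data, size_t len)
{
	int ret = drm_intel_gem_bo_map_gtt(bo);
	if (ret != 0)
		return ret;

	memcpy(bo->virtual, data, len);	/* linear view of a tiled buffer */

	return drm_intel_gem_bo_unmap_gtt(bo);
}
#endif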

static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_sw_finish sw_finish;
	int ret;

	if (bo == NULL)
		return 0;

	assert(bo_gem->mem_virtual != NULL);

	pthread_mutex_lock(&bufmgr_gem->lock);

	/* Cause a flush to happen if the buffer's pinned for scanout, so the
	 * results show up in a timely manner.
	 */
	sw_finish.handle = bo_gem->gem_handle;
	do {
		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_SW_FINISH,
			    &sw_finish);
	} while (ret == -1 && errno == EINTR);
	ret = ret == -1 ? -errno : 0;

	bo->virtual = NULL;
	pthread_mutex_unlock(&bufmgr_gem->lock);

	return ret;
}

static int
drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
			 unsigned long size, const void *data)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_pwrite pwrite;
	int ret;

	memset(&pwrite, 0, sizeof(pwrite));
	pwrite.handle = bo_gem->gem_handle;
	pwrite.offset = offset;
	pwrite.size = size;
	pwrite.data_ptr = (uint64_t) (uintptr_t) data;
	do {
		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_PWRITE,
			    &pwrite);
	} while (ret == -1 && errno == EINTR);
	if (ret != 0) {
		ret = -errno;
		fprintf(stderr,
			"%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
			__FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
			(int)size, strerror(errno));
	}

	return ret;
}
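
/*
 * Usage sketch (illustrative): for small updates the public
 * drm_intel_bo_subdata() wrapper over the pwrite path above avoids
 * creating a CPU mapping at all:
 */
#if 0
static int example_update_constants(drm_intel_bo *bo, const float consts[4])
{
	/* Writes 16 bytes at offset 0 without mapping the buffer. */
	return drm_intel_bo_subdata(bo, 0, 4 * sizeof(float), consts);
}
#endif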

static int
drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
	struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id;
	int ret;

	get_pipe_from_crtc_id.crtc_id = crtc_id;
	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
		    &get_pipe_from_crtc_id);
	if (ret != 0) {
		/* We return -1 here to signal that we don't
		 * know which pipe is associated with this crtc.
		 * This lets the caller know that this information
		 * isn't available; using the wrong pipe for
		 * vblank waiting can cause the chipset to lock up.
		 */
		return -1;
	}

	return get_pipe_from_crtc_id.pipe;
}

static int
drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
			     unsigned long size, void *data)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_pread pread;
	int ret;

	memset(&pread, 0, sizeof(pread));
	pread.handle = bo_gem->gem_handle;
	pread.offset = offset;
	pread.size = size;
	pread.data_ptr = (uint64_t) (uintptr_t) data;
	do {
		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_PREAD,
			    &pread);
	} while (ret == -1 && errno == EINTR);
	if (ret != 0) {
		ret = -errno;
		fprintf(stderr,
			"%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
			__FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
			(int)size, strerror(errno));
	}

	return ret;
}

/** Waits for all GPU rendering to the object to have completed. */
static void
drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
{
	drm_intel_gem_bo_start_gtt_access(bo, 0);
}

/**
 * Sets the object to the GTT read and possibly write domain, used by the X
 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
 *
 * In combination with drm_intel_gem_bo_pin() and manual fence management, we
 * can do tiled pixmaps this way.
 */
void
drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_set_domain set_domain;
	int ret;

	set_domain.handle = bo_gem->gem_handle;
	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
	set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
	do {
		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_SET_DOMAIN,
			    &set_domain);
	} while (ret == -1 && errno == EINTR);
	if (ret != 0) {
		fprintf(stderr,
			"%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
			__FILE__, __LINE__, bo_gem->gem_handle,
			set_domain.read_domains, set_domain.write_domain,
			strerror(errno));
	}
}

static void
drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
	int i;

	free(bufmgr_gem->exec2_objects);
	free(bufmgr_gem->exec_objects);
	free(bufmgr_gem->exec_bos);

	pthread_mutex_destroy(&bufmgr_gem->lock);

	/* Free any cached buffer objects we were going to reuse */
	for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
		struct drm_intel_gem_bo_bucket *bucket =
		    &bufmgr_gem->cache_bucket[i];
		drm_intel_bo_gem *bo_gem;

		while (!DRMLISTEMPTY(&bucket->head)) {
			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
					      bucket->head.next, head);
			DRMLISTDEL(&bo_gem->head);

			drm_intel_gem_bo_free(&bo_gem->bo);
		}
	}

	free(bufmgr);
}
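
/*
 * Usage sketch (illustrative): teardown order matters.  Every buffer must
 * be unreferenced before the manager is destroyed, since the destructor
 * above frees the cache buckets, the exec lists and the lock:
 */
#if 0
static void example_teardown(drm_intel_bufmgr *bufmgr, drm_intel_bo *bo)
{
	drm_intel_bo_unreference(bo);	/* last ref frees or caches the BO */
	drm_intel_bufmgr_destroy(bufmgr);
}
#endif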

/**
 * Adds the target buffer to the validation list and adds the relocation
 * to the reloc_buffer's relocation list.
 *
 * The relocation entry at the given offset must already contain the
 * precomputed relocation value, because the kernel will optimize out
 * the relocation entry write when the buffer hasn't moved from the
 * last known offset in target_bo.
 */
static int
do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
		 drm_intel_bo *target_bo, uint32_t target_offset,
		 uint32_t read_domains, uint32_t write_domain,
		 int need_fence)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;

	if (bo_gem->has_error)
		return -ENOMEM;

	if (target_bo_gem->has_error) {
		bo_gem->has_error = 1;
		return -ENOMEM;
	}

	if (target_bo_gem->tiling_mode == I915_TILING_NONE)
		need_fence = 0;

	/* We never use HW fences for rendering on 965+ */
	if (bufmgr_gem->gen >= 4)
		need_fence = 0;

	/* Create a new relocation list if needed */
	if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo))
		return -ENOMEM;

	/* Check overflow */
	assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);

	/* Check args */
	assert(offset <= bo->size - 4);
	assert((write_domain & (write_domain - 1)) == 0);

	/* Make sure that we're not adding a reloc to something whose size has
	 * already been accounted for.
	 */
	assert(!bo_gem->used_as_reloc_target);
	bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
	/* An object needing a fence is a tiled buffer, so it won't have
	 * relocs to other buffers.
	 */
	if (need_fence)
		target_bo_gem->reloc_tree_fences = 1;
	bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;

	/* Flag the target to disallow further relocations in it. */
	target_bo_gem->used_as_reloc_target = 1;

	bo_gem->relocs[bo_gem->reloc_count].offset = offset;
	bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
	bo_gem->relocs[bo_gem->reloc_count].target_handle =
	    target_bo_gem->gem_handle;
	bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
	bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
	bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset;

	bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
	drm_intel_gem_bo_reference(target_bo);
	if (need_fence)
		bo_gem->reloc_target_info[bo_gem->reloc_count].flags =
			DRM_INTEL_RELOC_FENCE;
	else
		bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;

	bo_gem->reloc_count++;

	return 0;
}

static int
drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
			    drm_intel_bo *target_bo, uint32_t target_offset,
			    uint32_t read_domains, uint32_t write_domain)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;

	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
				read_domains, write_domain,
				!bufmgr_gem->fenced_relocs);
}

static int
drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
				  drm_intel_bo *target_bo,
				  uint32_t target_offset,
				  uint32_t read_domains, uint32_t write_domain)
{
	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
				read_domains, write_domain, 1);
}
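
/*
 * Usage sketch (illustrative): batchbuffer code calls the public
 * drm_intel_bo_emit_reloc() wrapper.  As the comment above
 * do_bo_emit_reloc() requires, the presumed target address is written
 * into the batch first, then the relocation records it; batch_map is an
 * assumed CPU mapping of the batch:
 */
#if 0
static int example_emit_reloc(drm_intel_bo *batch, uint32_t *batch_map,
			      uint32_t dword_idx, drm_intel_bo *target)
{
	/* Precompute the address so the kernel can skip the fixup when
	 * the target has not moved. */
	batch_map[dword_idx] = target->offset;

	return drm_intel_bo_emit_reloc(batch, dword_idx * 4,
				       target, 0,
				       I915_GEM_DOMAIN_RENDER,
				       I915_GEM_DOMAIN_RENDER);
}
#endif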
/**
 * Walk the tree of relocations rooted at BO and accumulate the list of
 * validations to be performed and update the relocation buffers with
 * index values into the validation list.
 */
static void
drm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int i;

	if (bo_gem->relocs == NULL)
		return;

	for (i = 0; i < bo_gem->reloc_count; i++) {
		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;

		/* Continue walking the tree depth-first. */
		drm_intel_gem_bo_process_reloc(target_bo);

		/* Add the target to the validate list */
		drm_intel_add_validate_buffer(target_bo);
	}
}

static void
drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
	int i;

	if (bo_gem->relocs == NULL)
		return;

	for (i = 0; i < bo_gem->reloc_count; i++) {
		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
		int need_fence;

		/* Continue walking the tree depth-first. */
		drm_intel_gem_bo_process_reloc2(target_bo);

		need_fence = (bo_gem->reloc_target_info[i].flags &
			      DRM_INTEL_RELOC_FENCE);

		/* Add the target to the validate list */
		drm_intel_add_validate_buffer2(target_bo, need_fence);
	}
}
static void
drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
{
	int i;

	for (i = 0; i < bufmgr_gem->exec_count; i++) {
		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

		/* Update the buffer offset */
		if (bufmgr_gem->exec_objects[i].offset != bo->offset) {
			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
			    bo_gem->gem_handle, bo_gem->name, bo->offset,
			    (unsigned long long)bufmgr_gem->exec_objects[i].
			    offset);
			bo->offset = bufmgr_gem->exec_objects[i].offset;
		}
	}
}

static void
drm_intel_update_buffer_offsets2(drm_intel_bufmgr_gem *bufmgr_gem)
{
	int i;

	for (i = 0; i < bufmgr_gem->exec_count; i++) {
		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;

		/* Update the buffer offset */
		if (bufmgr_gem->exec2_objects[i].offset != bo->offset) {
			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
			    bo_gem->gem_handle, bo_gem->name, bo->offset,
			    (unsigned long long)bufmgr_gem->exec2_objects[i].offset);
			bo->offset = bufmgr_gem->exec2_objects[i].offset;
		}
	}
}

static int
drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
		      drm_clip_rect_t * cliprects, int num_cliprects, int DR4)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_execbuffer execbuf;
	int ret, i;

	if (bo_gem->has_error)
		return -ENOMEM;

	pthread_mutex_lock(&bufmgr_gem->lock);
	/* Update indices and set up the validate list. */
	drm_intel_gem_bo_process_reloc(bo);

	/* Add the batch buffer to the validation list.  There are no
	 * relocations pointing to it.
	 */
	drm_intel_add_validate_buffer(bo);

	execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects;
	execbuf.buffer_count = bufmgr_gem->exec_count;
	execbuf.batch_start_offset = 0;
	execbuf.batch_len = used;
	execbuf.cliprects_ptr = (uintptr_t) cliprects;
	execbuf.num_cliprects = num_cliprects;
	execbuf.DR1 = 0;
	execbuf.DR4 = DR4;

	do {
		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_EXECBUFFER,
			    &execbuf);
	} while (ret != 0 && errno == EINTR);

	if (ret != 0) {
		ret = -errno;
		if (errno == ENOSPC) {
			fprintf(stderr,
				"Execbuffer fails to pin. "
				"Estimate: %u. Actual: %u. Available: %u\n",
				drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
								   bufmgr_gem->
								   exec_count),
				drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
								  bufmgr_gem->
								  exec_count),
				(unsigned int)bufmgr_gem->gtt_size);
		}
	}
	drm_intel_update_buffer_offsets(bufmgr_gem);

	if (bufmgr_gem->bufmgr.debug)
		drm_intel_gem_dump_validation_list(bufmgr_gem);

	for (i = 0; i < bufmgr_gem->exec_count; i++) {
		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

		/* Disconnect the buffer from the validate list */
		bo_gem->validate_index = -1;
		bufmgr_gem->exec_bos[i] = NULL;
	}
	bufmgr_gem->exec_count = 0;
	pthread_mutex_unlock(&bufmgr_gem->lock);

	return ret;
}

static int
drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used,
		       drm_clip_rect_t *cliprects, int num_cliprects,
		       int DR4)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
	struct drm_i915_gem_execbuffer2 execbuf;
	int ret, i;

	pthread_mutex_lock(&bufmgr_gem->lock);
	/* Update indices and set up the validate list. */
	drm_intel_gem_bo_process_reloc2(bo);

	/* Add the batch buffer to the validation list.  There are no
	 * relocations pointing to it.
	 */
	drm_intel_add_validate_buffer2(bo, 0);

	execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
	execbuf.buffer_count = bufmgr_gem->exec_count;
	execbuf.batch_start_offset = 0;
	execbuf.batch_len = used;
	execbuf.cliprects_ptr = (uintptr_t)cliprects;
	execbuf.num_cliprects = num_cliprects;
	execbuf.DR1 = 0;
	execbuf.DR4 = DR4;
	execbuf.flags = 0;
	execbuf.rsvd1 = 0;
	execbuf.rsvd2 = 0;

	do {
		ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2,
			    &execbuf);
	} while (ret != 0 && errno == EINTR);

	if (ret != 0) {
		ret = -errno;
		if (ret == -ENOMEM) {
			fprintf(stderr,
				"Execbuffer fails to pin. "
				"Estimate: %u. Actual: %u. Available: %u\n",
				drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
								   bufmgr_gem->exec_count),
				drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
								  bufmgr_gem->exec_count),
				(unsigned int) bufmgr_gem->gtt_size);
		}
	}
	drm_intel_update_buffer_offsets2(bufmgr_gem);

	if (bufmgr_gem->bufmgr.debug)
		drm_intel_gem_dump_validation_list(bufmgr_gem);

	for (i = 0; i < bufmgr_gem->exec_count; i++) {
		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;

		/* Disconnect the buffer from the validate list */
		bo_gem->validate_index = -1;
		bufmgr_gem->exec_bos[i] = NULL;
	}
	bufmgr_gem->exec_count = 0;
	pthread_mutex_unlock(&bufmgr_gem->lock);

	return ret;
}
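/*
 * Illustrative sketch (not part of this file): a typical submission path.
 * Both bo_exec implementations above sit behind the public
 * drm_intel_bo_exec(); "used" is the number of bytes of batch actually
 * written.  The variable names are hypothetical.
 *
 *	int ret;
 *
 *	ret = drm_intel_bo_exec(batch_bo, used_bytes, NULL, 0, 0);
 *	if (ret != 0)
 *		fprintf(stderr, "execbuffer failed: %s\n", strerror(-ret));
 *
 * Passing NULL/0 cliprects is the common case; the DR1/DR4 words only
 * matter for legacy cliprect handling.
 */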
static int
drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_pin pin;
	int ret;

	memset(&pin, 0, sizeof(pin));
	pin.handle = bo_gem->gem_handle;
	pin.alignment = alignment;

	do {
		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_PIN,
			    &pin);
	} while (ret == -1 && errno == EINTR);

	if (ret != 0)
		return -errno;

	bo->offset = pin.offset;
	return 0;
}

static int
drm_intel_gem_bo_unpin(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_unpin unpin;
	int ret;

	memset(&unpin, 0, sizeof(unpin));
	unpin.handle = bo_gem->gem_handle;

	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
	if (ret != 0)
		return -errno;

	return 0;
}
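/*
 * Illustrative sketch (not part of this file): pinning is intended for
 * buffers the kernel must not move, such as a scanout surface.  The
 * buffer name and 4096-byte alignment here are hypothetical.
 *
 *	if (drm_intel_bo_pin(fb_bo, 4096) == 0) {
 *		... program the display base address using fb_bo->offset ...
 *		drm_intel_bo_unpin(fb_bo);
 *	}
 */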
static int
drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
			    uint32_t stride)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_set_tiling set_tiling;
	int ret;

	if (bo_gem->global_name == 0 && *tiling_mode == bo_gem->tiling_mode)
		return 0;

	memset(&set_tiling, 0, sizeof(set_tiling));
	set_tiling.handle = bo_gem->gem_handle;

	do {
		set_tiling.tiling_mode = *tiling_mode;
		set_tiling.stride = stride;

		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_SET_TILING,
			    &set_tiling);
	} while (ret == -1 && errno == EINTR);
	bo_gem->tiling_mode = set_tiling.tiling_mode;
	bo_gem->swizzle_mode = set_tiling.swizzle_mode;

	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);

	*tiling_mode = bo_gem->tiling_mode;
	return ret == 0 ? 0 : -errno;
}

static int
drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
			    uint32_t * swizzle_mode)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	*tiling_mode = bo_gem->tiling_mode;
	*swizzle_mode = bo_gem->swizzle_mode;
	return 0;
}

static int
drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_gem_flink flink;
	int ret;

	if (!bo_gem->global_name) {
		memset(&flink, 0, sizeof(flink));
		flink.handle = bo_gem->gem_handle;

		ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink);
		if (ret != 0)
			return -errno;
		bo_gem->global_name = flink.name;
		bo_gem->reusable = 0;
	}

	*name = bo_gem->global_name;
	return 0;
}

/**
 * Enables unlimited caching of buffer objects for reuse.
 *
 * This is potentially very memory expensive, as the cache at each bucket
 * size is only bounded by how many buffers of that size we've managed to
 * have in flight at once.
 */
void
drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;

	bufmgr_gem->bo_reuse = 1;
}

/**
 * Enable use of the fenced reloc type.
 *
 * New code should enable this to avoid unnecessary fence register
 * allocation.  If this option is not enabled, all relocs will have a
 * fence register allocated.
 */
void
drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;

	if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2)
		bufmgr_gem->fenced_relocs = 1;
}
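/*
 * Illustrative sketch (not part of this file) combining the entry points
 * above: make a buffer X-tiled, then flink it so another process can open
 * it by global name.  The stride value is hypothetical.
 *
 *	uint32_t tiling = I915_TILING_X;
 *	uint32_t name;
 *
 *	drm_intel_bo_set_tiling(bo, &tiling, 4096);
 *	if (tiling != I915_TILING_X)
 *		... the kernel refused or downgraded the tiling mode ...
 *	if (drm_intel_bo_flink(bo, &name) == 0)
 *		... hand "name" to the other process ...
 *
 * Note that flinking marks the buffer non-reusable, since the cache cannot
 * know when the foreign reference goes away.
 */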
/**
 * Return the additional aperture space required by the tree of buffer
 * objects rooted at bo.
 */
static int
drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int i;
	int total = 0;

	if (bo == NULL || bo_gem->included_in_check_aperture)
		return 0;

	total += bo->size;
	bo_gem->included_in_check_aperture = 1;

	for (i = 0; i < bo_gem->reloc_count; i++)
		total +=
		    drm_intel_gem_bo_get_aperture_space(bo_gem->
							reloc_target_info[i].bo);

	return total;
}

/**
 * Count the number of buffers in this list that need a fence reg.
 *
 * If the count is greater than the number of available regs, we'll have
 * to ask the caller to resubmit a batch with fewer tiled buffers.
 *
 * This function over-counts if the same buffer is used multiple times.
 */
static unsigned int
drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count)
{
	int i;
	unsigned int total = 0;

	for (i = 0; i < count; i++) {
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];

		if (bo_gem == NULL)
			continue;

		total += bo_gem->reloc_tree_fences;
	}
	return total;
}

/**
 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're
 * ready for the next drm_intel_bufmgr_check_aperture_space() call.
 */
static void
drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int i;

	if (bo == NULL || !bo_gem->included_in_check_aperture)
		return;

	bo_gem->included_in_check_aperture = 0;

	for (i = 0; i < bo_gem->reloc_count; i++)
		drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->
							   reloc_target_info[i].bo);
}

/**
 * Return a conservative estimate for the amount of aperture required
 * for a collection of buffers.  This may double-count some buffers.
 */
static unsigned int
drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count)
{
	int i;
	unsigned int total = 0;

	for (i = 0; i < count; i++) {
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
		if (bo_gem != NULL)
			total += bo_gem->reloc_tree_size;
	}
	return total;
}

/**
 * Return the amount of aperture needed for a collection of buffers.
 * This avoids double counting any buffers, at the cost of looking
 * at every buffer in the set.
 */
static unsigned int
drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count)
{
	int i;
	unsigned int total = 0;

	for (i = 0; i < count; i++) {
		total += drm_intel_gem_bo_get_aperture_space(bo_array[i]);
		/* For the first buffer object in the array, we get an
		 * accurate count back for its reloc_tree size (since nothing
		 * had been flagged as being counted yet).  We can save that
		 * value out as a more conservative reloc_tree_size that
		 * avoids double-counting target buffers.  Since the first
		 * buffer happens to usually be the batch buffer in our
		 * callers, this can pull us back from doing the tree
		 * walk on every new batch emit.
		 */
		if (i == 0) {
			drm_intel_bo_gem *bo_gem =
			    (drm_intel_bo_gem *) bo_array[i];
			bo_gem->reloc_tree_size = total;
		}
	}

	for (i = 0; i < count; i++)
		drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]);
	return total;
}
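/*
 * Worked example of the difference between the two counts above: suppose
 * the array holds just a batch B whose relocations reference surfaces S1
 * and S2, and both surfaces reference the same 1MB texture T.  B's cached
 * reloc_tree_size was accumulated per-relocation as relocs were emitted,
 * so T was added once for each referencing surface:
 *
 *	estimate = size(B) + size(S1) + size(S2) + 2 * 1MB
 *
 * The exact walk marks each buffer with included_in_check_aperture on
 * first visit, so T contributes only once:
 *
 *	compute  = size(B) + size(S1) + size(S2) + 1 * 1MB
 *
 * This is why check_aperture below only falls back to the exact walk when
 * the cheap estimate crosses the threshold.
 */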
/**
 * Return -ENOSPC if the batchbuffer should be flushed before attempting to
 * emit rendering referencing the buffers pointed to by bo_array.
 *
 * This is required because if we try to emit a batchbuffer with relocations
 * to a tree of buffers that won't simultaneously fit in the aperture,
 * the rendering will return an error at a point where the software is not
 * prepared to recover from it.
 *
 * However, we also want to emit the batchbuffer significantly before we
 * reach the limit, as a series of batchbuffers each of which references
 * buffers covering almost all of the aperture means that at each emit we
 * end up waiting to evict a buffer from the last rendering, and we get
 * synchronous performance.  By emitting smaller batchbuffers, we eat some
 * CPU overhead to get better parallelism.
 */
static int
drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
{
	drm_intel_bufmgr_gem *bufmgr_gem =
	    (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr;
	unsigned int total = 0;
	unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
	int total_fences;

	/* Check for fence reg constraints if necessary */
	if (bufmgr_gem->available_fences) {
		total_fences = drm_intel_gem_total_fences(bo_array, count);
		if (total_fences > bufmgr_gem->available_fences)
			return -ENOSPC;
	}

	total = drm_intel_gem_estimate_batch_space(bo_array, count);

	if (total > threshold)
		total = drm_intel_gem_compute_batch_space(bo_array, count);

	if (total > threshold) {
		DBG("check_space: overflowed available aperture, "
		    "%dkb vs %dkb\n",
		    total / 1024, (int)bufmgr_gem->gtt_size / 1024);
		return -ENOSPC;
	} else {
		DBG("drm_check_space: total %dkb vs bufmgr %dkb\n", total / 1024,
		    (int)bufmgr_gem->gtt_size / 1024);
		return 0;
	}
}

/*
 * Disable buffer reuse for objects which are shared with the kernel
 * as scanout buffers.
 */
static int
drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	bo_gem->reusable = 0;
	return 0;
}

static int
_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int i;

	for (i = 0; i < bo_gem->reloc_count; i++) {
		if (bo_gem->reloc_target_info[i].bo == target_bo)
			return 1;
		if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo,
						 target_bo))
			return 1;
	}

	return 0;
}

/** Return true if target_bo is referenced by bo's relocation tree. */
static int
drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
{
	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;

	if (bo == NULL || target_bo == NULL)
		return 0;
	if (target_bo_gem->used_as_reloc_target)
		return _drm_intel_gem_bo_references(bo, target_bo);
	return 0;
}
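/*
 * Illustrative sketch (not part of this file): callers are expected to
 * test the working set through the public wrapper before emitting
 * rendering, and flush when it does not fit.  The flush_batch() helper is
 * hypothetical.
 *
 *	drm_intel_bo *set[] = { batch_bo, src_bo, dst_bo };
 *
 *	if (drm_intel_bufmgr_check_aperture_space(set, 3) != 0) {
 *		flush_batch();
 *		... rebuild the batch and retry with a smaller set ...
 *	}
 */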
/**
 * Initializes the GEM buffer manager, which uses the kernel to allocate,
 * map, and manage buffer objects.
 *
 * \param fd File descriptor of the opened DRM device.
 */
drm_intel_bufmgr *
drm_intel_bufmgr_gem_init(int fd, int batch_size)
{
	drm_intel_bufmgr_gem *bufmgr_gem;
	struct drm_i915_gem_get_aperture aperture;
	drm_i915_getparam_t gp;
	int ret, i;
	unsigned long size;
	int exec2 = 0;
	int has_execbuf2 = 0;

	bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
	if (bufmgr_gem == NULL)
		return NULL;

	bufmgr_gem->fd = fd;

	if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
		free(bufmgr_gem);
		return NULL;
	}

	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);

	if (ret == 0)
		bufmgr_gem->gtt_size = aperture.aper_available_size;
	else {
		fprintf(stderr, "DRM_IOCTL_I915_GEM_GET_APERTURE failed: %s\n",
			strerror(errno));
		bufmgr_gem->gtt_size = 128 * 1024 * 1024;
		fprintf(stderr, "Assuming %dkB available aperture size.\n"
			"May lead to reduced performance or incorrect "
			"rendering.\n",
			(int)bufmgr_gem->gtt_size / 1024);
	}

	gp.param = I915_PARAM_CHIPSET_ID;
	gp.value = &bufmgr_gem->pci_device;
	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	if (ret) {
		fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
		fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
	}

	if (IS_GEN2(bufmgr_gem))
		bufmgr_gem->gen = 2;
	else if (IS_GEN3(bufmgr_gem))
		bufmgr_gem->gen = 3;
	else if (IS_GEN4(bufmgr_gem))
		bufmgr_gem->gen = 4;
	else
		bufmgr_gem->gen = 6;

	/* Point gp.value at a scratch int so a successful query doesn't
	 * overwrite the pci_device value read above.
	 */
	gp.param = I915_PARAM_HAS_EXECBUF2;
	gp.value = &has_execbuf2;
	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	if (!ret)
		exec2 = 1;

	if (bufmgr_gem->gen < 4) {
		gp.param = I915_PARAM_NUM_FENCES_AVAIL;
		gp.value = &bufmgr_gem->available_fences;
		ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
		if (ret) {
			fprintf(stderr, "get fences failed: %d [%d]\n", ret,
				errno);
			fprintf(stderr, "param: %d, val: %d\n", gp.param,
				*gp.value);
			bufmgr_gem->available_fences = 0;
		} else {
			/* XXX The kernel reports the total number of fences,
			 * including any that may be pinned.
			 *
			 * We presume that there will be at least one pinned
			 * fence for the scanout buffer, but there may be more
			 * than one scanout and the user may be manually
			 * pinning buffers.  Let's move to execbuffer2 and
			 * thereby forget the insanity of using fences...
			 */
			bufmgr_gem->available_fences -= 2;
			if (bufmgr_gem->available_fences < 0)
				bufmgr_gem->available_fences = 0;
		}
	}

	/* Let's go with one relocation per every 2 dwords (but round down a
	 * bit since a power of two will mean an extra page allocation for
	 * the reloc buffer).
	 *
	 * Every 4 was too few for the blender benchmark.
	 */
	bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;

	bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
	bufmgr_gem->bufmgr.bo_alloc_for_render =
	    drm_intel_gem_bo_alloc_for_render;
	bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
	bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
	bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
	bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
	bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
	bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
	bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
	bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
	bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
	bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
	/* Use the new one if available */
	if (exec2)
		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2;
	else
		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
	bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy;
	bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise;
	bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy;
	bufmgr_gem->bufmgr.debug = 0;
	bufmgr_gem->bufmgr.check_aperture_space =
	    drm_intel_gem_check_aperture_space;
	bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
	bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
	    drm_intel_gem_get_pipe_from_crtc_id;
	bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;

	/* Initialize the linked lists for BO reuse cache. */
	for (i = 0, size = 4096; i < DRM_INTEL_GEM_BO_BUCKETS; i++, size *= 2) {
		DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
		bufmgr_gem->cache_bucket[i].size = size;
	}

	return &bufmgr_gem->bufmgr;
}
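/*
 * Illustrative sketch (not part of this file): typical bring-up of this
 * buffer manager.  The device path and 16kB batch size are hypothetical;
 * callers normally obtain the fd from the X server or drmOpen().
 *
 *	int fd = open("/dev/dri/card0", O_RDWR);
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 16 * 1024);
 *
 *	if (bufmgr != NULL) {
 *		drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 *		drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "test",
 *						      4096, 4096);
 *		... map, fill, relocate, and exec as shown above ...
 *		drm_intel_bo_unreference(bo);
 *		drm_intel_bufmgr_destroy(bufmgr);
 *	}
 */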