intel_bufmgr_gem.c revision 424e9256
122944501Smrg/************************************************************************** 222944501Smrg * 322944501Smrg * Copyright � 2007 Red Hat Inc. 420131375Smrg * Copyright � 2007-2012 Intel Corporation 522944501Smrg * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA 622944501Smrg * All Rights Reserved. 722944501Smrg * 822944501Smrg * Permission is hereby granted, free of charge, to any person obtaining a 922944501Smrg * copy of this software and associated documentation files (the 1022944501Smrg * "Software"), to deal in the Software without restriction, including 1122944501Smrg * without limitation the rights to use, copy, modify, merge, publish, 1222944501Smrg * distribute, sub license, and/or sell copies of the Software, and to 1322944501Smrg * permit persons to whom the Software is furnished to do so, subject to 1422944501Smrg * the following conditions: 1522944501Smrg * 1622944501Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1722944501Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1822944501Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 1922944501Smrg * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 2022944501Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 2122944501Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 2222944501Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE. 2322944501Smrg * 2422944501Smrg * The above copyright notice and this permission notice (including the 2522944501Smrg * next paragraph) shall be included in all copies or substantial portions 2622944501Smrg * of the Software. 2722944501Smrg * 2822944501Smrg * 2922944501Smrg **************************************************************************/ 3022944501Smrg/* 3122944501Smrg * Authors: Thomas Hellstr�m <thomas-at-tungstengraphics-dot-com> 3222944501Smrg * Keith Whitwell <keithw-at-tungstengraphics-dot-com> 3322944501Smrg * Eric Anholt <eric@anholt.net> 3422944501Smrg * Dave Airlie <airlied@linux.ie> 3522944501Smrg */ 3622944501Smrg 3722944501Smrg#ifdef HAVE_CONFIG_H 3822944501Smrg#include "config.h" 3922944501Smrg#endif 4022944501Smrg 4122944501Smrg#include <xf86drm.h> 4222944501Smrg#include <xf86atomic.h> 4322944501Smrg#include <fcntl.h> 4422944501Smrg#include <stdio.h> 4522944501Smrg#include <stdlib.h> 4622944501Smrg#include <string.h> 4722944501Smrg#include <unistd.h> 4822944501Smrg#include <assert.h> 4922944501Smrg#include <pthread.h> 502e6867f6Smrg#include <stddef.h> 5122944501Smrg#include <sys/ioctl.h> 5222944501Smrg#include <sys/stat.h> 5322944501Smrg#include <sys/types.h> 5420131375Smrg#include <stdbool.h> 5522944501Smrg 5622944501Smrg#include "errno.h" 5720131375Smrg#ifndef ETIME 5820131375Smrg#define ETIME ETIMEDOUT 5920131375Smrg#endif 60424e9256Smrg#include "libdrm_macros.h" 6122944501Smrg#include "libdrm_lists.h" 6222944501Smrg#include "intel_bufmgr.h" 6322944501Smrg#include "intel_bufmgr_priv.h" 6422944501Smrg#include "intel_chipset.h" 6520131375Smrg#include "intel_aub.h" 6622944501Smrg#include "string.h" 6722944501Smrg 6822944501Smrg#include "i915_drm.h" 6922944501Smrg 7020131375Smrg#ifdef HAVE_VALGRIND 7120131375Smrg#include <valgrind.h> 7220131375Smrg#include <memcheck.h> 7320131375Smrg#define VG(x) x 7420131375Smrg#else 7520131375Smrg#define VG(x) 7620131375Smrg#endif 7720131375Smrg 78424e9256Smrg#define memclear(s) memset(&s, 0, sizeof(s)) 7920131375Smrg 8022944501Smrg#define DBG(...) do { \ 8122944501Smrg if (bufmgr_gem->bufmgr.debug) \ 8222944501Smrg fprintf(stderr, __VA_ARGS__); \ 8322944501Smrg} while (0) 8422944501Smrg 85aaba2545Smrg#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 86aaba2545Smrg 8722944501Smrgtypedef struct _drm_intel_bo_gem drm_intel_bo_gem; 8822944501Smrg 8922944501Smrgstruct drm_intel_gem_bo_bucket { 9022944501Smrg drmMMListHead head; 9122944501Smrg unsigned long size; 9222944501Smrg}; 9322944501Smrg 9422944501Smrgtypedef struct _drm_intel_bufmgr_gem { 9522944501Smrg drm_intel_bufmgr bufmgr; 9622944501Smrg 97a884aba1Smrg atomic_t refcount; 98a884aba1Smrg 9922944501Smrg int fd; 10022944501Smrg 10122944501Smrg int max_relocs; 10222944501Smrg 10322944501Smrg pthread_mutex_t lock; 10422944501Smrg 10522944501Smrg struct drm_i915_gem_exec_object *exec_objects; 10622944501Smrg struct drm_i915_gem_exec_object2 *exec2_objects; 10722944501Smrg drm_intel_bo **exec_bos; 10822944501Smrg int exec_size; 10922944501Smrg int exec_count; 11022944501Smrg 11122944501Smrg /** Array of lists of cached gem objects of power-of-two sizes */ 112aaba2545Smrg struct drm_intel_gem_bo_bucket cache_bucket[14 * 4]; 113aaba2545Smrg int num_buckets; 1146d98c517Smrg time_t time; 11522944501Smrg 116a884aba1Smrg drmMMListHead managers; 117a884aba1Smrg 11820131375Smrg drmMMListHead named; 11920131375Smrg drmMMListHead vma_cache; 12020131375Smrg int vma_count, vma_open, vma_max; 12120131375Smrg 12222944501Smrg uint64_t gtt_size; 12322944501Smrg int available_fences; 12422944501Smrg int pci_device; 12522944501Smrg int gen; 1269ce4edccSmrg unsigned int has_bsd : 1; 1279ce4edccSmrg unsigned int has_blt : 1; 1289ce4edccSmrg unsigned int has_relaxed_fencing : 1; 12920131375Smrg unsigned int has_llc : 1; 13020131375Smrg unsigned int has_wait_timeout : 1; 1319ce4edccSmrg unsigned int bo_reuse : 1; 13220131375Smrg unsigned int no_exec : 1; 13320131375Smrg unsigned int has_vebox : 1; 13420131375Smrg bool fenced_relocs; 13520131375Smrg 136424e9256Smrg struct { 137424e9256Smrg void *ptr; 138424e9256Smrg uint32_t handle; 139424e9256Smrg } userptr_active; 140424e9256Smrg 14120131375Smrg char *aub_filename; 14220131375Smrg FILE *aub_file; 14320131375Smrg uint32_t aub_offset; 14422944501Smrg} drm_intel_bufmgr_gem; 14522944501Smrg 14622944501Smrg#define DRM_INTEL_RELOC_FENCE (1<<0) 14722944501Smrg 14822944501Smrgtypedef struct _drm_intel_reloc_target_info { 14922944501Smrg drm_intel_bo *bo; 15022944501Smrg int flags; 15122944501Smrg} drm_intel_reloc_target; 15222944501Smrg 15322944501Smrgstruct _drm_intel_bo_gem { 15422944501Smrg drm_intel_bo bo; 15522944501Smrg 15622944501Smrg atomic_t refcount; 15722944501Smrg uint32_t gem_handle; 15822944501Smrg const char *name; 15922944501Smrg 16022944501Smrg /** 16122944501Smrg * Kenel-assigned global name for this object 16220131375Smrg * 16320131375Smrg * List contains both flink named and prime fd'd objects 16422944501Smrg */ 16522944501Smrg unsigned int global_name; 16620131375Smrg drmMMListHead name_list; 16722944501Smrg 16822944501Smrg /** 16922944501Smrg * Index of the buffer within the validation list while preparing a 17022944501Smrg * batchbuffer execution. 17122944501Smrg */ 17222944501Smrg int validate_index; 17322944501Smrg 17422944501Smrg /** 17522944501Smrg * Current tiling mode 17622944501Smrg */ 17722944501Smrg uint32_t tiling_mode; 17822944501Smrg uint32_t swizzle_mode; 1796d98c517Smrg unsigned long stride; 18022944501Smrg 18122944501Smrg time_t free_time; 18222944501Smrg 18322944501Smrg /** Array passed to the DRM containing relocation information. */ 18422944501Smrg struct drm_i915_gem_relocation_entry *relocs; 18522944501Smrg /** 18622944501Smrg * Array of info structs corresponding to relocs[i].target_handle etc 18722944501Smrg */ 18822944501Smrg drm_intel_reloc_target *reloc_target_info; 18922944501Smrg /** Number of entries in relocs */ 19022944501Smrg int reloc_count; 19122944501Smrg /** Mapped address for the buffer, saved across map/unmap cycles */ 19222944501Smrg void *mem_virtual; 19322944501Smrg /** GTT virtual address for the buffer, saved across map/unmap cycles */ 19422944501Smrg void *gtt_virtual; 195a884aba1Smrg /** 196a884aba1Smrg * Virtual address of the buffer allocated by user, used for userptr 197a884aba1Smrg * objects only. 198a884aba1Smrg */ 199a884aba1Smrg void *user_virtual; 20020131375Smrg int map_count; 20120131375Smrg drmMMListHead vma_list; 20222944501Smrg 20322944501Smrg /** BO cache list */ 20422944501Smrg drmMMListHead head; 20522944501Smrg 20622944501Smrg /** 20722944501Smrg * Boolean of whether this BO and its children have been included in 20822944501Smrg * the current drm_intel_bufmgr_check_aperture_space() total. 20922944501Smrg */ 21020131375Smrg bool included_in_check_aperture; 21122944501Smrg 21222944501Smrg /** 21322944501Smrg * Boolean of whether this buffer has been used as a relocation 21422944501Smrg * target and had its size accounted for, and thus can't have any 21522944501Smrg * further relocations added to it. 21622944501Smrg */ 21720131375Smrg bool used_as_reloc_target; 21822944501Smrg 21922944501Smrg /** 22022944501Smrg * Boolean of whether we have encountered an error whilst building the relocation tree. 22122944501Smrg */ 22220131375Smrg bool has_error; 22322944501Smrg 22422944501Smrg /** 22522944501Smrg * Boolean of whether this buffer can be re-used 22622944501Smrg */ 22720131375Smrg bool reusable; 22820131375Smrg 22920131375Smrg /** 23020131375Smrg * Boolean of whether the GPU is definitely not accessing the buffer. 23120131375Smrg * 23220131375Smrg * This is only valid when reusable, since non-reusable 23320131375Smrg * buffers are those that have been shared wth other 23420131375Smrg * processes, so we don't know their state. 23520131375Smrg */ 23620131375Smrg bool idle; 23722944501Smrg 238a884aba1Smrg /** 239a884aba1Smrg * Boolean of whether this buffer was allocated with userptr 240a884aba1Smrg */ 241a884aba1Smrg bool is_userptr; 242a884aba1Smrg 24322944501Smrg /** 24422944501Smrg * Size in bytes of this buffer and its relocation descendents. 24522944501Smrg * 24622944501Smrg * Used to avoid costly tree walking in 24722944501Smrg * drm_intel_bufmgr_check_aperture in the common case. 24822944501Smrg */ 24922944501Smrg int reloc_tree_size; 25022944501Smrg 25122944501Smrg /** 25222944501Smrg * Number of potential fence registers required by this buffer and its 25322944501Smrg * relocations. 25422944501Smrg */ 25522944501Smrg int reloc_tree_fences; 25620131375Smrg 25720131375Smrg /** Flags that we may need to do the SW_FINSIH ioctl on unmap. */ 25820131375Smrg bool mapped_cpu_write; 25920131375Smrg 26020131375Smrg uint32_t aub_offset; 26120131375Smrg 26220131375Smrg drm_intel_aub_annotation *aub_annotations; 26320131375Smrg unsigned aub_annotation_count; 26422944501Smrg}; 26522944501Smrg 26622944501Smrgstatic unsigned int 26722944501Smrgdrm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count); 26822944501Smrg 26922944501Smrgstatic unsigned int 27022944501Smrgdrm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count); 27122944501Smrg 27222944501Smrgstatic int 27322944501Smrgdrm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 27422944501Smrg uint32_t * swizzle_mode); 27522944501Smrg 27622944501Smrgstatic int 2776d98c517Smrgdrm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, 2786d98c517Smrg uint32_t tiling_mode, 2796d98c517Smrg uint32_t stride); 28022944501Smrg 28122944501Smrgstatic void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, 28222944501Smrg time_t time); 28322944501Smrg 28422944501Smrgstatic void drm_intel_gem_bo_unreference(drm_intel_bo *bo); 28522944501Smrg 28622944501Smrgstatic void drm_intel_gem_bo_free(drm_intel_bo *bo); 28722944501Smrg 28822944501Smrgstatic unsigned long 28922944501Smrgdrm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size, 29022944501Smrg uint32_t *tiling_mode) 29122944501Smrg{ 29222944501Smrg unsigned long min_size, max_size; 29322944501Smrg unsigned long i; 29422944501Smrg 29522944501Smrg if (*tiling_mode == I915_TILING_NONE) 29622944501Smrg return size; 29722944501Smrg 29822944501Smrg /* 965+ just need multiples of page size for tiling */ 29922944501Smrg if (bufmgr_gem->gen >= 4) 30022944501Smrg return ROUND_UP_TO(size, 4096); 30122944501Smrg 30222944501Smrg /* Older chips need powers of two, of at least 512k or 1M */ 30322944501Smrg if (bufmgr_gem->gen == 3) { 30422944501Smrg min_size = 1024*1024; 30522944501Smrg max_size = 128*1024*1024; 30622944501Smrg } else { 30722944501Smrg min_size = 512*1024; 30822944501Smrg max_size = 64*1024*1024; 30922944501Smrg } 31022944501Smrg 31122944501Smrg if (size > max_size) { 31222944501Smrg *tiling_mode = I915_TILING_NONE; 31322944501Smrg return size; 31422944501Smrg } 31522944501Smrg 3169ce4edccSmrg /* Do we need to allocate every page for the fence? */ 3179ce4edccSmrg if (bufmgr_gem->has_relaxed_fencing) 3189ce4edccSmrg return ROUND_UP_TO(size, 4096); 3199ce4edccSmrg 32022944501Smrg for (i = min_size; i < size; i <<= 1) 32122944501Smrg ; 32222944501Smrg 32322944501Smrg return i; 32422944501Smrg} 32522944501Smrg 32622944501Smrg/* 32722944501Smrg * Round a given pitch up to the minimum required for X tiling on a 32822944501Smrg * given chip. We use 512 as the minimum to allow for a later tiling 32922944501Smrg * change. 33022944501Smrg */ 33122944501Smrgstatic unsigned long 33222944501Smrgdrm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem, 3336d98c517Smrg unsigned long pitch, uint32_t *tiling_mode) 33422944501Smrg{ 33522944501Smrg unsigned long tile_width; 33622944501Smrg unsigned long i; 33722944501Smrg 33822944501Smrg /* If untiled, then just align it so that we can do rendering 33922944501Smrg * to it with the 3D engine. 34022944501Smrg */ 3416d98c517Smrg if (*tiling_mode == I915_TILING_NONE) 34222944501Smrg return ALIGN(pitch, 64); 34322944501Smrg 34420131375Smrg if (*tiling_mode == I915_TILING_X 34520131375Smrg || (IS_915(bufmgr_gem->pci_device) 34620131375Smrg && *tiling_mode == I915_TILING_Y)) 34722944501Smrg tile_width = 512; 34822944501Smrg else 34922944501Smrg tile_width = 128; 35022944501Smrg 35122944501Smrg /* 965 is flexible */ 35222944501Smrg if (bufmgr_gem->gen >= 4) 35322944501Smrg return ROUND_UP_TO(pitch, tile_width); 35422944501Smrg 3556d98c517Smrg /* The older hardware has a maximum pitch of 8192 with tiled 3566d98c517Smrg * surfaces, so fallback to untiled if it's too large. 3576d98c517Smrg */ 3586d98c517Smrg if (pitch > 8192) { 3596d98c517Smrg *tiling_mode = I915_TILING_NONE; 3606d98c517Smrg return ALIGN(pitch, 64); 3616d98c517Smrg } 3626d98c517Smrg 36322944501Smrg /* Pre-965 needs power of two tile width */ 36422944501Smrg for (i = tile_width; i < pitch; i <<= 1) 36522944501Smrg ; 36622944501Smrg 36722944501Smrg return i; 36822944501Smrg} 36922944501Smrg 37022944501Smrgstatic struct drm_intel_gem_bo_bucket * 37122944501Smrgdrm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem, 37222944501Smrg unsigned long size) 37322944501Smrg{ 37422944501Smrg int i; 37522944501Smrg 376aaba2545Smrg for (i = 0; i < bufmgr_gem->num_buckets; i++) { 37722944501Smrg struct drm_intel_gem_bo_bucket *bucket = 37822944501Smrg &bufmgr_gem->cache_bucket[i]; 37922944501Smrg if (bucket->size >= size) { 38022944501Smrg return bucket; 38122944501Smrg } 38222944501Smrg } 38322944501Smrg 38422944501Smrg return NULL; 38522944501Smrg} 38622944501Smrg 38722944501Smrgstatic void 38822944501Smrgdrm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) 38922944501Smrg{ 39022944501Smrg int i, j; 39122944501Smrg 39222944501Smrg for (i = 0; i < bufmgr_gem->exec_count; i++) { 39322944501Smrg drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 39422944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 39522944501Smrg 39622944501Smrg if (bo_gem->relocs == NULL) { 39722944501Smrg DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle, 39822944501Smrg bo_gem->name); 39922944501Smrg continue; 40022944501Smrg } 40122944501Smrg 40222944501Smrg for (j = 0; j < bo_gem->reloc_count; j++) { 40322944501Smrg drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo; 40422944501Smrg drm_intel_bo_gem *target_gem = 40522944501Smrg (drm_intel_bo_gem *) target_bo; 40622944501Smrg 40722944501Smrg DBG("%2d: %d (%s)@0x%08llx -> " 408d82d45b3Sjoerg "%d (%s)@0x%08llx + 0x%08x\n", 40922944501Smrg i, 41022944501Smrg bo_gem->gem_handle, bo_gem->name, 41122944501Smrg (unsigned long long)bo_gem->relocs[j].offset, 41222944501Smrg target_gem->gem_handle, 41322944501Smrg target_gem->name, 414d82d45b3Sjoerg (unsigned long long)target_bo->offset64, 41522944501Smrg bo_gem->relocs[j].delta); 41622944501Smrg } 41722944501Smrg } 41822944501Smrg} 41922944501Smrg 42022944501Smrgstatic inline void 42122944501Smrgdrm_intel_gem_bo_reference(drm_intel_bo *bo) 42222944501Smrg{ 42322944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 42422944501Smrg 42522944501Smrg atomic_inc(&bo_gem->refcount); 42622944501Smrg} 42722944501Smrg 42822944501Smrg/** 42922944501Smrg * Adds the given buffer to the list of buffers to be validated (moved into the 43022944501Smrg * appropriate memory type) with the next batch submission. 43122944501Smrg * 43222944501Smrg * If a buffer is validated multiple times in a batch submission, it ends up 43322944501Smrg * with the intersection of the memory type flags and the union of the 43422944501Smrg * access flags. 43522944501Smrg */ 43622944501Smrgstatic void 43722944501Smrgdrm_intel_add_validate_buffer(drm_intel_bo *bo) 43822944501Smrg{ 43922944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 44022944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 44122944501Smrg int index; 44222944501Smrg 44322944501Smrg if (bo_gem->validate_index != -1) 44422944501Smrg return; 44522944501Smrg 44622944501Smrg /* Extend the array of validation entries as necessary. */ 44722944501Smrg if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { 44822944501Smrg int new_size = bufmgr_gem->exec_size * 2; 44922944501Smrg 45022944501Smrg if (new_size == 0) 45122944501Smrg new_size = 5; 45222944501Smrg 45322944501Smrg bufmgr_gem->exec_objects = 45422944501Smrg realloc(bufmgr_gem->exec_objects, 45522944501Smrg sizeof(*bufmgr_gem->exec_objects) * new_size); 45622944501Smrg bufmgr_gem->exec_bos = 45722944501Smrg realloc(bufmgr_gem->exec_bos, 45822944501Smrg sizeof(*bufmgr_gem->exec_bos) * new_size); 45922944501Smrg bufmgr_gem->exec_size = new_size; 46022944501Smrg } 46122944501Smrg 46222944501Smrg index = bufmgr_gem->exec_count; 46322944501Smrg bo_gem->validate_index = index; 46422944501Smrg /* Fill in array entry */ 46522944501Smrg bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle; 46622944501Smrg bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count; 46722944501Smrg bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs; 46822944501Smrg bufmgr_gem->exec_objects[index].alignment = 0; 46922944501Smrg bufmgr_gem->exec_objects[index].offset = 0; 47022944501Smrg bufmgr_gem->exec_bos[index] = bo; 47122944501Smrg bufmgr_gem->exec_count++; 47222944501Smrg} 47322944501Smrg 47422944501Smrgstatic void 47522944501Smrgdrm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) 47622944501Smrg{ 47722944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 47822944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 47922944501Smrg int index; 48022944501Smrg 48122944501Smrg if (bo_gem->validate_index != -1) { 48222944501Smrg if (need_fence) 48322944501Smrg bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= 48422944501Smrg EXEC_OBJECT_NEEDS_FENCE; 48522944501Smrg return; 48622944501Smrg } 48722944501Smrg 48822944501Smrg /* Extend the array of validation entries as necessary. */ 48922944501Smrg if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { 49022944501Smrg int new_size = bufmgr_gem->exec_size * 2; 49122944501Smrg 49222944501Smrg if (new_size == 0) 49322944501Smrg new_size = 5; 49422944501Smrg 49522944501Smrg bufmgr_gem->exec2_objects = 49622944501Smrg realloc(bufmgr_gem->exec2_objects, 49722944501Smrg sizeof(*bufmgr_gem->exec2_objects) * new_size); 49822944501Smrg bufmgr_gem->exec_bos = 49922944501Smrg realloc(bufmgr_gem->exec_bos, 50022944501Smrg sizeof(*bufmgr_gem->exec_bos) * new_size); 50122944501Smrg bufmgr_gem->exec_size = new_size; 50222944501Smrg } 50322944501Smrg 50422944501Smrg index = bufmgr_gem->exec_count; 50522944501Smrg bo_gem->validate_index = index; 50622944501Smrg /* Fill in array entry */ 50722944501Smrg bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle; 50822944501Smrg bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count; 50922944501Smrg bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs; 51022944501Smrg bufmgr_gem->exec2_objects[index].alignment = 0; 51122944501Smrg bufmgr_gem->exec2_objects[index].offset = 0; 51222944501Smrg bufmgr_gem->exec_bos[index] = bo; 51322944501Smrg bufmgr_gem->exec2_objects[index].flags = 0; 51422944501Smrg bufmgr_gem->exec2_objects[index].rsvd1 = 0; 51522944501Smrg bufmgr_gem->exec2_objects[index].rsvd2 = 0; 51622944501Smrg if (need_fence) { 51722944501Smrg bufmgr_gem->exec2_objects[index].flags |= 51822944501Smrg EXEC_OBJECT_NEEDS_FENCE; 51922944501Smrg } 52022944501Smrg bufmgr_gem->exec_count++; 52122944501Smrg} 52222944501Smrg 52322944501Smrg#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \ 52422944501Smrg sizeof(uint32_t)) 52522944501Smrg 52622944501Smrgstatic void 52722944501Smrgdrm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem, 52822944501Smrg drm_intel_bo_gem *bo_gem) 52922944501Smrg{ 53022944501Smrg int size; 53122944501Smrg 53222944501Smrg assert(!bo_gem->used_as_reloc_target); 53322944501Smrg 53422944501Smrg /* The older chipsets are far-less flexible in terms of tiling, 53522944501Smrg * and require tiled buffer to be size aligned in the aperture. 53622944501Smrg * This means that in the worst possible case we will need a hole 53722944501Smrg * twice as large as the object in order for it to fit into the 53822944501Smrg * aperture. Optimal packing is for wimps. 53922944501Smrg */ 54022944501Smrg size = bo_gem->bo.size; 5419ce4edccSmrg if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) { 5429ce4edccSmrg int min_size; 5439ce4edccSmrg 5449ce4edccSmrg if (bufmgr_gem->has_relaxed_fencing) { 5459ce4edccSmrg if (bufmgr_gem->gen == 3) 5469ce4edccSmrg min_size = 1024*1024; 5479ce4edccSmrg else 5489ce4edccSmrg min_size = 512*1024; 5499ce4edccSmrg 5509ce4edccSmrg while (min_size < size) 5519ce4edccSmrg min_size *= 2; 5529ce4edccSmrg } else 5539ce4edccSmrg min_size = size; 5549ce4edccSmrg 5559ce4edccSmrg /* Account for worst-case alignment. */ 5569ce4edccSmrg size = 2 * min_size; 5579ce4edccSmrg } 55822944501Smrg 55922944501Smrg bo_gem->reloc_tree_size = size; 56022944501Smrg} 56122944501Smrg 56222944501Smrgstatic int 56322944501Smrgdrm_intel_setup_reloc_list(drm_intel_bo *bo) 56422944501Smrg{ 56522944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 56622944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 56722944501Smrg unsigned int max_relocs = bufmgr_gem->max_relocs; 56822944501Smrg 56922944501Smrg if (bo->size / 4 < max_relocs) 57022944501Smrg max_relocs = bo->size / 4; 57122944501Smrg 57222944501Smrg bo_gem->relocs = malloc(max_relocs * 57322944501Smrg sizeof(struct drm_i915_gem_relocation_entry)); 57422944501Smrg bo_gem->reloc_target_info = malloc(max_relocs * 575aaba2545Smrg sizeof(drm_intel_reloc_target)); 57622944501Smrg if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) { 57720131375Smrg bo_gem->has_error = true; 57822944501Smrg 57922944501Smrg free (bo_gem->relocs); 58022944501Smrg bo_gem->relocs = NULL; 58122944501Smrg 58222944501Smrg free (bo_gem->reloc_target_info); 58322944501Smrg bo_gem->reloc_target_info = NULL; 58422944501Smrg 58522944501Smrg return 1; 58622944501Smrg } 58722944501Smrg 58822944501Smrg return 0; 58922944501Smrg} 59022944501Smrg 59122944501Smrgstatic int 59222944501Smrgdrm_intel_gem_bo_busy(drm_intel_bo *bo) 59322944501Smrg{ 59422944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 59522944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 59622944501Smrg struct drm_i915_gem_busy busy; 59722944501Smrg int ret; 59822944501Smrg 59920131375Smrg if (bo_gem->reusable && bo_gem->idle) 60020131375Smrg return false; 60120131375Smrg 602424e9256Smrg memclear(busy); 60322944501Smrg busy.handle = bo_gem->gem_handle; 60422944501Smrg 6056d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); 60620131375Smrg if (ret == 0) { 60720131375Smrg bo_gem->idle = !busy.busy; 60820131375Smrg return busy.busy; 60920131375Smrg } else { 61020131375Smrg return false; 61120131375Smrg } 61222944501Smrg return (ret == 0 && busy.busy); 61322944501Smrg} 61422944501Smrg 61522944501Smrgstatic int 61622944501Smrgdrm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem, 61722944501Smrg drm_intel_bo_gem *bo_gem, int state) 61822944501Smrg{ 61922944501Smrg struct drm_i915_gem_madvise madv; 62022944501Smrg 621424e9256Smrg memclear(madv); 62222944501Smrg madv.handle = bo_gem->gem_handle; 62322944501Smrg madv.madv = state; 62422944501Smrg madv.retained = 1; 6256d98c517Smrg drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv); 62622944501Smrg 62722944501Smrg return madv.retained; 62822944501Smrg} 62922944501Smrg 63022944501Smrgstatic int 63122944501Smrgdrm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv) 63222944501Smrg{ 63322944501Smrg return drm_intel_gem_bo_madvise_internal 63422944501Smrg ((drm_intel_bufmgr_gem *) bo->bufmgr, 63522944501Smrg (drm_intel_bo_gem *) bo, 63622944501Smrg madv); 63722944501Smrg} 63822944501Smrg 63922944501Smrg/* drop the oldest entries that have been purged by the kernel */ 64022944501Smrgstatic void 64122944501Smrgdrm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem, 64222944501Smrg struct drm_intel_gem_bo_bucket *bucket) 64322944501Smrg{ 64422944501Smrg while (!DRMLISTEMPTY(&bucket->head)) { 64522944501Smrg drm_intel_bo_gem *bo_gem; 64622944501Smrg 64722944501Smrg bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 64822944501Smrg bucket->head.next, head); 64922944501Smrg if (drm_intel_gem_bo_madvise_internal 65022944501Smrg (bufmgr_gem, bo_gem, I915_MADV_DONTNEED)) 65122944501Smrg break; 65222944501Smrg 65322944501Smrg DRMLISTDEL(&bo_gem->head); 65422944501Smrg drm_intel_gem_bo_free(&bo_gem->bo); 65522944501Smrg } 65622944501Smrg} 65722944501Smrg 65822944501Smrgstatic drm_intel_bo * 65922944501Smrgdrm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, 66022944501Smrg const char *name, 66122944501Smrg unsigned long size, 6626d98c517Smrg unsigned long flags, 6636d98c517Smrg uint32_t tiling_mode, 6646d98c517Smrg unsigned long stride) 66522944501Smrg{ 66622944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 66722944501Smrg drm_intel_bo_gem *bo_gem; 66822944501Smrg unsigned int page_size = getpagesize(); 66922944501Smrg int ret; 67022944501Smrg struct drm_intel_gem_bo_bucket *bucket; 67120131375Smrg bool alloc_from_cache; 67222944501Smrg unsigned long bo_size; 67320131375Smrg bool for_render = false; 67422944501Smrg 67522944501Smrg if (flags & BO_ALLOC_FOR_RENDER) 67620131375Smrg for_render = true; 67722944501Smrg 67822944501Smrg /* Round the allocated size up to a power of two number of pages. */ 67922944501Smrg bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size); 68022944501Smrg 68122944501Smrg /* If we don't have caching at this size, don't actually round the 68222944501Smrg * allocation up. 68322944501Smrg */ 68422944501Smrg if (bucket == NULL) { 68522944501Smrg bo_size = size; 68622944501Smrg if (bo_size < page_size) 68722944501Smrg bo_size = page_size; 68822944501Smrg } else { 68922944501Smrg bo_size = bucket->size; 69022944501Smrg } 69122944501Smrg 69222944501Smrg pthread_mutex_lock(&bufmgr_gem->lock); 69322944501Smrg /* Get a buffer out of the cache if available */ 69422944501Smrgretry: 69520131375Smrg alloc_from_cache = false; 69622944501Smrg if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) { 69722944501Smrg if (for_render) { 69822944501Smrg /* Allocate new render-target BOs from the tail (MRU) 69922944501Smrg * of the list, as it will likely be hot in the GPU 70022944501Smrg * cache and in the aperture for us. 70122944501Smrg */ 70222944501Smrg bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 70322944501Smrg bucket->head.prev, head); 70422944501Smrg DRMLISTDEL(&bo_gem->head); 70520131375Smrg alloc_from_cache = true; 70622944501Smrg } else { 70722944501Smrg /* For non-render-target BOs (where we're probably 70822944501Smrg * going to map it first thing in order to fill it 70922944501Smrg * with data), check if the last BO in the cache is 71022944501Smrg * unbusy, and only reuse in that case. Otherwise, 71122944501Smrg * allocating a new buffer is probably faster than 71222944501Smrg * waiting for the GPU to finish. 71322944501Smrg */ 71422944501Smrg bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 71522944501Smrg bucket->head.next, head); 71622944501Smrg if (!drm_intel_gem_bo_busy(&bo_gem->bo)) { 71720131375Smrg alloc_from_cache = true; 71822944501Smrg DRMLISTDEL(&bo_gem->head); 71922944501Smrg } 72022944501Smrg } 72122944501Smrg 72222944501Smrg if (alloc_from_cache) { 72322944501Smrg if (!drm_intel_gem_bo_madvise_internal 72422944501Smrg (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) { 72522944501Smrg drm_intel_gem_bo_free(&bo_gem->bo); 72622944501Smrg drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem, 72722944501Smrg bucket); 72822944501Smrg goto retry; 72922944501Smrg } 7306d98c517Smrg 7316d98c517Smrg if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, 7326d98c517Smrg tiling_mode, 7336d98c517Smrg stride)) { 7346d98c517Smrg drm_intel_gem_bo_free(&bo_gem->bo); 7356d98c517Smrg goto retry; 7366d98c517Smrg } 73722944501Smrg } 73822944501Smrg } 73922944501Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 74022944501Smrg 74122944501Smrg if (!alloc_from_cache) { 74222944501Smrg struct drm_i915_gem_create create; 74322944501Smrg 74422944501Smrg bo_gem = calloc(1, sizeof(*bo_gem)); 74522944501Smrg if (!bo_gem) 74622944501Smrg return NULL; 74722944501Smrg 74822944501Smrg bo_gem->bo.size = bo_size; 74920131375Smrg 750424e9256Smrg memclear(create); 75122944501Smrg create.size = bo_size; 75222944501Smrg 7536d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, 7546d98c517Smrg DRM_IOCTL_I915_GEM_CREATE, 7556d98c517Smrg &create); 75622944501Smrg bo_gem->gem_handle = create.handle; 75722944501Smrg bo_gem->bo.handle = bo_gem->gem_handle; 75822944501Smrg if (ret != 0) { 75922944501Smrg free(bo_gem); 76022944501Smrg return NULL; 76122944501Smrg } 76222944501Smrg bo_gem->bo.bufmgr = bufmgr; 7636d98c517Smrg 7646d98c517Smrg bo_gem->tiling_mode = I915_TILING_NONE; 7656d98c517Smrg bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; 7666d98c517Smrg bo_gem->stride = 0; 7676d98c517Smrg 7683c748557Ssnj /* drm_intel_gem_bo_free calls DRMLISTDEL() for an uninitialized 7693c748557Ssnj list (vma_list), so better set the list head here */ 7703c748557Ssnj DRMINITLISTHEAD(&bo_gem->name_list); 7713c748557Ssnj DRMINITLISTHEAD(&bo_gem->vma_list); 7726d98c517Smrg if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, 7736d98c517Smrg tiling_mode, 7746d98c517Smrg stride)) { 7756d98c517Smrg drm_intel_gem_bo_free(&bo_gem->bo); 7766d98c517Smrg return NULL; 7776d98c517Smrg } 77822944501Smrg } 77922944501Smrg 78022944501Smrg bo_gem->name = name; 78122944501Smrg atomic_set(&bo_gem->refcount, 1); 78222944501Smrg bo_gem->validate_index = -1; 78322944501Smrg bo_gem->reloc_tree_fences = 0; 78420131375Smrg bo_gem->used_as_reloc_target = false; 78520131375Smrg bo_gem->has_error = false; 78620131375Smrg bo_gem->reusable = true; 78720131375Smrg bo_gem->aub_annotations = NULL; 78820131375Smrg bo_gem->aub_annotation_count = 0; 78922944501Smrg 79022944501Smrg drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 79122944501Smrg 79222944501Smrg DBG("bo_create: buf %d (%s) %ldb\n", 79322944501Smrg bo_gem->gem_handle, bo_gem->name, size); 79422944501Smrg 79522944501Smrg return &bo_gem->bo; 79622944501Smrg} 79722944501Smrg 79822944501Smrgstatic drm_intel_bo * 79922944501Smrgdrm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, 80022944501Smrg const char *name, 80122944501Smrg unsigned long size, 80222944501Smrg unsigned int alignment) 80322944501Smrg{ 80422944501Smrg return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 8056d98c517Smrg BO_ALLOC_FOR_RENDER, 8066d98c517Smrg I915_TILING_NONE, 0); 80722944501Smrg} 80822944501Smrg 80922944501Smrgstatic drm_intel_bo * 81022944501Smrgdrm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, 81122944501Smrg const char *name, 81222944501Smrg unsigned long size, 81322944501Smrg unsigned int alignment) 81422944501Smrg{ 8156d98c517Smrg return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0, 8166d98c517Smrg I915_TILING_NONE, 0); 81722944501Smrg} 81822944501Smrg 81922944501Smrgstatic drm_intel_bo * 82022944501Smrgdrm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name, 82122944501Smrg int x, int y, int cpp, uint32_t *tiling_mode, 82222944501Smrg unsigned long *pitch, unsigned long flags) 82322944501Smrg{ 82422944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 825aaba2545Smrg unsigned long size, stride; 826aaba2545Smrg uint32_t tiling; 82722944501Smrg 828aaba2545Smrg do { 82920131375Smrg unsigned long aligned_y, height_alignment; 830aaba2545Smrg 831aaba2545Smrg tiling = *tiling_mode; 832aaba2545Smrg 833aaba2545Smrg /* If we're tiled, our allocations are in 8 or 32-row blocks, 834aaba2545Smrg * so failure to align our height means that we won't allocate 835aaba2545Smrg * enough pages. 836aaba2545Smrg * 837aaba2545Smrg * If we're untiled, we still have to align to 2 rows high 838aaba2545Smrg * because the data port accesses 2x2 blocks even if the 839aaba2545Smrg * bottom row isn't to be rendered, so failure to align means 840aaba2545Smrg * we could walk off the end of the GTT and fault. This is 841aaba2545Smrg * documented on 965, and may be the case on older chipsets 842aaba2545Smrg * too so we try to be careful. 843aaba2545Smrg */ 844aaba2545Smrg aligned_y = y; 84520131375Smrg height_alignment = 2; 84620131375Smrg 84720131375Smrg if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE) 84820131375Smrg height_alignment = 16; 84920131375Smrg else if (tiling == I915_TILING_X 85020131375Smrg || (IS_915(bufmgr_gem->pci_device) 85120131375Smrg && tiling == I915_TILING_Y)) 85220131375Smrg height_alignment = 8; 853aaba2545Smrg else if (tiling == I915_TILING_Y) 85420131375Smrg height_alignment = 32; 85520131375Smrg aligned_y = ALIGN(y, height_alignment); 856aaba2545Smrg 857aaba2545Smrg stride = x * cpp; 8586d98c517Smrg stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode); 859aaba2545Smrg size = stride * aligned_y; 860aaba2545Smrg size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode); 861aaba2545Smrg } while (*tiling_mode != tiling); 86222944501Smrg *pitch = stride; 86322944501Smrg 8646d98c517Smrg if (tiling == I915_TILING_NONE) 8656d98c517Smrg stride = 0; 8666d98c517Smrg 8676d98c517Smrg return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags, 8686d98c517Smrg tiling, stride); 86922944501Smrg} 87022944501Smrg 871a884aba1Smrgstatic drm_intel_bo * 872a884aba1Smrgdrm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, 873a884aba1Smrg const char *name, 874a884aba1Smrg void *addr, 875a884aba1Smrg uint32_t tiling_mode, 876a884aba1Smrg uint32_t stride, 877a884aba1Smrg unsigned long size, 878a884aba1Smrg unsigned long flags) 879a884aba1Smrg{ 880a884aba1Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 881a884aba1Smrg drm_intel_bo_gem *bo_gem; 882a884aba1Smrg int ret; 883a884aba1Smrg struct drm_i915_gem_userptr userptr; 884a884aba1Smrg 885a884aba1Smrg /* Tiling with userptr surfaces is not supported 886a884aba1Smrg * on all hardware so refuse it for time being. 887a884aba1Smrg */ 888a884aba1Smrg if (tiling_mode != I915_TILING_NONE) 889a884aba1Smrg return NULL; 890a884aba1Smrg 891a884aba1Smrg bo_gem = calloc(1, sizeof(*bo_gem)); 892a884aba1Smrg if (!bo_gem) 893a884aba1Smrg return NULL; 894a884aba1Smrg 895a884aba1Smrg bo_gem->bo.size = size; 896a884aba1Smrg 897424e9256Smrg memclear(userptr); 898a884aba1Smrg userptr.user_ptr = (__u64)((unsigned long)addr); 899a884aba1Smrg userptr.user_size = size; 900a884aba1Smrg userptr.flags = flags; 901a884aba1Smrg 902a884aba1Smrg ret = drmIoctl(bufmgr_gem->fd, 903a884aba1Smrg DRM_IOCTL_I915_GEM_USERPTR, 904a884aba1Smrg &userptr); 905a884aba1Smrg if (ret != 0) { 906a884aba1Smrg DBG("bo_create_userptr: " 907a884aba1Smrg "ioctl failed with user ptr %p size 0x%lx, " 908a884aba1Smrg "user flags 0x%lx\n", addr, size, flags); 909a884aba1Smrg free(bo_gem); 910a884aba1Smrg return NULL; 911a884aba1Smrg } 912a884aba1Smrg 913a884aba1Smrg bo_gem->gem_handle = userptr.handle; 914a884aba1Smrg bo_gem->bo.handle = bo_gem->gem_handle; 915a884aba1Smrg bo_gem->bo.bufmgr = bufmgr; 916a884aba1Smrg bo_gem->is_userptr = true; 917a884aba1Smrg bo_gem->bo.virtual = addr; 918a884aba1Smrg /* Save the address provided by user */ 919a884aba1Smrg bo_gem->user_virtual = addr; 920a884aba1Smrg bo_gem->tiling_mode = I915_TILING_NONE; 921a884aba1Smrg bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; 922a884aba1Smrg bo_gem->stride = 0; 923a884aba1Smrg 924a884aba1Smrg DRMINITLISTHEAD(&bo_gem->name_list); 925a884aba1Smrg DRMINITLISTHEAD(&bo_gem->vma_list); 926a884aba1Smrg 927a884aba1Smrg bo_gem->name = name; 928a884aba1Smrg atomic_set(&bo_gem->refcount, 1); 929a884aba1Smrg bo_gem->validate_index = -1; 930a884aba1Smrg bo_gem->reloc_tree_fences = 0; 931a884aba1Smrg bo_gem->used_as_reloc_target = false; 932a884aba1Smrg bo_gem->has_error = false; 933a884aba1Smrg bo_gem->reusable = false; 934a884aba1Smrg 935a884aba1Smrg drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 936a884aba1Smrg 937a884aba1Smrg DBG("bo_create_userptr: " 938a884aba1Smrg "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n", 939a884aba1Smrg addr, bo_gem->gem_handle, bo_gem->name, 940a884aba1Smrg size, stride, tiling_mode); 941a884aba1Smrg 942a884aba1Smrg return &bo_gem->bo; 943a884aba1Smrg} 944a884aba1Smrg 945424e9256Smrgstatic bool 946424e9256Smrghas_userptr(drm_intel_bufmgr_gem *bufmgr_gem) 947424e9256Smrg{ 948424e9256Smrg int ret; 949424e9256Smrg void *ptr; 950424e9256Smrg long pgsz; 951424e9256Smrg struct drm_i915_gem_userptr userptr; 952424e9256Smrg 953424e9256Smrg pgsz = sysconf(_SC_PAGESIZE); 954424e9256Smrg assert(pgsz > 0); 955424e9256Smrg 956424e9256Smrg ret = posix_memalign(&ptr, pgsz, pgsz); 957424e9256Smrg if (ret) { 958424e9256Smrg DBG("Failed to get a page (%ld) for userptr detection!\n", 959424e9256Smrg pgsz); 960424e9256Smrg return false; 961424e9256Smrg } 962424e9256Smrg 963424e9256Smrg memclear(userptr); 964424e9256Smrg userptr.user_ptr = (__u64)(unsigned long)ptr; 965424e9256Smrg userptr.user_size = pgsz; 966424e9256Smrg 967424e9256Smrgretry: 968424e9256Smrg ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr); 969424e9256Smrg if (ret) { 970424e9256Smrg if (errno == ENODEV && userptr.flags == 0) { 971424e9256Smrg userptr.flags = I915_USERPTR_UNSYNCHRONIZED; 972424e9256Smrg goto retry; 973424e9256Smrg } 974424e9256Smrg free(ptr); 975424e9256Smrg return false; 976424e9256Smrg } 977424e9256Smrg 978424e9256Smrg /* We don't release the userptr bo here as we want to keep the 979424e9256Smrg * kernel mm tracking alive for our lifetime. The first time we 980424e9256Smrg * create a userptr object the kernel has to install a mmu_notifer 981424e9256Smrg * which is a heavyweight operation (e.g. it requires taking all 982424e9256Smrg * mm_locks and stop_machine()). 983424e9256Smrg */ 984424e9256Smrg 985424e9256Smrg bufmgr_gem->userptr_active.ptr = ptr; 986424e9256Smrg bufmgr_gem->userptr_active.handle = userptr.handle; 987424e9256Smrg 988424e9256Smrg return true; 989424e9256Smrg} 990424e9256Smrg 991424e9256Smrgstatic drm_intel_bo * 992424e9256Smrgcheck_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, 993424e9256Smrg const char *name, 994424e9256Smrg void *addr, 995424e9256Smrg uint32_t tiling_mode, 996424e9256Smrg uint32_t stride, 997424e9256Smrg unsigned long size, 998424e9256Smrg unsigned long flags) 999424e9256Smrg{ 1000424e9256Smrg if (has_userptr((drm_intel_bufmgr_gem *)bufmgr)) 1001424e9256Smrg bufmgr->bo_alloc_userptr = drm_intel_gem_bo_alloc_userptr; 1002424e9256Smrg else 1003424e9256Smrg bufmgr->bo_alloc_userptr = NULL; 1004424e9256Smrg 1005424e9256Smrg return drm_intel_bo_alloc_userptr(bufmgr, name, addr, 1006424e9256Smrg tiling_mode, stride, size, flags); 1007424e9256Smrg} 1008424e9256Smrg 100922944501Smrg/** 101022944501Smrg * Returns a drm_intel_bo wrapping the given buffer object handle. 101122944501Smrg * 101222944501Smrg * This can be used when one application needs to pass a buffer object 101322944501Smrg * to another. 101422944501Smrg */ 1015424e9256Smrgdrm_intel_bo * 101622944501Smrgdrm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, 101722944501Smrg const char *name, 101822944501Smrg unsigned int handle) 101922944501Smrg{ 102022944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 102122944501Smrg drm_intel_bo_gem *bo_gem; 102222944501Smrg int ret; 102322944501Smrg struct drm_gem_open open_arg; 102422944501Smrg struct drm_i915_gem_get_tiling get_tiling; 102520131375Smrg drmMMListHead *list; 102622944501Smrg 102720131375Smrg /* At the moment most applications only have a few named bo. 102820131375Smrg * For instance, in a DRI client only the render buffers passed 102920131375Smrg * between X and the client are named. And since X returns the 103020131375Smrg * alternating names for the front/back buffer a linear search 103120131375Smrg * provides a sufficiently fast match. 103220131375Smrg */ 1033a884aba1Smrg pthread_mutex_lock(&bufmgr_gem->lock); 103420131375Smrg for (list = bufmgr_gem->named.next; 103520131375Smrg list != &bufmgr_gem->named; 103620131375Smrg list = list->next) { 103720131375Smrg bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list); 103820131375Smrg if (bo_gem->global_name == handle) { 103920131375Smrg drm_intel_gem_bo_reference(&bo_gem->bo); 1040a884aba1Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 104120131375Smrg return &bo_gem->bo; 104220131375Smrg } 104320131375Smrg } 104422944501Smrg 1045424e9256Smrg memclear(open_arg); 104622944501Smrg open_arg.name = handle; 10476d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, 10486d98c517Smrg DRM_IOCTL_GEM_OPEN, 10496d98c517Smrg &open_arg); 105022944501Smrg if (ret != 0) { 10519ce4edccSmrg DBG("Couldn't reference %s handle 0x%08x: %s\n", 10529ce4edccSmrg name, handle, strerror(errno)); 1053a884aba1Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 105422944501Smrg return NULL; 105522944501Smrg } 105620131375Smrg /* Now see if someone has used a prime handle to get this 105720131375Smrg * object from the kernel before by looking through the list 105820131375Smrg * again for a matching gem_handle 105920131375Smrg */ 106020131375Smrg for (list = bufmgr_gem->named.next; 106120131375Smrg list != &bufmgr_gem->named; 106220131375Smrg list = list->next) { 106320131375Smrg bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list); 106420131375Smrg if (bo_gem->gem_handle == open_arg.handle) { 106520131375Smrg drm_intel_gem_bo_reference(&bo_gem->bo); 1066a884aba1Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 106720131375Smrg return &bo_gem->bo; 106820131375Smrg } 106920131375Smrg } 107020131375Smrg 107120131375Smrg bo_gem = calloc(1, sizeof(*bo_gem)); 1072a884aba1Smrg if (!bo_gem) { 1073a884aba1Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 107420131375Smrg return NULL; 1075a884aba1Smrg } 107620131375Smrg 107722944501Smrg bo_gem->bo.size = open_arg.size; 107822944501Smrg bo_gem->bo.offset = 0; 107920131375Smrg bo_gem->bo.offset64 = 0; 108022944501Smrg bo_gem->bo.virtual = NULL; 108122944501Smrg bo_gem->bo.bufmgr = bufmgr; 108222944501Smrg bo_gem->name = name; 108322944501Smrg atomic_set(&bo_gem->refcount, 1); 108422944501Smrg bo_gem->validate_index = -1; 108522944501Smrg bo_gem->gem_handle = open_arg.handle; 108620131375Smrg bo_gem->bo.handle = open_arg.handle; 108722944501Smrg bo_gem->global_name = handle; 108820131375Smrg bo_gem->reusable = false; 108922944501Smrg 1090424e9256Smrg memclear(get_tiling); 109122944501Smrg get_tiling.handle = bo_gem->gem_handle; 10926d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, 10936d98c517Smrg DRM_IOCTL_I915_GEM_GET_TILING, 10946d98c517Smrg &get_tiling); 109522944501Smrg if (ret != 0) { 109622944501Smrg drm_intel_gem_bo_unreference(&bo_gem->bo); 1097a884aba1Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 109822944501Smrg return NULL; 109922944501Smrg } 110022944501Smrg bo_gem->tiling_mode = get_tiling.tiling_mode; 110122944501Smrg bo_gem->swizzle_mode = get_tiling.swizzle_mode; 11026d98c517Smrg /* XXX stride is unknown */ 110322944501Smrg drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 110422944501Smrg 110520131375Smrg DRMINITLISTHEAD(&bo_gem->vma_list); 110620131375Smrg DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); 1107a884aba1Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 110822944501Smrg DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name); 110922944501Smrg 111022944501Smrg return &bo_gem->bo; 111122944501Smrg} 111222944501Smrg 111322944501Smrgstatic void 111422944501Smrgdrm_intel_gem_bo_free(drm_intel_bo *bo) 111522944501Smrg{ 111622944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 111722944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 111822944501Smrg struct drm_gem_close close; 111922944501Smrg int ret; 112022944501Smrg 112120131375Smrg DRMLISTDEL(&bo_gem->vma_list); 112220131375Smrg if (bo_gem->mem_virtual) { 112320131375Smrg VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0)); 1124a884aba1Smrg drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size); 112520131375Smrg bufmgr_gem->vma_count--; 112620131375Smrg } 112720131375Smrg if (bo_gem->gtt_virtual) { 1128a884aba1Smrg drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size); 112920131375Smrg bufmgr_gem->vma_count--; 113020131375Smrg } 113122944501Smrg 113222944501Smrg /* Close this object */ 1133424e9256Smrg memclear(close); 113422944501Smrg close.handle = bo_gem->gem_handle; 11356d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close); 113622944501Smrg if (ret != 0) { 11379ce4edccSmrg DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", 11389ce4edccSmrg bo_gem->gem_handle, bo_gem->name, strerror(errno)); 113922944501Smrg } 114020131375Smrg free(bo_gem->aub_annotations); 114122944501Smrg free(bo); 114222944501Smrg} 114322944501Smrg 114420131375Smrgstatic void 114520131375Smrgdrm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo) 114620131375Smrg{ 114720131375Smrg#if HAVE_VALGRIND 114820131375Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 114920131375Smrg 115020131375Smrg if (bo_gem->mem_virtual) 115120131375Smrg VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size); 115220131375Smrg 115320131375Smrg if (bo_gem->gtt_virtual) 115420131375Smrg VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size); 115520131375Smrg#endif 115620131375Smrg} 115720131375Smrg 115822944501Smrg/** Frees all cached buffers significantly older than @time. */ 115922944501Smrgstatic void 116022944501Smrgdrm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time) 116122944501Smrg{ 116222944501Smrg int i; 116322944501Smrg 11646d98c517Smrg if (bufmgr_gem->time == time) 11656d98c517Smrg return; 11666d98c517Smrg 1167aaba2545Smrg for (i = 0; i < bufmgr_gem->num_buckets; i++) { 116822944501Smrg struct drm_intel_gem_bo_bucket *bucket = 116922944501Smrg &bufmgr_gem->cache_bucket[i]; 117022944501Smrg 117122944501Smrg while (!DRMLISTEMPTY(&bucket->head)) { 117222944501Smrg drm_intel_bo_gem *bo_gem; 117322944501Smrg 117422944501Smrg bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 117522944501Smrg bucket->head.next, head); 117622944501Smrg if (time - bo_gem->free_time <= 1) 117722944501Smrg break; 117822944501Smrg 117922944501Smrg DRMLISTDEL(&bo_gem->head); 118022944501Smrg 118122944501Smrg drm_intel_gem_bo_free(&bo_gem->bo); 118222944501Smrg } 118322944501Smrg } 11846d98c517Smrg 11856d98c517Smrg bufmgr_gem->time = time; 118622944501Smrg} 118722944501Smrg 118820131375Smrgstatic void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem) 118920131375Smrg{ 119020131375Smrg int limit; 119120131375Smrg 119220131375Smrg DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__, 119320131375Smrg bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max); 119420131375Smrg 119520131375Smrg if (bufmgr_gem->vma_max < 0) 119620131375Smrg return; 119720131375Smrg 119820131375Smrg /* We may need to evict a few entries in order to create new mmaps */ 119920131375Smrg limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open; 120020131375Smrg if (limit < 0) 120120131375Smrg limit = 0; 120220131375Smrg 120320131375Smrg while (bufmgr_gem->vma_count > limit) { 120420131375Smrg drm_intel_bo_gem *bo_gem; 120520131375Smrg 120620131375Smrg bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 120720131375Smrg bufmgr_gem->vma_cache.next, 120820131375Smrg vma_list); 120920131375Smrg assert(bo_gem->map_count == 0); 121020131375Smrg DRMLISTDELINIT(&bo_gem->vma_list); 121120131375Smrg 121220131375Smrg if (bo_gem->mem_virtual) { 1213a884aba1Smrg drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size); 121420131375Smrg bo_gem->mem_virtual = NULL; 121520131375Smrg bufmgr_gem->vma_count--; 121620131375Smrg } 121720131375Smrg if (bo_gem->gtt_virtual) { 1218a884aba1Smrg drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size); 121920131375Smrg bo_gem->gtt_virtual = NULL; 122020131375Smrg bufmgr_gem->vma_count--; 122120131375Smrg } 122220131375Smrg } 122320131375Smrg} 122420131375Smrg 122520131375Smrgstatic void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem, 122620131375Smrg drm_intel_bo_gem *bo_gem) 122720131375Smrg{ 122820131375Smrg bufmgr_gem->vma_open--; 122920131375Smrg DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache); 123020131375Smrg if (bo_gem->mem_virtual) 123120131375Smrg bufmgr_gem->vma_count++; 123220131375Smrg if (bo_gem->gtt_virtual) 123320131375Smrg bufmgr_gem->vma_count++; 123420131375Smrg drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 123520131375Smrg} 123620131375Smrg 123720131375Smrgstatic void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem, 123820131375Smrg drm_intel_bo_gem *bo_gem) 123920131375Smrg{ 124020131375Smrg bufmgr_gem->vma_open++; 124120131375Smrg DRMLISTDEL(&bo_gem->vma_list); 124220131375Smrg if (bo_gem->mem_virtual) 124320131375Smrg bufmgr_gem->vma_count--; 124420131375Smrg if (bo_gem->gtt_virtual) 124520131375Smrg bufmgr_gem->vma_count--; 124620131375Smrg drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 124720131375Smrg} 124820131375Smrg 124922944501Smrgstatic void 125022944501Smrgdrm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) 125122944501Smrg{ 125222944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 125322944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 125422944501Smrg struct drm_intel_gem_bo_bucket *bucket; 125522944501Smrg int i; 125622944501Smrg 125722944501Smrg /* Unreference all the target buffers */ 125822944501Smrg for (i = 0; i < bo_gem->reloc_count; i++) { 1259aaba2545Smrg if (bo_gem->reloc_target_info[i].bo != bo) { 1260aaba2545Smrg drm_intel_gem_bo_unreference_locked_timed(bo_gem-> 1261aaba2545Smrg reloc_target_info[i].bo, 1262aaba2545Smrg time); 1263aaba2545Smrg } 126422944501Smrg } 126522944501Smrg bo_gem->reloc_count = 0; 126620131375Smrg bo_gem->used_as_reloc_target = false; 126722944501Smrg 126822944501Smrg DBG("bo_unreference final: %d (%s)\n", 126922944501Smrg bo_gem->gem_handle, bo_gem->name); 127022944501Smrg 127122944501Smrg /* release memory associated with this object */ 127222944501Smrg if (bo_gem->reloc_target_info) { 127322944501Smrg free(bo_gem->reloc_target_info); 127422944501Smrg bo_gem->reloc_target_info = NULL; 127522944501Smrg } 127622944501Smrg if (bo_gem->relocs) { 127722944501Smrg free(bo_gem->relocs); 127822944501Smrg bo_gem->relocs = NULL; 127922944501Smrg } 128022944501Smrg 128120131375Smrg /* Clear any left-over mappings */ 128220131375Smrg if (bo_gem->map_count) { 128320131375Smrg DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count); 128420131375Smrg bo_gem->map_count = 0; 128520131375Smrg drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 128620131375Smrg drm_intel_gem_bo_mark_mmaps_incoherent(bo); 128720131375Smrg } 128820131375Smrg 128920131375Smrg DRMLISTDEL(&bo_gem->name_list); 129020131375Smrg 129122944501Smrg bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size); 129222944501Smrg /* Put the buffer into our internal cache for reuse if we can. */ 129322944501Smrg if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL && 129422944501Smrg drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem, 129522944501Smrg I915_MADV_DONTNEED)) { 129622944501Smrg bo_gem->free_time = time; 129722944501Smrg 129822944501Smrg bo_gem->name = NULL; 129922944501Smrg bo_gem->validate_index = -1; 130022944501Smrg 130122944501Smrg DRMLISTADDTAIL(&bo_gem->head, &bucket->head); 130222944501Smrg } else { 130322944501Smrg drm_intel_gem_bo_free(bo); 130422944501Smrg } 130522944501Smrg} 130622944501Smrg 130722944501Smrgstatic void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, 130822944501Smrg time_t time) 130922944501Smrg{ 131022944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 131122944501Smrg 131222944501Smrg assert(atomic_read(&bo_gem->refcount) > 0); 131322944501Smrg if (atomic_dec_and_test(&bo_gem->refcount)) 131422944501Smrg drm_intel_gem_bo_unreference_final(bo, time); 131522944501Smrg} 131622944501Smrg 131722944501Smrgstatic void drm_intel_gem_bo_unreference(drm_intel_bo *bo) 131822944501Smrg{ 131922944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 132022944501Smrg 132122944501Smrg assert(atomic_read(&bo_gem->refcount) > 0); 1322a884aba1Smrg 1323a884aba1Smrg if (atomic_add_unless(&bo_gem->refcount, -1, 1)) { 132422944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = 132522944501Smrg (drm_intel_bufmgr_gem *) bo->bufmgr; 132622944501Smrg struct timespec time; 132722944501Smrg 132822944501Smrg clock_gettime(CLOCK_MONOTONIC, &time); 132922944501Smrg 133022944501Smrg pthread_mutex_lock(&bufmgr_gem->lock); 1331a884aba1Smrg 1332a884aba1Smrg if (atomic_dec_and_test(&bo_gem->refcount)) { 1333a884aba1Smrg drm_intel_gem_bo_unreference_final(bo, time.tv_sec); 1334a884aba1Smrg drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec); 1335a884aba1Smrg } 1336a884aba1Smrg 133722944501Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 133822944501Smrg } 133922944501Smrg} 134022944501Smrg 134122944501Smrgstatic int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) 134222944501Smrg{ 134322944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 134422944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 134522944501Smrg struct drm_i915_gem_set_domain set_domain; 134622944501Smrg int ret; 134722944501Smrg 1348a884aba1Smrg if (bo_gem->is_userptr) { 1349a884aba1Smrg /* Return the same user ptr */ 1350a884aba1Smrg bo->virtual = bo_gem->user_virtual; 1351a884aba1Smrg return 0; 1352a884aba1Smrg } 1353a884aba1Smrg 135422944501Smrg pthread_mutex_lock(&bufmgr_gem->lock); 135522944501Smrg 135620131375Smrg if (bo_gem->map_count++ == 0) 135720131375Smrg drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 135820131375Smrg 135922944501Smrg if (!bo_gem->mem_virtual) { 136022944501Smrg struct drm_i915_gem_mmap mmap_arg; 136122944501Smrg 136220131375Smrg DBG("bo_map: %d (%s), map_count=%d\n", 136320131375Smrg bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 136422944501Smrg 1365424e9256Smrg memclear(mmap_arg); 136622944501Smrg mmap_arg.handle = bo_gem->gem_handle; 136722944501Smrg mmap_arg.size = bo->size; 13686d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, 13696d98c517Smrg DRM_IOCTL_I915_GEM_MMAP, 13706d98c517Smrg &mmap_arg); 137122944501Smrg if (ret != 0) { 137222944501Smrg ret = -errno; 13739ce4edccSmrg DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 13749ce4edccSmrg __FILE__, __LINE__, bo_gem->gem_handle, 13759ce4edccSmrg bo_gem->name, strerror(errno)); 137620131375Smrg if (--bo_gem->map_count == 0) 137720131375Smrg drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 137822944501Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 137922944501Smrg return ret; 138022944501Smrg } 138120131375Smrg VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); 138222944501Smrg bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; 138322944501Smrg } 138422944501Smrg DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 138522944501Smrg bo_gem->mem_virtual); 138622944501Smrg bo->virtual = bo_gem->mem_virtual; 138722944501Smrg 1388424e9256Smrg memclear(set_domain); 138922944501Smrg set_domain.handle = bo_gem->gem_handle; 139022944501Smrg set_domain.read_domains = I915_GEM_DOMAIN_CPU; 139122944501Smrg if (write_enable) 139222944501Smrg set_domain.write_domain = I915_GEM_DOMAIN_CPU; 139322944501Smrg else 139422944501Smrg set_domain.write_domain = 0; 13956d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, 13966d98c517Smrg DRM_IOCTL_I915_GEM_SET_DOMAIN, 13976d98c517Smrg &set_domain); 139822944501Smrg if (ret != 0) { 13999ce4edccSmrg DBG("%s:%d: Error setting to CPU domain %d: %s\n", 14009ce4edccSmrg __FILE__, __LINE__, bo_gem->gem_handle, 14019ce4edccSmrg strerror(errno)); 140222944501Smrg } 140322944501Smrg 140420131375Smrg if (write_enable) 140520131375Smrg bo_gem->mapped_cpu_write = true; 140620131375Smrg 140720131375Smrg drm_intel_gem_bo_mark_mmaps_incoherent(bo); 140820131375Smrg VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size)); 140922944501Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 141022944501Smrg 141122944501Smrg return 0; 141222944501Smrg} 141322944501Smrg 141420131375Smrgstatic int 141520131375Smrgmap_gtt(drm_intel_bo *bo) 141622944501Smrg{ 141722944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 141822944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 141922944501Smrg int ret; 142022944501Smrg 1421a884aba1Smrg if (bo_gem->is_userptr) 1422a884aba1Smrg return -EINVAL; 1423a884aba1Smrg 142420131375Smrg if (bo_gem->map_count++ == 0) 142520131375Smrg drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 142622944501Smrg 142722944501Smrg /* Get a mapping of the buffer if we haven't before. */ 142822944501Smrg if (bo_gem->gtt_virtual == NULL) { 142922944501Smrg struct drm_i915_gem_mmap_gtt mmap_arg; 143022944501Smrg 143120131375Smrg DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n", 143220131375Smrg bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 143322944501Smrg 1434424e9256Smrg memclear(mmap_arg); 143522944501Smrg mmap_arg.handle = bo_gem->gem_handle; 143622944501Smrg 143722944501Smrg /* Get the fake offset back... */ 14386d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, 14396d98c517Smrg DRM_IOCTL_I915_GEM_MMAP_GTT, 14406d98c517Smrg &mmap_arg); 144122944501Smrg if (ret != 0) { 144222944501Smrg ret = -errno; 14439ce4edccSmrg DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n", 14449ce4edccSmrg __FILE__, __LINE__, 14459ce4edccSmrg bo_gem->gem_handle, bo_gem->name, 14469ce4edccSmrg strerror(errno)); 144720131375Smrg if (--bo_gem->map_count == 0) 144820131375Smrg drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 144922944501Smrg return ret; 145022944501Smrg } 145122944501Smrg 145222944501Smrg /* and mmap it */ 1453aec75c42Sriastradh ret = drmMap(bufmgr_gem->fd, mmap_arg.offset, bo->size, 1454aec75c42Sriastradh &bo_gem->gtt_virtual); 1455aec75c42Sriastradh if (ret) { 145622944501Smrg bo_gem->gtt_virtual = NULL; 14579ce4edccSmrg DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 14589ce4edccSmrg __FILE__, __LINE__, 14599ce4edccSmrg bo_gem->gem_handle, bo_gem->name, 14609ce4edccSmrg strerror(errno)); 146120131375Smrg if (--bo_gem->map_count == 0) 146220131375Smrg drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 146322944501Smrg return ret; 146422944501Smrg } 146522944501Smrg } 146622944501Smrg 146722944501Smrg bo->virtual = bo_gem->gtt_virtual; 146822944501Smrg 146922944501Smrg DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 147022944501Smrg bo_gem->gtt_virtual); 147122944501Smrg 147220131375Smrg return 0; 147320131375Smrg} 147420131375Smrg 1475424e9256Smrgint 1476a884aba1Smrgdrm_intel_gem_bo_map_gtt(drm_intel_bo *bo) 147720131375Smrg{ 147820131375Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 147920131375Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 148020131375Smrg struct drm_i915_gem_set_domain set_domain; 148120131375Smrg int ret; 148220131375Smrg 148320131375Smrg pthread_mutex_lock(&bufmgr_gem->lock); 148420131375Smrg 148520131375Smrg ret = map_gtt(bo); 148620131375Smrg if (ret) { 148720131375Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 148820131375Smrg return ret; 148920131375Smrg } 149020131375Smrg 149120131375Smrg /* Now move it to the GTT domain so that the GPU and CPU 149220131375Smrg * caches are flushed and the GPU isn't actively using the 149320131375Smrg * buffer. 149420131375Smrg * 149520131375Smrg * The pagefault handler does this domain change for us when 149620131375Smrg * it has unbound the BO from the GTT, but it's up to us to 149720131375Smrg * tell it when we're about to use things if we had done 149820131375Smrg * rendering and it still happens to be bound to the GTT. 149920131375Smrg */ 1500424e9256Smrg memclear(set_domain); 150122944501Smrg set_domain.handle = bo_gem->gem_handle; 150222944501Smrg set_domain.read_domains = I915_GEM_DOMAIN_GTT; 150322944501Smrg set_domain.write_domain = I915_GEM_DOMAIN_GTT; 15046d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, 15056d98c517Smrg DRM_IOCTL_I915_GEM_SET_DOMAIN, 15066d98c517Smrg &set_domain); 150722944501Smrg if (ret != 0) { 15089ce4edccSmrg DBG("%s:%d: Error setting domain %d: %s\n", 15099ce4edccSmrg __FILE__, __LINE__, bo_gem->gem_handle, 15109ce4edccSmrg strerror(errno)); 151122944501Smrg } 151222944501Smrg 151320131375Smrg drm_intel_gem_bo_mark_mmaps_incoherent(bo); 151420131375Smrg VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size)); 151522944501Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 151622944501Smrg 15176d98c517Smrg return 0; 151822944501Smrg} 151922944501Smrg 152020131375Smrg/** 152120131375Smrg * Performs a mapping of the buffer object like the normal GTT 152220131375Smrg * mapping, but avoids waiting for the GPU to be done reading from or 152320131375Smrg * rendering to the buffer. 152420131375Smrg * 152520131375Smrg * This is used in the implementation of GL_ARB_map_buffer_range: The 152620131375Smrg * user asks to create a buffer, then does a mapping, fills some 152720131375Smrg * space, runs a drawing command, then asks to map it again without 152820131375Smrg * synchronizing because it guarantees that it won't write over the 152920131375Smrg * data that the GPU is busy using (or, more specifically, that if it 153020131375Smrg * does write over the data, it acknowledges that rendering is 153120131375Smrg * undefined). 153220131375Smrg */ 153320131375Smrg 1534424e9256Smrgint 1535a884aba1Smrgdrm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo) 153622944501Smrg{ 153722944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 153820131375Smrg#ifdef HAVE_VALGRIND 153920131375Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 154020131375Smrg#endif 154120131375Smrg int ret; 154222944501Smrg 154320131375Smrg /* If the CPU cache isn't coherent with the GTT, then use a 154420131375Smrg * regular synchronized mapping. The problem is that we don't 154520131375Smrg * track where the buffer was last used on the CPU side in 154620131375Smrg * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so 154720131375Smrg * we would potentially corrupt the buffer even when the user 154820131375Smrg * does reasonable things. 154920131375Smrg */ 155020131375Smrg if (!bufmgr_gem->has_llc) 155120131375Smrg return drm_intel_gem_bo_map_gtt(bo); 155222944501Smrg 155322944501Smrg pthread_mutex_lock(&bufmgr_gem->lock); 155420131375Smrg 155520131375Smrg ret = map_gtt(bo); 155620131375Smrg if (ret == 0) { 155720131375Smrg drm_intel_gem_bo_mark_mmaps_incoherent(bo); 155820131375Smrg VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size)); 155920131375Smrg } 156020131375Smrg 156122944501Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 156222944501Smrg 156322944501Smrg return ret; 156422944501Smrg} 156522944501Smrg 156622944501Smrgstatic int drm_intel_gem_bo_unmap(drm_intel_bo *bo) 156722944501Smrg{ 1568a884aba1Smrg drm_intel_bufmgr_gem *bufmgr_gem; 156922944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 157020131375Smrg int ret = 0; 157122944501Smrg 157222944501Smrg if (bo == NULL) 157322944501Smrg return 0; 157422944501Smrg 1575a884aba1Smrg if (bo_gem->is_userptr) 1576a884aba1Smrg return 0; 1577a884aba1Smrg 1578a884aba1Smrg bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1579a884aba1Smrg 158022944501Smrg pthread_mutex_lock(&bufmgr_gem->lock); 158122944501Smrg 158220131375Smrg if (bo_gem->map_count <= 0) { 158320131375Smrg DBG("attempted to unmap an unmapped bo\n"); 158420131375Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 158520131375Smrg /* Preserve the old behaviour of just treating this as a 158620131375Smrg * no-op rather than reporting the error. 158720131375Smrg */ 158820131375Smrg return 0; 158920131375Smrg } 159020131375Smrg 159120131375Smrg if (bo_gem->mapped_cpu_write) { 159220131375Smrg struct drm_i915_gem_sw_finish sw_finish; 159320131375Smrg 159420131375Smrg /* Cause a flush to happen if the buffer's pinned for 159520131375Smrg * scanout, so the results show up in a timely manner. 159620131375Smrg * Unlike GTT set domains, this only does work if the 159720131375Smrg * buffer should be scanout-related. 159820131375Smrg */ 1599424e9256Smrg memclear(sw_finish); 160020131375Smrg sw_finish.handle = bo_gem->gem_handle; 160120131375Smrg ret = drmIoctl(bufmgr_gem->fd, 160220131375Smrg DRM_IOCTL_I915_GEM_SW_FINISH, 160320131375Smrg &sw_finish); 160420131375Smrg ret = ret == -1 ? -errno : 0; 160520131375Smrg 160620131375Smrg bo_gem->mapped_cpu_write = false; 160720131375Smrg } 160822944501Smrg 160920131375Smrg /* We need to unmap after every innovation as we cannot track 161020131375Smrg * an open vma for every bo as that will exhaasut the system 161120131375Smrg * limits and cause later failures. 161220131375Smrg */ 161320131375Smrg if (--bo_gem->map_count == 0) { 161420131375Smrg drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 161520131375Smrg drm_intel_gem_bo_mark_mmaps_incoherent(bo); 161620131375Smrg bo->virtual = NULL; 161720131375Smrg } 161822944501Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 161922944501Smrg 162022944501Smrg return ret; 162122944501Smrg} 162222944501Smrg 1623424e9256Smrgint 1624a884aba1Smrgdrm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo) 162520131375Smrg{ 162620131375Smrg return drm_intel_gem_bo_unmap(bo); 162720131375Smrg} 162820131375Smrg 162922944501Smrgstatic int 163022944501Smrgdrm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset, 163122944501Smrg unsigned long size, const void *data) 163222944501Smrg{ 163322944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 163422944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 163522944501Smrg struct drm_i915_gem_pwrite pwrite; 163622944501Smrg int ret; 163722944501Smrg 1638a884aba1Smrg if (bo_gem->is_userptr) 1639a884aba1Smrg return -EINVAL; 1640a884aba1Smrg 1641424e9256Smrg memclear(pwrite); 164222944501Smrg pwrite.handle = bo_gem->gem_handle; 164322944501Smrg pwrite.offset = offset; 164422944501Smrg pwrite.size = size; 164522944501Smrg pwrite.data_ptr = (uint64_t) (uintptr_t) data; 16466d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, 16476d98c517Smrg DRM_IOCTL_I915_GEM_PWRITE, 16486d98c517Smrg &pwrite); 164922944501Smrg if (ret != 0) { 165022944501Smrg ret = -errno; 16519ce4edccSmrg DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n", 16529ce4edccSmrg __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 16539ce4edccSmrg (int)size, strerror(errno)); 165422944501Smrg } 165522944501Smrg 165622944501Smrg return ret; 165722944501Smrg} 165822944501Smrg 165922944501Smrgstatic int 166022944501Smrgdrm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id) 166122944501Smrg{ 166222944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 166322944501Smrg struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id; 166422944501Smrg int ret; 166522944501Smrg 1666424e9256Smrg memclear(get_pipe_from_crtc_id); 166722944501Smrg get_pipe_from_crtc_id.crtc_id = crtc_id; 16686d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, 16696d98c517Smrg DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID, 16706d98c517Smrg &get_pipe_from_crtc_id); 167122944501Smrg if (ret != 0) { 167222944501Smrg /* We return -1 here to signal that we don't 167322944501Smrg * know which pipe is associated with this crtc. 167422944501Smrg * This lets the caller know that this information 167522944501Smrg * isn't available; using the wrong pipe for 167622944501Smrg * vblank waiting can cause the chipset to lock up 167722944501Smrg */ 167822944501Smrg return -1; 167922944501Smrg } 168022944501Smrg 168122944501Smrg return get_pipe_from_crtc_id.pipe; 168222944501Smrg} 168322944501Smrg 168422944501Smrgstatic int 168522944501Smrgdrm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, 168622944501Smrg unsigned long size, void *data) 168722944501Smrg{ 168822944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 168922944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 169022944501Smrg struct drm_i915_gem_pread pread; 169122944501Smrg int ret; 169222944501Smrg 1693a884aba1Smrg if (bo_gem->is_userptr) 1694a884aba1Smrg return -EINVAL; 1695a884aba1Smrg 1696424e9256Smrg memclear(pread); 169722944501Smrg pread.handle = bo_gem->gem_handle; 169822944501Smrg pread.offset = offset; 169922944501Smrg pread.size = size; 170022944501Smrg pread.data_ptr = (uint64_t) (uintptr_t) data; 17016d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, 17026d98c517Smrg DRM_IOCTL_I915_GEM_PREAD, 17036d98c517Smrg &pread); 170422944501Smrg if (ret != 0) { 170522944501Smrg ret = -errno; 17069ce4edccSmrg DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n", 17079ce4edccSmrg __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 17089ce4edccSmrg (int)size, strerror(errno)); 170922944501Smrg } 171022944501Smrg 171122944501Smrg return ret; 171222944501Smrg} 171322944501Smrg 17149ce4edccSmrg/** Waits for all GPU rendering with the object to have completed. */ 171522944501Smrgstatic void 171622944501Smrgdrm_intel_gem_bo_wait_rendering(drm_intel_bo *bo) 171722944501Smrg{ 17189ce4edccSmrg drm_intel_gem_bo_start_gtt_access(bo, 1); 171922944501Smrg} 172022944501Smrg 172120131375Smrg/** 172220131375Smrg * Waits on a BO for the given amount of time. 172320131375Smrg * 172420131375Smrg * @bo: buffer object to wait for 172520131375Smrg * @timeout_ns: amount of time to wait in nanoseconds. 172620131375Smrg * If value is less than 0, an infinite wait will occur. 172720131375Smrg * 172820131375Smrg * Returns 0 if the wait was successful ie. the last batch referencing the 172920131375Smrg * object has completed within the allotted time. Otherwise some negative return 173020131375Smrg * value describes the error. Of particular interest is -ETIME when the wait has 173120131375Smrg * failed to yield the desired result. 173220131375Smrg * 173320131375Smrg * Similar to drm_intel_gem_bo_wait_rendering except a timeout parameter allows 173420131375Smrg * the operation to give up after a certain amount of time. Another subtle 173520131375Smrg * difference is the internal locking semantics are different (this variant does 173620131375Smrg * not hold the lock for the duration of the wait). This makes the wait subject 173720131375Smrg * to a larger userspace race window. 173820131375Smrg * 173920131375Smrg * The implementation shall wait until the object is no longer actively 174020131375Smrg * referenced within a batch buffer at the time of the call. The wait will 174120131375Smrg * not guarantee that the buffer is re-issued via another thread, or an flinked 174220131375Smrg * handle. Userspace must make sure this race does not occur if such precision 174320131375Smrg * is important. 1744424e9256Smrg * 1745424e9256Smrg * Note that some kernels have broken the inifite wait for negative values 1746424e9256Smrg * promise, upgrade to latest stable kernels if this is the case. 174720131375Smrg */ 1748424e9256Smrgint 1749a884aba1Smrgdrm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns) 175020131375Smrg{ 175120131375Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 175220131375Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 175320131375Smrg struct drm_i915_gem_wait wait; 175420131375Smrg int ret; 175520131375Smrg 175620131375Smrg if (!bufmgr_gem->has_wait_timeout) { 175720131375Smrg DBG("%s:%d: Timed wait is not supported. Falling back to " 175820131375Smrg "infinite wait\n", __FILE__, __LINE__); 175920131375Smrg if (timeout_ns) { 176020131375Smrg drm_intel_gem_bo_wait_rendering(bo); 176120131375Smrg return 0; 176220131375Smrg } else { 176320131375Smrg return drm_intel_gem_bo_busy(bo) ? -ETIME : 0; 176420131375Smrg } 176520131375Smrg } 176620131375Smrg 1767424e9256Smrg memclear(wait); 176820131375Smrg wait.bo_handle = bo_gem->gem_handle; 176920131375Smrg wait.timeout_ns = timeout_ns; 177020131375Smrg ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); 177120131375Smrg if (ret == -1) 177220131375Smrg return -errno; 177320131375Smrg 177420131375Smrg return ret; 177520131375Smrg} 177620131375Smrg 177722944501Smrg/** 177822944501Smrg * Sets the object to the GTT read and possibly write domain, used by the X 177922944501Smrg * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt(). 178022944501Smrg * 178122944501Smrg * In combination with drm_intel_gem_bo_pin() and manual fence management, we 178222944501Smrg * can do tiled pixmaps this way. 178322944501Smrg */ 1784424e9256Smrgvoid 178522944501Smrgdrm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable) 178622944501Smrg{ 178722944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 178822944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 178922944501Smrg struct drm_i915_gem_set_domain set_domain; 179022944501Smrg int ret; 179122944501Smrg 1792424e9256Smrg memclear(set_domain); 179322944501Smrg set_domain.handle = bo_gem->gem_handle; 179422944501Smrg set_domain.read_domains = I915_GEM_DOMAIN_GTT; 179522944501Smrg set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0; 17966d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, 17976d98c517Smrg DRM_IOCTL_I915_GEM_SET_DOMAIN, 17986d98c517Smrg &set_domain); 179922944501Smrg if (ret != 0) { 18009ce4edccSmrg DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n", 18019ce4edccSmrg __FILE__, __LINE__, bo_gem->gem_handle, 18029ce4edccSmrg set_domain.read_domains, set_domain.write_domain, 18039ce4edccSmrg strerror(errno)); 180422944501Smrg } 180522944501Smrg} 180622944501Smrg 180722944501Smrgstatic void 180822944501Smrgdrm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr) 180922944501Smrg{ 181022944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1811424e9256Smrg struct drm_gem_close close_bo; 1812424e9256Smrg int i, ret; 181322944501Smrg 181422944501Smrg free(bufmgr_gem->exec2_objects); 181522944501Smrg free(bufmgr_gem->exec_objects); 181622944501Smrg free(bufmgr_gem->exec_bos); 181720131375Smrg free(bufmgr_gem->aub_filename); 181822944501Smrg 181922944501Smrg pthread_mutex_destroy(&bufmgr_gem->lock); 182022944501Smrg 182122944501Smrg /* Free any cached buffer objects we were going to reuse */ 1822aaba2545Smrg for (i = 0; i < bufmgr_gem->num_buckets; i++) { 182322944501Smrg struct drm_intel_gem_bo_bucket *bucket = 182422944501Smrg &bufmgr_gem->cache_bucket[i]; 182522944501Smrg drm_intel_bo_gem *bo_gem; 182622944501Smrg 182722944501Smrg while (!DRMLISTEMPTY(&bucket->head)) { 182822944501Smrg bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 182922944501Smrg bucket->head.next, head); 183022944501Smrg DRMLISTDEL(&bo_gem->head); 183122944501Smrg 183222944501Smrg drm_intel_gem_bo_free(&bo_gem->bo); 183322944501Smrg } 183422944501Smrg } 183522944501Smrg 1836424e9256Smrg /* Release userptr bo kept hanging around for optimisation. */ 1837424e9256Smrg if (bufmgr_gem->userptr_active.ptr) { 1838424e9256Smrg memclear(close_bo); 1839424e9256Smrg close_bo.handle = bufmgr_gem->userptr_active.handle; 1840424e9256Smrg ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close_bo); 1841424e9256Smrg free(bufmgr_gem->userptr_active.ptr); 1842424e9256Smrg if (ret) 1843424e9256Smrg fprintf(stderr, 1844424e9256Smrg "Failed to release test userptr object! (%d) " 1845424e9256Smrg "i915 kernel driver may not be sane!\n", errno); 1846424e9256Smrg } 1847424e9256Smrg 184822944501Smrg free(bufmgr); 184922944501Smrg} 185022944501Smrg 185122944501Smrg/** 185222944501Smrg * Adds the target buffer to the validation list and adds the relocation 185322944501Smrg * to the reloc_buffer's relocation list. 185422944501Smrg * 185522944501Smrg * The relocation entry at the given offset must already contain the 185622944501Smrg * precomputed relocation value, because the kernel will optimize out 185722944501Smrg * the relocation entry write when the buffer hasn't moved from the 185822944501Smrg * last known offset in target_bo. 185922944501Smrg */ 186022944501Smrgstatic int 186122944501Smrgdo_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 186222944501Smrg drm_intel_bo *target_bo, uint32_t target_offset, 186322944501Smrg uint32_t read_domains, uint32_t write_domain, 186420131375Smrg bool need_fence) 186522944501Smrg{ 186622944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 186722944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 186822944501Smrg drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 186920131375Smrg bool fenced_command; 187022944501Smrg 187122944501Smrg if (bo_gem->has_error) 187222944501Smrg return -ENOMEM; 187322944501Smrg 187422944501Smrg if (target_bo_gem->has_error) { 187520131375Smrg bo_gem->has_error = true; 187622944501Smrg return -ENOMEM; 187722944501Smrg } 187822944501Smrg 187922944501Smrg /* We never use HW fences for rendering on 965+ */ 188022944501Smrg if (bufmgr_gem->gen >= 4) 188120131375Smrg need_fence = false; 188222944501Smrg 18839ce4edccSmrg fenced_command = need_fence; 18849ce4edccSmrg if (target_bo_gem->tiling_mode == I915_TILING_NONE) 188520131375Smrg need_fence = false; 18869ce4edccSmrg 188722944501Smrg /* Create a new relocation list if needed */ 188822944501Smrg if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo)) 188922944501Smrg return -ENOMEM; 189022944501Smrg 189122944501Smrg /* Check overflow */ 189222944501Smrg assert(bo_gem->reloc_count < bufmgr_gem->max_relocs); 189322944501Smrg 189422944501Smrg /* Check args */ 189522944501Smrg assert(offset <= bo->size - 4); 189622944501Smrg assert((write_domain & (write_domain - 1)) == 0); 189722944501Smrg 18983c748557Ssnj /* An object needing a fence is a tiled buffer, so it won't have 18993c748557Ssnj * relocs to other buffers. 19003c748557Ssnj */ 19013c748557Ssnj if (need_fence) { 19023c748557Ssnj assert(target_bo_gem->reloc_count == 0); 19033c748557Ssnj target_bo_gem->reloc_tree_fences = 1; 19043c748557Ssnj } 19053c748557Ssnj 190622944501Smrg /* Make sure that we're not adding a reloc to something whose size has 190722944501Smrg * already been accounted for. 190822944501Smrg */ 190922944501Smrg assert(!bo_gem->used_as_reloc_target); 1910aaba2545Smrg if (target_bo_gem != bo_gem) { 191120131375Smrg target_bo_gem->used_as_reloc_target = true; 1912aaba2545Smrg bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size; 19133c748557Ssnj bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences; 1914aaba2545Smrg } 191522944501Smrg 191622944501Smrg bo_gem->relocs[bo_gem->reloc_count].offset = offset; 191722944501Smrg bo_gem->relocs[bo_gem->reloc_count].delta = target_offset; 191822944501Smrg bo_gem->relocs[bo_gem->reloc_count].target_handle = 191922944501Smrg target_bo_gem->gem_handle; 192022944501Smrg bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; 192122944501Smrg bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; 192220131375Smrg bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64; 192322944501Smrg 192422944501Smrg bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo; 1925aaba2545Smrg if (target_bo != bo) 1926aaba2545Smrg drm_intel_gem_bo_reference(target_bo); 19279ce4edccSmrg if (fenced_command) 192822944501Smrg bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 192922944501Smrg DRM_INTEL_RELOC_FENCE; 193022944501Smrg else 193122944501Smrg bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0; 193222944501Smrg 193322944501Smrg bo_gem->reloc_count++; 193422944501Smrg 193522944501Smrg return 0; 193622944501Smrg} 193722944501Smrg 193822944501Smrgstatic int 193922944501Smrgdrm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 194022944501Smrg drm_intel_bo *target_bo, uint32_t target_offset, 194122944501Smrg uint32_t read_domains, uint32_t write_domain) 194222944501Smrg{ 194322944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 194422944501Smrg 194522944501Smrg return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 194622944501Smrg read_domains, write_domain, 194722944501Smrg !bufmgr_gem->fenced_relocs); 194822944501Smrg} 194922944501Smrg 195022944501Smrgstatic int 195122944501Smrgdrm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset, 195222944501Smrg drm_intel_bo *target_bo, 195322944501Smrg uint32_t target_offset, 195422944501Smrg uint32_t read_domains, uint32_t write_domain) 195522944501Smrg{ 195622944501Smrg return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 195720131375Smrg read_domains, write_domain, true); 195820131375Smrg} 195920131375Smrg 1960424e9256Smrgint 196120131375Smrgdrm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo) 196220131375Smrg{ 196320131375Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 196420131375Smrg 196520131375Smrg return bo_gem->reloc_count; 196620131375Smrg} 196720131375Smrg 196820131375Smrg/** 196920131375Smrg * Removes existing relocation entries in the BO after "start". 197020131375Smrg * 197120131375Smrg * This allows a user to avoid a two-step process for state setup with 197220131375Smrg * counting up all the buffer objects and doing a 197320131375Smrg * drm_intel_bufmgr_check_aperture_space() before emitting any of the 197420131375Smrg * relocations for the state setup. Instead, save the state of the 197520131375Smrg * batchbuffer including drm_intel_gem_get_reloc_count(), emit all the 197620131375Smrg * state, and then check if it still fits in the aperture. 197720131375Smrg * 197820131375Smrg * Any further drm_intel_bufmgr_check_aperture_space() queries 197920131375Smrg * involving this buffer in the tree are undefined after this call. 198020131375Smrg */ 1981424e9256Smrgvoid 198220131375Smrgdrm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start) 198320131375Smrg{ 1984a884aba1Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 198520131375Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 198620131375Smrg int i; 198720131375Smrg struct timespec time; 198820131375Smrg 198920131375Smrg clock_gettime(CLOCK_MONOTONIC, &time); 199020131375Smrg 199120131375Smrg assert(bo_gem->reloc_count >= start); 1992a884aba1Smrg 199320131375Smrg /* Unreference the cleared target buffers */ 1994a884aba1Smrg pthread_mutex_lock(&bufmgr_gem->lock); 1995a884aba1Smrg 199620131375Smrg for (i = start; i < bo_gem->reloc_count; i++) { 199720131375Smrg drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo; 199820131375Smrg if (&target_bo_gem->bo != bo) { 199920131375Smrg bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences; 200020131375Smrg drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, 200120131375Smrg time.tv_sec); 200220131375Smrg } 200320131375Smrg } 200420131375Smrg bo_gem->reloc_count = start; 2005a884aba1Smrg 2006a884aba1Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 2007a884aba1Smrg 200822944501Smrg} 200922944501Smrg 201022944501Smrg/** 201122944501Smrg * Walk the tree of relocations rooted at BO and accumulate the list of 201222944501Smrg * validations to be performed and update the relocation buffers with 201322944501Smrg * index values into the validation list. 201422944501Smrg */ 201522944501Smrgstatic void 201622944501Smrgdrm_intel_gem_bo_process_reloc(drm_intel_bo *bo) 201722944501Smrg{ 201822944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 201922944501Smrg int i; 202022944501Smrg 202122944501Smrg if (bo_gem->relocs == NULL) 202222944501Smrg return; 202322944501Smrg 202422944501Smrg for (i = 0; i < bo_gem->reloc_count; i++) { 202522944501Smrg drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 202622944501Smrg 2027aaba2545Smrg if (target_bo == bo) 2028aaba2545Smrg continue; 2029aaba2545Smrg 203020131375Smrg drm_intel_gem_bo_mark_mmaps_incoherent(bo); 203120131375Smrg 203222944501Smrg /* Continue walking the tree depth-first. */ 203322944501Smrg drm_intel_gem_bo_process_reloc(target_bo); 203422944501Smrg 203522944501Smrg /* Add the target to the validate list */ 203622944501Smrg drm_intel_add_validate_buffer(target_bo); 203722944501Smrg } 203822944501Smrg} 203922944501Smrg 204022944501Smrgstatic void 204122944501Smrgdrm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) 204222944501Smrg{ 204322944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 204422944501Smrg int i; 204522944501Smrg 204622944501Smrg if (bo_gem->relocs == NULL) 204722944501Smrg return; 204822944501Smrg 204922944501Smrg for (i = 0; i < bo_gem->reloc_count; i++) { 205022944501Smrg drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 205122944501Smrg int need_fence; 205222944501Smrg 2053aaba2545Smrg if (target_bo == bo) 2054aaba2545Smrg continue; 2055aaba2545Smrg 205620131375Smrg drm_intel_gem_bo_mark_mmaps_incoherent(bo); 205720131375Smrg 205822944501Smrg /* Continue walking the tree depth-first. */ 205922944501Smrg drm_intel_gem_bo_process_reloc2(target_bo); 206022944501Smrg 206122944501Smrg need_fence = (bo_gem->reloc_target_info[i].flags & 206222944501Smrg DRM_INTEL_RELOC_FENCE); 206322944501Smrg 206422944501Smrg /* Add the target to the validate list */ 206522944501Smrg drm_intel_add_validate_buffer2(target_bo, need_fence); 206622944501Smrg } 206722944501Smrg} 206822944501Smrg 206922944501Smrg 207022944501Smrgstatic void 207122944501Smrgdrm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem) 207222944501Smrg{ 207322944501Smrg int i; 207422944501Smrg 207522944501Smrg for (i = 0; i < bufmgr_gem->exec_count; i++) { 207622944501Smrg drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 207722944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 207822944501Smrg 207922944501Smrg /* Update the buffer offset */ 208020131375Smrg if (bufmgr_gem->exec_objects[i].offset != bo->offset64) { 2081d82d45b3Sjoerg DBG("BO %d (%s) migrated: 0x%08llx -> 0x%08llx\n", 2082d82d45b3Sjoerg bo_gem->gem_handle, bo_gem->name, 2083d82d45b3Sjoerg (unsigned long long)bo->offset64, 208422944501Smrg (unsigned long long)bufmgr_gem->exec_objects[i]. 208522944501Smrg offset); 208620131375Smrg bo->offset64 = bufmgr_gem->exec_objects[i].offset; 208722944501Smrg bo->offset = bufmgr_gem->exec_objects[i].offset; 208822944501Smrg } 208922944501Smrg } 209022944501Smrg} 209122944501Smrg 209222944501Smrgstatic void 209322944501Smrgdrm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem) 209422944501Smrg{ 209522944501Smrg int i; 209622944501Smrg 209722944501Smrg for (i = 0; i < bufmgr_gem->exec_count; i++) { 209822944501Smrg drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 209922944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 210022944501Smrg 210122944501Smrg /* Update the buffer offset */ 210220131375Smrg if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) { 2103d82d45b3Sjoerg DBG("BO %d (%s) migrated: 0x%08llx -> 0x%08llx\n", 2104d82d45b3Sjoerg bo_gem->gem_handle, bo_gem->name, 2105d82d45b3Sjoerg (unsigned long long)bo->offset64, 210622944501Smrg (unsigned long long)bufmgr_gem->exec2_objects[i].offset); 210720131375Smrg bo->offset64 = bufmgr_gem->exec2_objects[i].offset; 210822944501Smrg bo->offset = bufmgr_gem->exec2_objects[i].offset; 210922944501Smrg } 211022944501Smrg } 211122944501Smrg} 211222944501Smrg 211320131375Smrgstatic void 211420131375Smrgaub_out(drm_intel_bufmgr_gem *bufmgr_gem, uint32_t data) 211520131375Smrg{ 211620131375Smrg fwrite(&data, 1, 4, bufmgr_gem->aub_file); 211720131375Smrg} 211820131375Smrg 211920131375Smrgstatic void 212020131375Smrgaub_out_data(drm_intel_bufmgr_gem *bufmgr_gem, void *data, size_t size) 212120131375Smrg{ 212220131375Smrg fwrite(data, 1, size, bufmgr_gem->aub_file); 212320131375Smrg} 212420131375Smrg 212520131375Smrgstatic void 212620131375Smrgaub_write_bo_data(drm_intel_bo *bo, uint32_t offset, uint32_t size) 212722944501Smrg{ 212822944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 212922944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 213020131375Smrg uint32_t *data; 213120131375Smrg unsigned int i; 213222944501Smrg 213320131375Smrg data = malloc(bo->size); 213420131375Smrg drm_intel_bo_get_subdata(bo, offset, size, data); 213522944501Smrg 213620131375Smrg /* Easy mode: write out bo with no relocations */ 213720131375Smrg if (!bo_gem->reloc_count) { 213820131375Smrg aub_out_data(bufmgr_gem, data, size); 213920131375Smrg free(data); 214020131375Smrg return; 214120131375Smrg } 214222944501Smrg 214320131375Smrg /* Otherwise, handle the relocations while writing. */ 214420131375Smrg for (i = 0; i < size / 4; i++) { 214520131375Smrg int r; 214620131375Smrg for (r = 0; r < bo_gem->reloc_count; r++) { 214720131375Smrg struct drm_i915_gem_relocation_entry *reloc; 214820131375Smrg drm_intel_reloc_target *info; 214922944501Smrg 215020131375Smrg reloc = &bo_gem->relocs[r]; 215120131375Smrg info = &bo_gem->reloc_target_info[r]; 215222944501Smrg 215320131375Smrg if (reloc->offset == offset + i * 4) { 215420131375Smrg drm_intel_bo_gem *target_gem; 215520131375Smrg uint32_t val; 215622944501Smrg 215720131375Smrg target_gem = (drm_intel_bo_gem *)info->bo; 215822944501Smrg 215920131375Smrg val = reloc->delta; 216020131375Smrg val += target_gem->aub_offset; 216122944501Smrg 216220131375Smrg aub_out(bufmgr_gem, val); 216320131375Smrg data[i] = val; 216420131375Smrg break; 216520131375Smrg } 216620131375Smrg } 216720131375Smrg if (r == bo_gem->reloc_count) { 216820131375Smrg /* no relocation, just the data */ 216920131375Smrg aub_out(bufmgr_gem, data[i]); 217020131375Smrg } 217122944501Smrg } 217222944501Smrg 217320131375Smrg free(data); 217422944501Smrg} 217522944501Smrg 217620131375Smrgstatic void 217720131375Smrgaub_bo_get_address(drm_intel_bo *bo) 217822944501Smrg{ 217920131375Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 218020131375Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 218122944501Smrg 218220131375Smrg /* Give the object a graphics address in the AUB file. We 218320131375Smrg * don't just use the GEM object address because we do AUB 218420131375Smrg * dumping before execution -- we want to successfully log 218520131375Smrg * when the hardware might hang, and we might even want to aub 218620131375Smrg * capture for a driver trying to execute on a different 218720131375Smrg * generation of hardware by disabling the actual kernel exec 218820131375Smrg * call. 218920131375Smrg */ 219020131375Smrg bo_gem->aub_offset = bufmgr_gem->aub_offset; 219120131375Smrg bufmgr_gem->aub_offset += bo->size; 219220131375Smrg /* XXX: Handle aperture overflow. */ 219320131375Smrg assert(bufmgr_gem->aub_offset < 256 * 1024 * 1024); 219420131375Smrg} 219520131375Smrg 219620131375Smrgstatic void 219720131375Smrgaub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype, 219820131375Smrg uint32_t offset, uint32_t size) 219920131375Smrg{ 220020131375Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 220120131375Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 220220131375Smrg 220320131375Smrg aub_out(bufmgr_gem, 220420131375Smrg CMD_AUB_TRACE_HEADER_BLOCK | 220520131375Smrg ((bufmgr_gem->gen >= 8 ? 6 : 5) - 2)); 220620131375Smrg aub_out(bufmgr_gem, 220720131375Smrg AUB_TRACE_MEMTYPE_GTT | type | AUB_TRACE_OP_DATA_WRITE); 220820131375Smrg aub_out(bufmgr_gem, subtype); 220920131375Smrg aub_out(bufmgr_gem, bo_gem->aub_offset + offset); 221020131375Smrg aub_out(bufmgr_gem, size); 221120131375Smrg if (bufmgr_gem->gen >= 8) 221220131375Smrg aub_out(bufmgr_gem, 0); 221320131375Smrg aub_write_bo_data(bo, offset, size); 221420131375Smrg} 221520131375Smrg 221620131375Smrg/** 221720131375Smrg * Break up large objects into multiple writes. Otherwise a 128kb VBO 221820131375Smrg * would overflow the 16 bits of size field in the packet header and 221920131375Smrg * everything goes badly after that. 222020131375Smrg */ 222120131375Smrgstatic void 222220131375Smrgaub_write_large_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype, 222320131375Smrg uint32_t offset, uint32_t size) 222420131375Smrg{ 222520131375Smrg uint32_t block_size; 222620131375Smrg uint32_t sub_offset; 222720131375Smrg 222820131375Smrg for (sub_offset = 0; sub_offset < size; sub_offset += block_size) { 222920131375Smrg block_size = size - sub_offset; 223020131375Smrg 223120131375Smrg if (block_size > 8 * 4096) 223220131375Smrg block_size = 8 * 4096; 223320131375Smrg 223420131375Smrg aub_write_trace_block(bo, type, subtype, offset + sub_offset, 223520131375Smrg block_size); 223620131375Smrg } 223720131375Smrg} 223820131375Smrg 223920131375Smrgstatic void 224020131375Smrgaub_write_bo(drm_intel_bo *bo) 224120131375Smrg{ 224220131375Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 224320131375Smrg uint32_t offset = 0; 224420131375Smrg unsigned i; 224520131375Smrg 224620131375Smrg aub_bo_get_address(bo); 224720131375Smrg 224820131375Smrg /* Write out each annotated section separately. */ 224920131375Smrg for (i = 0; i < bo_gem->aub_annotation_count; ++i) { 225020131375Smrg drm_intel_aub_annotation *annotation = 225120131375Smrg &bo_gem->aub_annotations[i]; 225220131375Smrg uint32_t ending_offset = annotation->ending_offset; 225320131375Smrg if (ending_offset > bo->size) 225420131375Smrg ending_offset = bo->size; 225520131375Smrg if (ending_offset > offset) { 225620131375Smrg aub_write_large_trace_block(bo, annotation->type, 225720131375Smrg annotation->subtype, 225820131375Smrg offset, 225920131375Smrg ending_offset - offset); 226020131375Smrg offset = ending_offset; 226120131375Smrg } 226220131375Smrg } 226320131375Smrg 226420131375Smrg /* Write out any remaining unannotated data */ 226520131375Smrg if (offset < bo->size) { 226620131375Smrg aub_write_large_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0, 226720131375Smrg offset, bo->size - offset); 226820131375Smrg } 226920131375Smrg} 227020131375Smrg 227120131375Smrg/* 227220131375Smrg * Make a ringbuffer on fly and dump it 227320131375Smrg */ 227420131375Smrgstatic void 227520131375Smrgaub_build_dump_ringbuffer(drm_intel_bufmgr_gem *bufmgr_gem, 227620131375Smrg uint32_t batch_buffer, int ring_flag) 227720131375Smrg{ 227820131375Smrg uint32_t ringbuffer[4096]; 227920131375Smrg int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */ 228020131375Smrg int ring_count = 0; 228120131375Smrg 228220131375Smrg if (ring_flag == I915_EXEC_BSD) 228320131375Smrg ring = AUB_TRACE_TYPE_RING_PRB1; 228420131375Smrg else if (ring_flag == I915_EXEC_BLT) 228520131375Smrg ring = AUB_TRACE_TYPE_RING_PRB2; 228620131375Smrg 228720131375Smrg /* Make a ring buffer to execute our batchbuffer. */ 228820131375Smrg memset(ringbuffer, 0, sizeof(ringbuffer)); 228920131375Smrg if (bufmgr_gem->gen >= 8) { 229020131375Smrg ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START | (3 - 2); 229120131375Smrg ringbuffer[ring_count++] = batch_buffer; 229220131375Smrg ringbuffer[ring_count++] = 0; 229320131375Smrg } else { 229420131375Smrg ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START; 229520131375Smrg ringbuffer[ring_count++] = batch_buffer; 229620131375Smrg } 229720131375Smrg 229820131375Smrg /* Write out the ring. This appears to trigger execution of 229920131375Smrg * the ring in the simulator. 230020131375Smrg */ 230120131375Smrg aub_out(bufmgr_gem, 230220131375Smrg CMD_AUB_TRACE_HEADER_BLOCK | 230320131375Smrg ((bufmgr_gem->gen >= 8 ? 6 : 5) - 2)); 230420131375Smrg aub_out(bufmgr_gem, 230520131375Smrg AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE); 230620131375Smrg aub_out(bufmgr_gem, 0); /* general/surface subtype */ 230720131375Smrg aub_out(bufmgr_gem, bufmgr_gem->aub_offset); 230820131375Smrg aub_out(bufmgr_gem, ring_count * 4); 230920131375Smrg if (bufmgr_gem->gen >= 8) 231020131375Smrg aub_out(bufmgr_gem, 0); 231120131375Smrg 231220131375Smrg /* FIXME: Need some flush operations here? */ 231320131375Smrg aub_out_data(bufmgr_gem, ringbuffer, ring_count * 4); 231420131375Smrg 231520131375Smrg /* Update offset pointer */ 231620131375Smrg bufmgr_gem->aub_offset += 4096; 231720131375Smrg} 231820131375Smrg 2319424e9256Smrgvoid 232020131375Smrgdrm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo, 232120131375Smrg int x1, int y1, int width, int height, 232220131375Smrg enum aub_dump_bmp_format format, 232320131375Smrg int pitch, int offset) 232420131375Smrg{ 232520131375Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 232620131375Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 232720131375Smrg uint32_t cpp; 232820131375Smrg 232920131375Smrg switch (format) { 233020131375Smrg case AUB_DUMP_BMP_FORMAT_8BIT: 233120131375Smrg cpp = 1; 233220131375Smrg break; 233320131375Smrg case AUB_DUMP_BMP_FORMAT_ARGB_4444: 233420131375Smrg cpp = 2; 233520131375Smrg break; 233620131375Smrg case AUB_DUMP_BMP_FORMAT_ARGB_0888: 233720131375Smrg case AUB_DUMP_BMP_FORMAT_ARGB_8888: 233820131375Smrg cpp = 4; 233920131375Smrg break; 234020131375Smrg default: 234120131375Smrg printf("Unknown AUB dump format %d\n", format); 234220131375Smrg return; 234320131375Smrg } 234420131375Smrg 234520131375Smrg if (!bufmgr_gem->aub_file) 234620131375Smrg return; 234720131375Smrg 234820131375Smrg aub_out(bufmgr_gem, CMD_AUB_DUMP_BMP | 4); 234920131375Smrg aub_out(bufmgr_gem, (y1 << 16) | x1); 235020131375Smrg aub_out(bufmgr_gem, 235120131375Smrg (format << 24) | 235220131375Smrg (cpp << 19) | 235320131375Smrg pitch / 4); 235420131375Smrg aub_out(bufmgr_gem, (height << 16) | width); 235520131375Smrg aub_out(bufmgr_gem, bo_gem->aub_offset + offset); 235620131375Smrg aub_out(bufmgr_gem, 235720131375Smrg ((bo_gem->tiling_mode != I915_TILING_NONE) ? (1 << 2) : 0) | 235820131375Smrg ((bo_gem->tiling_mode == I915_TILING_Y) ? (1 << 3) : 0)); 235920131375Smrg} 236020131375Smrg 236120131375Smrgstatic void 236220131375Smrgaub_exec(drm_intel_bo *bo, int ring_flag, int used) 236320131375Smrg{ 236420131375Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 236520131375Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 236620131375Smrg int i; 236720131375Smrg bool batch_buffer_needs_annotations; 236820131375Smrg 236920131375Smrg if (!bufmgr_gem->aub_file) 237020131375Smrg return; 237120131375Smrg 237220131375Smrg /* If batch buffer is not annotated, annotate it the best we 237320131375Smrg * can. 237420131375Smrg */ 237520131375Smrg batch_buffer_needs_annotations = bo_gem->aub_annotation_count == 0; 237620131375Smrg if (batch_buffer_needs_annotations) { 237720131375Smrg drm_intel_aub_annotation annotations[2] = { 237820131375Smrg { AUB_TRACE_TYPE_BATCH, 0, used }, 237920131375Smrg { AUB_TRACE_TYPE_NOTYPE, 0, bo->size } 238020131375Smrg }; 238120131375Smrg drm_intel_bufmgr_gem_set_aub_annotations(bo, annotations, 2); 238220131375Smrg } 238320131375Smrg 238420131375Smrg /* Write out all buffers to AUB memory */ 238520131375Smrg for (i = 0; i < bufmgr_gem->exec_count; i++) { 238620131375Smrg aub_write_bo(bufmgr_gem->exec_bos[i]); 238720131375Smrg } 238820131375Smrg 238920131375Smrg /* Remove any annotations we added */ 239020131375Smrg if (batch_buffer_needs_annotations) 239120131375Smrg drm_intel_bufmgr_gem_set_aub_annotations(bo, NULL, 0); 239220131375Smrg 239320131375Smrg /* Dump ring buffer */ 239420131375Smrg aub_build_dump_ringbuffer(bufmgr_gem, bo_gem->aub_offset, ring_flag); 239520131375Smrg 239620131375Smrg fflush(bufmgr_gem->aub_file); 239720131375Smrg 239820131375Smrg /* 239920131375Smrg * One frame has been dumped. So reset the aub_offset for the next frame. 240020131375Smrg * 240120131375Smrg * FIXME: Can we do this? 240220131375Smrg */ 240320131375Smrg bufmgr_gem->aub_offset = 0x10000; 240420131375Smrg} 240520131375Smrg 240620131375Smrgstatic int 240720131375Smrgdrm_intel_gem_bo_exec(drm_intel_bo *bo, int used, 240820131375Smrg drm_clip_rect_t * cliprects, int num_cliprects, int DR4) 240920131375Smrg{ 241020131375Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 241120131375Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 241220131375Smrg struct drm_i915_gem_execbuffer execbuf; 241320131375Smrg int ret, i; 241420131375Smrg 241520131375Smrg if (bo_gem->has_error) 241620131375Smrg return -ENOMEM; 241720131375Smrg 241820131375Smrg pthread_mutex_lock(&bufmgr_gem->lock); 241920131375Smrg /* Update indices and set up the validate list. */ 242020131375Smrg drm_intel_gem_bo_process_reloc(bo); 242120131375Smrg 242220131375Smrg /* Add the batch buffer to the validation list. There are no 242320131375Smrg * relocations pointing to it. 242420131375Smrg */ 242520131375Smrg drm_intel_add_validate_buffer(bo); 242620131375Smrg 2427424e9256Smrg memclear(execbuf); 242820131375Smrg execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects; 242920131375Smrg execbuf.buffer_count = bufmgr_gem->exec_count; 243020131375Smrg execbuf.batch_start_offset = 0; 243120131375Smrg execbuf.batch_len = used; 243220131375Smrg execbuf.cliprects_ptr = (uintptr_t) cliprects; 243320131375Smrg execbuf.num_cliprects = num_cliprects; 243420131375Smrg execbuf.DR1 = 0; 243520131375Smrg execbuf.DR4 = DR4; 243620131375Smrg 243720131375Smrg ret = drmIoctl(bufmgr_gem->fd, 243820131375Smrg DRM_IOCTL_I915_GEM_EXECBUFFER, 243920131375Smrg &execbuf); 244020131375Smrg if (ret != 0) { 244120131375Smrg ret = -errno; 244220131375Smrg if (errno == ENOSPC) { 244320131375Smrg DBG("Execbuffer fails to pin. " 244420131375Smrg "Estimate: %u. Actual: %u. Available: %u\n", 244520131375Smrg drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 244620131375Smrg bufmgr_gem-> 244720131375Smrg exec_count), 244820131375Smrg drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 244920131375Smrg bufmgr_gem-> 245020131375Smrg exec_count), 245120131375Smrg (unsigned int)bufmgr_gem->gtt_size); 245220131375Smrg } 245320131375Smrg } 245420131375Smrg drm_intel_update_buffer_offsets(bufmgr_gem); 245520131375Smrg 245620131375Smrg if (bufmgr_gem->bufmgr.debug) 245720131375Smrg drm_intel_gem_dump_validation_list(bufmgr_gem); 245820131375Smrg 245920131375Smrg for (i = 0; i < bufmgr_gem->exec_count; i++) { 246020131375Smrg drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 246120131375Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 246220131375Smrg 246320131375Smrg bo_gem->idle = false; 246420131375Smrg 246520131375Smrg /* Disconnect the buffer from the validate list */ 246620131375Smrg bo_gem->validate_index = -1; 246720131375Smrg bufmgr_gem->exec_bos[i] = NULL; 246820131375Smrg } 246920131375Smrg bufmgr_gem->exec_count = 0; 247020131375Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 247120131375Smrg 247220131375Smrg return ret; 247320131375Smrg} 247420131375Smrg 247520131375Smrgstatic int 247620131375Smrgdo_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx, 247720131375Smrg drm_clip_rect_t *cliprects, int num_cliprects, int DR4, 247820131375Smrg unsigned int flags) 247920131375Smrg{ 248020131375Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 248120131375Smrg struct drm_i915_gem_execbuffer2 execbuf; 248220131375Smrg int ret = 0; 248320131375Smrg int i; 248420131375Smrg 248520131375Smrg switch (flags & 0x7) { 248620131375Smrg default: 248720131375Smrg return -EINVAL; 248820131375Smrg case I915_EXEC_BLT: 24899ce4edccSmrg if (!bufmgr_gem->has_blt) 24909ce4edccSmrg return -EINVAL; 24919ce4edccSmrg break; 24929ce4edccSmrg case I915_EXEC_BSD: 24939ce4edccSmrg if (!bufmgr_gem->has_bsd) 24949ce4edccSmrg return -EINVAL; 24959ce4edccSmrg break; 249620131375Smrg case I915_EXEC_VEBOX: 249720131375Smrg if (!bufmgr_gem->has_vebox) 249820131375Smrg return -EINVAL; 249920131375Smrg break; 25009ce4edccSmrg case I915_EXEC_RENDER: 25019ce4edccSmrg case I915_EXEC_DEFAULT: 25029ce4edccSmrg break; 25039ce4edccSmrg } 2504aaba2545Smrg 250522944501Smrg pthread_mutex_lock(&bufmgr_gem->lock); 250622944501Smrg /* Update indices and set up the validate list. */ 250722944501Smrg drm_intel_gem_bo_process_reloc2(bo); 250822944501Smrg 250922944501Smrg /* Add the batch buffer to the validation list. There are no relocations 251022944501Smrg * pointing to it. 251122944501Smrg */ 251222944501Smrg drm_intel_add_validate_buffer2(bo, 0); 251322944501Smrg 2514424e9256Smrg memclear(execbuf); 251522944501Smrg execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects; 251622944501Smrg execbuf.buffer_count = bufmgr_gem->exec_count; 251722944501Smrg execbuf.batch_start_offset = 0; 251822944501Smrg execbuf.batch_len = used; 251922944501Smrg execbuf.cliprects_ptr = (uintptr_t)cliprects; 252022944501Smrg execbuf.num_cliprects = num_cliprects; 252122944501Smrg execbuf.DR1 = 0; 252222944501Smrg execbuf.DR4 = DR4; 252320131375Smrg execbuf.flags = flags; 252420131375Smrg if (ctx == NULL) 252520131375Smrg i915_execbuffer2_set_context_id(execbuf, 0); 252620131375Smrg else 252720131375Smrg i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id); 252822944501Smrg execbuf.rsvd2 = 0; 252922944501Smrg 253020131375Smrg aub_exec(bo, flags, used); 253120131375Smrg 253220131375Smrg if (bufmgr_gem->no_exec) 253320131375Smrg goto skip_execution; 253420131375Smrg 25356d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, 25366d98c517Smrg DRM_IOCTL_I915_GEM_EXECBUFFER2, 25376d98c517Smrg &execbuf); 253822944501Smrg if (ret != 0) { 253922944501Smrg ret = -errno; 25406d98c517Smrg if (ret == -ENOSPC) { 25419ce4edccSmrg DBG("Execbuffer fails to pin. " 25429ce4edccSmrg "Estimate: %u. Actual: %u. Available: %u\n", 25439ce4edccSmrg drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 25449ce4edccSmrg bufmgr_gem->exec_count), 25459ce4edccSmrg drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 25469ce4edccSmrg bufmgr_gem->exec_count), 25479ce4edccSmrg (unsigned int) bufmgr_gem->gtt_size); 254822944501Smrg } 254922944501Smrg } 255022944501Smrg drm_intel_update_buffer_offsets2(bufmgr_gem); 255122944501Smrg 255220131375Smrgskip_execution: 255322944501Smrg if (bufmgr_gem->bufmgr.debug) 255422944501Smrg drm_intel_gem_dump_validation_list(bufmgr_gem); 255522944501Smrg 255622944501Smrg for (i = 0; i < bufmgr_gem->exec_count; i++) { 255722944501Smrg drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 255822944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 255922944501Smrg 256020131375Smrg bo_gem->idle = false; 256120131375Smrg 256222944501Smrg /* Disconnect the buffer from the validate list */ 256322944501Smrg bo_gem->validate_index = -1; 256422944501Smrg bufmgr_gem->exec_bos[i] = NULL; 256522944501Smrg } 256622944501Smrg bufmgr_gem->exec_count = 0; 256722944501Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 256822944501Smrg 256922944501Smrg return ret; 257022944501Smrg} 257122944501Smrg 2572aaba2545Smrgstatic int 2573aaba2545Smrgdrm_intel_gem_bo_exec2(drm_intel_bo *bo, int used, 2574aaba2545Smrg drm_clip_rect_t *cliprects, int num_cliprects, 2575aaba2545Smrg int DR4) 2576aaba2545Smrg{ 257720131375Smrg return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, 257820131375Smrg I915_EXEC_RENDER); 257920131375Smrg} 258020131375Smrg 258120131375Smrgstatic int 258220131375Smrgdrm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, 258320131375Smrg drm_clip_rect_t *cliprects, int num_cliprects, int DR4, 258420131375Smrg unsigned int flags) 258520131375Smrg{ 258620131375Smrg return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, 258720131375Smrg flags); 258820131375Smrg} 258920131375Smrg 2590424e9256Smrgint 259120131375Smrgdrm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx, 259220131375Smrg int used, unsigned int flags) 259320131375Smrg{ 259420131375Smrg return do_exec2(bo, used, ctx, NULL, 0, 0, flags); 2595aaba2545Smrg} 2596aaba2545Smrg 259722944501Smrgstatic int 259822944501Smrgdrm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment) 259922944501Smrg{ 260022944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 260122944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 260222944501Smrg struct drm_i915_gem_pin pin; 260322944501Smrg int ret; 260422944501Smrg 2605424e9256Smrg memclear(pin); 260622944501Smrg pin.handle = bo_gem->gem_handle; 260722944501Smrg pin.alignment = alignment; 260822944501Smrg 26096d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, 26106d98c517Smrg DRM_IOCTL_I915_GEM_PIN, 26116d98c517Smrg &pin); 261222944501Smrg if (ret != 0) 261322944501Smrg return -errno; 261422944501Smrg 261520131375Smrg bo->offset64 = pin.offset; 261622944501Smrg bo->offset = pin.offset; 261722944501Smrg return 0; 261822944501Smrg} 261922944501Smrg 262022944501Smrgstatic int 262122944501Smrgdrm_intel_gem_bo_unpin(drm_intel_bo *bo) 262222944501Smrg{ 262322944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 262422944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 262522944501Smrg struct drm_i915_gem_unpin unpin; 262622944501Smrg int ret; 262722944501Smrg 2628424e9256Smrg memclear(unpin); 262922944501Smrg unpin.handle = bo_gem->gem_handle; 263022944501Smrg 26316d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin); 263222944501Smrg if (ret != 0) 263322944501Smrg return -errno; 263422944501Smrg 263522944501Smrg return 0; 263622944501Smrg} 263722944501Smrg 263822944501Smrgstatic int 26396d98c517Smrgdrm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, 26406d98c517Smrg uint32_t tiling_mode, 26416d98c517Smrg uint32_t stride) 264222944501Smrg{ 264322944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 264422944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 264522944501Smrg struct drm_i915_gem_set_tiling set_tiling; 264622944501Smrg int ret; 264722944501Smrg 26486d98c517Smrg if (bo_gem->global_name == 0 && 26496d98c517Smrg tiling_mode == bo_gem->tiling_mode && 26506d98c517Smrg stride == bo_gem->stride) 265122944501Smrg return 0; 265222944501Smrg 265322944501Smrg memset(&set_tiling, 0, sizeof(set_tiling)); 265422944501Smrg do { 26556d98c517Smrg /* set_tiling is slightly broken and overwrites the 26566d98c517Smrg * input on the error path, so we have to open code 26576d98c517Smrg * rmIoctl. 26586d98c517Smrg */ 26596d98c517Smrg set_tiling.handle = bo_gem->gem_handle; 26606d98c517Smrg set_tiling.tiling_mode = tiling_mode; 266122944501Smrg set_tiling.stride = stride; 266222944501Smrg 266322944501Smrg ret = ioctl(bufmgr_gem->fd, 266422944501Smrg DRM_IOCTL_I915_GEM_SET_TILING, 266522944501Smrg &set_tiling); 26666d98c517Smrg } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); 26676d98c517Smrg if (ret == -1) 26686d98c517Smrg return -errno; 26696d98c517Smrg 26706d98c517Smrg bo_gem->tiling_mode = set_tiling.tiling_mode; 26716d98c517Smrg bo_gem->swizzle_mode = set_tiling.swizzle_mode; 26726d98c517Smrg bo_gem->stride = set_tiling.stride; 26736d98c517Smrg return 0; 26746d98c517Smrg} 26756d98c517Smrg 26766d98c517Smrgstatic int 26776d98c517Smrgdrm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 26786d98c517Smrg uint32_t stride) 26796d98c517Smrg{ 26806d98c517Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 26816d98c517Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 26826d98c517Smrg int ret; 26836d98c517Smrg 2684a884aba1Smrg /* Tiling with userptr surfaces is not supported 2685a884aba1Smrg * on all hardware so refuse it for time being. 2686a884aba1Smrg */ 2687a884aba1Smrg if (bo_gem->is_userptr) 2688a884aba1Smrg return -EINVAL; 2689a884aba1Smrg 26906d98c517Smrg /* Linear buffers have no stride. By ensuring that we only ever use 26916d98c517Smrg * stride 0 with linear buffers, we simplify our code. 26926d98c517Smrg */ 26936d98c517Smrg if (*tiling_mode == I915_TILING_NONE) 26946d98c517Smrg stride = 0; 26956d98c517Smrg 26966d98c517Smrg ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride); 26976d98c517Smrg if (ret == 0) 2698aaba2545Smrg drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 269922944501Smrg 270022944501Smrg *tiling_mode = bo_gem->tiling_mode; 2701aaba2545Smrg return ret; 270222944501Smrg} 270322944501Smrg 270422944501Smrgstatic int 270522944501Smrgdrm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 270622944501Smrg uint32_t * swizzle_mode) 270722944501Smrg{ 270822944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 270922944501Smrg 271022944501Smrg *tiling_mode = bo_gem->tiling_mode; 271122944501Smrg *swizzle_mode = bo_gem->swizzle_mode; 271222944501Smrg return 0; 271322944501Smrg} 271422944501Smrg 2715424e9256Smrgdrm_intel_bo * 271620131375Smrgdrm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size) 271720131375Smrg{ 271820131375Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 271920131375Smrg int ret; 272020131375Smrg uint32_t handle; 272120131375Smrg drm_intel_bo_gem *bo_gem; 272220131375Smrg struct drm_i915_gem_get_tiling get_tiling; 272320131375Smrg drmMMListHead *list; 272420131375Smrg 272520131375Smrg ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle); 272620131375Smrg 272720131375Smrg /* 272820131375Smrg * See if the kernel has already returned this buffer to us. Just as 272920131375Smrg * for named buffers, we must not create two bo's pointing at the same 273020131375Smrg * kernel object 273120131375Smrg */ 2732a884aba1Smrg pthread_mutex_lock(&bufmgr_gem->lock); 273320131375Smrg for (list = bufmgr_gem->named.next; 273420131375Smrg list != &bufmgr_gem->named; 273520131375Smrg list = list->next) { 273620131375Smrg bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list); 273720131375Smrg if (bo_gem->gem_handle == handle) { 273820131375Smrg drm_intel_gem_bo_reference(&bo_gem->bo); 2739a884aba1Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 274020131375Smrg return &bo_gem->bo; 274120131375Smrg } 274220131375Smrg } 274320131375Smrg 274420131375Smrg if (ret) { 274520131375Smrg fprintf(stderr,"ret is %d %d\n", ret, errno); 2746a884aba1Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 274720131375Smrg return NULL; 274820131375Smrg } 274920131375Smrg 275020131375Smrg bo_gem = calloc(1, sizeof(*bo_gem)); 2751a884aba1Smrg if (!bo_gem) { 2752a884aba1Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 275320131375Smrg return NULL; 2754a884aba1Smrg } 275520131375Smrg /* Determine size of bo. The fd-to-handle ioctl really should 275620131375Smrg * return the size, but it doesn't. If we have kernel 3.12 or 275720131375Smrg * later, we can lseek on the prime fd to get the size. Older 275820131375Smrg * kernels will just fail, in which case we fall back to the 275920131375Smrg * provided (estimated or guess size). */ 276020131375Smrg ret = lseek(prime_fd, 0, SEEK_END); 276120131375Smrg if (ret != -1) 276220131375Smrg bo_gem->bo.size = ret; 276320131375Smrg else 276420131375Smrg bo_gem->bo.size = size; 276520131375Smrg 276620131375Smrg bo_gem->bo.handle = handle; 276720131375Smrg bo_gem->bo.bufmgr = bufmgr; 276820131375Smrg 276920131375Smrg bo_gem->gem_handle = handle; 277020131375Smrg 277120131375Smrg atomic_set(&bo_gem->refcount, 1); 277220131375Smrg 277320131375Smrg bo_gem->name = "prime"; 277420131375Smrg bo_gem->validate_index = -1; 277520131375Smrg bo_gem->reloc_tree_fences = 0; 277620131375Smrg bo_gem->used_as_reloc_target = false; 277720131375Smrg bo_gem->has_error = false; 277820131375Smrg bo_gem->reusable = false; 277920131375Smrg 278020131375Smrg DRMINITLISTHEAD(&bo_gem->vma_list); 278120131375Smrg DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); 2782a884aba1Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 278320131375Smrg 2784424e9256Smrg memclear(get_tiling); 278520131375Smrg get_tiling.handle = bo_gem->gem_handle; 278620131375Smrg ret = drmIoctl(bufmgr_gem->fd, 278720131375Smrg DRM_IOCTL_I915_GEM_GET_TILING, 278820131375Smrg &get_tiling); 278920131375Smrg if (ret != 0) { 279020131375Smrg drm_intel_gem_bo_unreference(&bo_gem->bo); 279120131375Smrg return NULL; 279220131375Smrg } 279320131375Smrg bo_gem->tiling_mode = get_tiling.tiling_mode; 279420131375Smrg bo_gem->swizzle_mode = get_tiling.swizzle_mode; 279520131375Smrg /* XXX stride is unknown */ 279620131375Smrg drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 279720131375Smrg 279820131375Smrg return &bo_gem->bo; 279920131375Smrg} 280020131375Smrg 2801424e9256Smrgint 280220131375Smrgdrm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd) 280320131375Smrg{ 280420131375Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 280520131375Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 280620131375Smrg 2807a884aba1Smrg pthread_mutex_lock(&bufmgr_gem->lock); 280820131375Smrg if (DRMLISTEMPTY(&bo_gem->name_list)) 280920131375Smrg DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); 2810a884aba1Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 281120131375Smrg 281220131375Smrg if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle, 281320131375Smrg DRM_CLOEXEC, prime_fd) != 0) 281420131375Smrg return -errno; 281520131375Smrg 281620131375Smrg bo_gem->reusable = false; 281720131375Smrg 281820131375Smrg return 0; 281920131375Smrg} 282020131375Smrg 282122944501Smrgstatic int 282222944501Smrgdrm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name) 282322944501Smrg{ 282422944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 282522944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 282622944501Smrg int ret; 282722944501Smrg 282822944501Smrg if (!bo_gem->global_name) { 282920131375Smrg struct drm_gem_flink flink; 283020131375Smrg 2831424e9256Smrg memclear(flink); 283222944501Smrg flink.handle = bo_gem->gem_handle; 283322944501Smrg 2834a884aba1Smrg pthread_mutex_lock(&bufmgr_gem->lock); 2835a884aba1Smrg 28366d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink); 2837a884aba1Smrg if (ret != 0) { 2838a884aba1Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 283922944501Smrg return -errno; 2840a884aba1Smrg } 284120131375Smrg 284222944501Smrg bo_gem->global_name = flink.name; 284320131375Smrg bo_gem->reusable = false; 284420131375Smrg 284520131375Smrg if (DRMLISTEMPTY(&bo_gem->name_list)) 284620131375Smrg DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); 2847a884aba1Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 284822944501Smrg } 284922944501Smrg 285022944501Smrg *name = bo_gem->global_name; 285122944501Smrg return 0; 285222944501Smrg} 285322944501Smrg 285422944501Smrg/** 285522944501Smrg * Enables unlimited caching of buffer objects for reuse. 285622944501Smrg * 285722944501Smrg * This is potentially very memory expensive, as the cache at each bucket 285822944501Smrg * size is only bounded by how many buffers of that size we've managed to have 285922944501Smrg * in flight at once. 286022944501Smrg */ 2861424e9256Smrgvoid 286222944501Smrgdrm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr) 286322944501Smrg{ 286422944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 286522944501Smrg 286620131375Smrg bufmgr_gem->bo_reuse = true; 286722944501Smrg} 286822944501Smrg 286922944501Smrg/** 287022944501Smrg * Enable use of fenced reloc type. 287122944501Smrg * 287222944501Smrg * New code should enable this to avoid unnecessary fence register 287322944501Smrg * allocation. If this option is not enabled, all relocs will have fence 287422944501Smrg * register allocated. 287522944501Smrg */ 2876424e9256Smrgvoid 287722944501Smrgdrm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr) 287822944501Smrg{ 287922944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 288022944501Smrg 288122944501Smrg if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2) 288220131375Smrg bufmgr_gem->fenced_relocs = true; 288322944501Smrg} 288422944501Smrg 288522944501Smrg/** 288622944501Smrg * Return the additional aperture space required by the tree of buffer objects 288722944501Smrg * rooted at bo. 288822944501Smrg */ 288922944501Smrgstatic int 289022944501Smrgdrm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo) 289122944501Smrg{ 289222944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 289322944501Smrg int i; 289422944501Smrg int total = 0; 289522944501Smrg 289622944501Smrg if (bo == NULL || bo_gem->included_in_check_aperture) 289722944501Smrg return 0; 289822944501Smrg 289922944501Smrg total += bo->size; 290020131375Smrg bo_gem->included_in_check_aperture = true; 290122944501Smrg 290222944501Smrg for (i = 0; i < bo_gem->reloc_count; i++) 290322944501Smrg total += 290422944501Smrg drm_intel_gem_bo_get_aperture_space(bo_gem-> 290522944501Smrg reloc_target_info[i].bo); 290622944501Smrg 290722944501Smrg return total; 290822944501Smrg} 290922944501Smrg 291022944501Smrg/** 291122944501Smrg * Count the number of buffers in this list that need a fence reg 291222944501Smrg * 291322944501Smrg * If the count is greater than the number of available regs, we'll have 291422944501Smrg * to ask the caller to resubmit a batch with fewer tiled buffers. 291522944501Smrg * 291622944501Smrg * This function over-counts if the same buffer is used multiple times. 291722944501Smrg */ 291822944501Smrgstatic unsigned int 291922944501Smrgdrm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count) 292022944501Smrg{ 292122944501Smrg int i; 292222944501Smrg unsigned int total = 0; 292322944501Smrg 292422944501Smrg for (i = 0; i < count; i++) { 292522944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 292622944501Smrg 292722944501Smrg if (bo_gem == NULL) 292822944501Smrg continue; 292922944501Smrg 293022944501Smrg total += bo_gem->reloc_tree_fences; 293122944501Smrg } 293222944501Smrg return total; 293322944501Smrg} 293422944501Smrg 293522944501Smrg/** 293622944501Smrg * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready 293722944501Smrg * for the next drm_intel_bufmgr_check_aperture_space() call. 293822944501Smrg */ 293922944501Smrgstatic void 294022944501Smrgdrm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo) 294122944501Smrg{ 294222944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 294322944501Smrg int i; 294422944501Smrg 294522944501Smrg if (bo == NULL || !bo_gem->included_in_check_aperture) 294622944501Smrg return; 294722944501Smrg 294820131375Smrg bo_gem->included_in_check_aperture = false; 294922944501Smrg 295022944501Smrg for (i = 0; i < bo_gem->reloc_count; i++) 295122944501Smrg drm_intel_gem_bo_clear_aperture_space_flag(bo_gem-> 295222944501Smrg reloc_target_info[i].bo); 295322944501Smrg} 295422944501Smrg 295522944501Smrg/** 295622944501Smrg * Return a conservative estimate for the amount of aperture required 295722944501Smrg * for a collection of buffers. This may double-count some buffers. 295822944501Smrg */ 295922944501Smrgstatic unsigned int 296022944501Smrgdrm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count) 296122944501Smrg{ 296222944501Smrg int i; 296322944501Smrg unsigned int total = 0; 296422944501Smrg 296522944501Smrg for (i = 0; i < count; i++) { 296622944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 296722944501Smrg if (bo_gem != NULL) 296822944501Smrg total += bo_gem->reloc_tree_size; 296922944501Smrg } 297022944501Smrg return total; 297122944501Smrg} 297222944501Smrg 297322944501Smrg/** 297422944501Smrg * Return the amount of aperture needed for a collection of buffers. 297522944501Smrg * This avoids double counting any buffers, at the cost of looking 297622944501Smrg * at every buffer in the set. 297722944501Smrg */ 297822944501Smrgstatic unsigned int 297922944501Smrgdrm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count) 298022944501Smrg{ 298122944501Smrg int i; 298222944501Smrg unsigned int total = 0; 298322944501Smrg 298422944501Smrg for (i = 0; i < count; i++) { 298522944501Smrg total += drm_intel_gem_bo_get_aperture_space(bo_array[i]); 298622944501Smrg /* For the first buffer object in the array, we get an 298722944501Smrg * accurate count back for its reloc_tree size (since nothing 298822944501Smrg * had been flagged as being counted yet). We can save that 298922944501Smrg * value out as a more conservative reloc_tree_size that 299022944501Smrg * avoids double-counting target buffers. Since the first 299122944501Smrg * buffer happens to usually be the batch buffer in our 299222944501Smrg * callers, this can pull us back from doing the tree 299322944501Smrg * walk on every new batch emit. 299422944501Smrg */ 299522944501Smrg if (i == 0) { 299622944501Smrg drm_intel_bo_gem *bo_gem = 299722944501Smrg (drm_intel_bo_gem *) bo_array[i]; 299822944501Smrg bo_gem->reloc_tree_size = total; 299922944501Smrg } 300022944501Smrg } 300122944501Smrg 300222944501Smrg for (i = 0; i < count; i++) 300322944501Smrg drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]); 300422944501Smrg return total; 300522944501Smrg} 300622944501Smrg 300722944501Smrg/** 300822944501Smrg * Return -1 if the batchbuffer should be flushed before attempting to 300922944501Smrg * emit rendering referencing the buffers pointed to by bo_array. 301022944501Smrg * 301122944501Smrg * This is required because if we try to emit a batchbuffer with relocations 301222944501Smrg * to a tree of buffers that won't simultaneously fit in the aperture, 301322944501Smrg * the rendering will return an error at a point where the software is not 301422944501Smrg * prepared to recover from it. 301522944501Smrg * 301622944501Smrg * However, we also want to emit the batchbuffer significantly before we reach 301722944501Smrg * the limit, as a series of batchbuffers each of which references buffers 301822944501Smrg * covering almost all of the aperture means that at each emit we end up 301922944501Smrg * waiting to evict a buffer from the last rendering, and we get synchronous 302022944501Smrg * performance. By emitting smaller batchbuffers, we eat some CPU overhead to 302122944501Smrg * get better parallelism. 302222944501Smrg */ 302322944501Smrgstatic int 302422944501Smrgdrm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count) 302522944501Smrg{ 302622944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = 302722944501Smrg (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr; 302822944501Smrg unsigned int total = 0; 302922944501Smrg unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4; 303022944501Smrg int total_fences; 303122944501Smrg 303222944501Smrg /* Check for fence reg constraints if necessary */ 303322944501Smrg if (bufmgr_gem->available_fences) { 303422944501Smrg total_fences = drm_intel_gem_total_fences(bo_array, count); 303522944501Smrg if (total_fences > bufmgr_gem->available_fences) 303622944501Smrg return -ENOSPC; 303722944501Smrg } 303822944501Smrg 303922944501Smrg total = drm_intel_gem_estimate_batch_space(bo_array, count); 304022944501Smrg 304122944501Smrg if (total > threshold) 304222944501Smrg total = drm_intel_gem_compute_batch_space(bo_array, count); 304322944501Smrg 304422944501Smrg if (total > threshold) { 304522944501Smrg DBG("check_space: overflowed available aperture, " 304622944501Smrg "%dkb vs %dkb\n", 304722944501Smrg total / 1024, (int)bufmgr_gem->gtt_size / 1024); 304822944501Smrg return -ENOSPC; 304922944501Smrg } else { 305022944501Smrg DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024, 305122944501Smrg (int)bufmgr_gem->gtt_size / 1024); 305222944501Smrg return 0; 305322944501Smrg } 305422944501Smrg} 305522944501Smrg 305622944501Smrg/* 305722944501Smrg * Disable buffer reuse for objects which are shared with the kernel 305822944501Smrg * as scanout buffers 305922944501Smrg */ 306022944501Smrgstatic int 306122944501Smrgdrm_intel_gem_bo_disable_reuse(drm_intel_bo *bo) 306222944501Smrg{ 306322944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 306422944501Smrg 306520131375Smrg bo_gem->reusable = false; 306622944501Smrg return 0; 306722944501Smrg} 306822944501Smrg 3069aaba2545Smrgstatic int 3070aaba2545Smrgdrm_intel_gem_bo_is_reusable(drm_intel_bo *bo) 3071aaba2545Smrg{ 3072aaba2545Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 3073aaba2545Smrg 3074aaba2545Smrg return bo_gem->reusable; 3075aaba2545Smrg} 3076aaba2545Smrg 307722944501Smrgstatic int 307822944501Smrg_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) 307922944501Smrg{ 308022944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 308122944501Smrg int i; 308222944501Smrg 308322944501Smrg for (i = 0; i < bo_gem->reloc_count; i++) { 308422944501Smrg if (bo_gem->reloc_target_info[i].bo == target_bo) 308522944501Smrg return 1; 3086aaba2545Smrg if (bo == bo_gem->reloc_target_info[i].bo) 3087aaba2545Smrg continue; 308822944501Smrg if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo, 308922944501Smrg target_bo)) 309022944501Smrg return 1; 309122944501Smrg } 309222944501Smrg 309322944501Smrg return 0; 309422944501Smrg} 309522944501Smrg 309622944501Smrg/** Return true if target_bo is referenced by bo's relocation tree. */ 309722944501Smrgstatic int 309822944501Smrgdrm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) 309922944501Smrg{ 310022944501Smrg drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 310122944501Smrg 310222944501Smrg if (bo == NULL || target_bo == NULL) 310322944501Smrg return 0; 310422944501Smrg if (target_bo_gem->used_as_reloc_target) 310522944501Smrg return _drm_intel_gem_bo_references(bo, target_bo); 310622944501Smrg return 0; 310722944501Smrg} 310822944501Smrg 3109aaba2545Smrgstatic void 3110aaba2545Smrgadd_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size) 3111aaba2545Smrg{ 3112aaba2545Smrg unsigned int i = bufmgr_gem->num_buckets; 3113aaba2545Smrg 3114aaba2545Smrg assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket)); 3115aaba2545Smrg 3116aaba2545Smrg DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head); 3117aaba2545Smrg bufmgr_gem->cache_bucket[i].size = size; 3118aaba2545Smrg bufmgr_gem->num_buckets++; 3119aaba2545Smrg} 3120aaba2545Smrg 3121aaba2545Smrgstatic void 3122aaba2545Smrginit_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem) 3123aaba2545Smrg{ 3124aaba2545Smrg unsigned long size, cache_max_size = 64 * 1024 * 1024; 3125aaba2545Smrg 3126aaba2545Smrg /* OK, so power of two buckets was too wasteful of memory. 3127aaba2545Smrg * Give 3 other sizes between each power of two, to hopefully 3128aaba2545Smrg * cover things accurately enough. (The alternative is 3129aaba2545Smrg * probably to just go for exact matching of sizes, and assume 3130aaba2545Smrg * that for things like composited window resize the tiled 3131aaba2545Smrg * width/height alignment and rounding of sizes to pages will 3132aaba2545Smrg * get us useful cache hit rates anyway) 3133aaba2545Smrg */ 3134aaba2545Smrg add_bucket(bufmgr_gem, 4096); 3135aaba2545Smrg add_bucket(bufmgr_gem, 4096 * 2); 3136aaba2545Smrg add_bucket(bufmgr_gem, 4096 * 3); 3137aaba2545Smrg 3138aaba2545Smrg /* Initialize the linked lists for BO reuse cache. */ 3139aaba2545Smrg for (size = 4 * 4096; size <= cache_max_size; size *= 2) { 3140aaba2545Smrg add_bucket(bufmgr_gem, size); 3141aaba2545Smrg 3142aaba2545Smrg add_bucket(bufmgr_gem, size + size * 1 / 4); 3143aaba2545Smrg add_bucket(bufmgr_gem, size + size * 2 / 4); 3144aaba2545Smrg add_bucket(bufmgr_gem, size + size * 3 / 4); 3145aaba2545Smrg } 3146aaba2545Smrg} 3147aaba2545Smrg 3148424e9256Smrgvoid 314920131375Smrgdrm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit) 315020131375Smrg{ 315120131375Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 315220131375Smrg 315320131375Smrg bufmgr_gem->vma_max = limit; 315420131375Smrg 315520131375Smrg drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 315620131375Smrg} 315720131375Smrg 315820131375Smrg/** 315920131375Smrg * Get the PCI ID for the device. This can be overridden by setting the 316020131375Smrg * INTEL_DEVID_OVERRIDE environment variable to the desired ID. 316120131375Smrg */ 316220131375Smrgstatic int 316320131375Smrgget_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem) 316420131375Smrg{ 316520131375Smrg char *devid_override; 3166424e9256Smrg int devid = 0; 316720131375Smrg int ret; 316820131375Smrg drm_i915_getparam_t gp; 316920131375Smrg 317020131375Smrg if (geteuid() == getuid()) { 317120131375Smrg devid_override = getenv("INTEL_DEVID_OVERRIDE"); 317220131375Smrg if (devid_override) { 317320131375Smrg bufmgr_gem->no_exec = true; 317420131375Smrg return strtod(devid_override, NULL); 317520131375Smrg } 317620131375Smrg } 317720131375Smrg 3178424e9256Smrg memclear(gp); 317920131375Smrg gp.param = I915_PARAM_CHIPSET_ID; 318020131375Smrg gp.value = &devid; 318120131375Smrg ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 318220131375Smrg if (ret) { 318320131375Smrg fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno); 318420131375Smrg fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value); 318520131375Smrg } 318620131375Smrg return devid; 318720131375Smrg} 318820131375Smrg 3189424e9256Smrgint 319020131375Smrgdrm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr) 319120131375Smrg{ 319220131375Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 319320131375Smrg 319420131375Smrg return bufmgr_gem->pci_device; 319520131375Smrg} 319620131375Smrg 319720131375Smrg/** 319820131375Smrg * Sets the AUB filename. 319920131375Smrg * 320020131375Smrg * This function has to be called before drm_intel_bufmgr_gem_set_aub_dump() 320120131375Smrg * for it to have any effect. 320220131375Smrg */ 3203424e9256Smrgvoid 320420131375Smrgdrm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr, 320520131375Smrg const char *filename) 320620131375Smrg{ 320720131375Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 320820131375Smrg 320920131375Smrg free(bufmgr_gem->aub_filename); 321020131375Smrg if (filename) 321120131375Smrg bufmgr_gem->aub_filename = strdup(filename); 321220131375Smrg} 321320131375Smrg 321420131375Smrg/** 321520131375Smrg * Sets up AUB dumping. 321620131375Smrg * 321720131375Smrg * This is a trace file format that can be used with the simulator. 321820131375Smrg * Packets are emitted in a format somewhat like GPU command packets. 321920131375Smrg * You can set up a GTT and upload your objects into the referenced 322020131375Smrg * space, then send off batchbuffers and get BMPs out the other end. 322120131375Smrg */ 3222424e9256Smrgvoid 322320131375Smrgdrm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable) 322420131375Smrg{ 322520131375Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 322620131375Smrg int entry = 0x200003; 322720131375Smrg int i; 322820131375Smrg int gtt_size = 0x10000; 322920131375Smrg const char *filename; 323020131375Smrg 323120131375Smrg if (!enable) { 323220131375Smrg if (bufmgr_gem->aub_file) { 323320131375Smrg fclose(bufmgr_gem->aub_file); 323420131375Smrg bufmgr_gem->aub_file = NULL; 323520131375Smrg } 323620131375Smrg return; 323720131375Smrg } 323820131375Smrg 323920131375Smrg if (geteuid() != getuid()) 324020131375Smrg return; 324120131375Smrg 324220131375Smrg if (bufmgr_gem->aub_filename) 324320131375Smrg filename = bufmgr_gem->aub_filename; 324420131375Smrg else 324520131375Smrg filename = "intel.aub"; 324620131375Smrg bufmgr_gem->aub_file = fopen(filename, "w+"); 324720131375Smrg if (!bufmgr_gem->aub_file) 324820131375Smrg return; 324920131375Smrg 325020131375Smrg /* Start allocating objects from just after the GTT. */ 325120131375Smrg bufmgr_gem->aub_offset = gtt_size; 325220131375Smrg 325320131375Smrg /* Start with a (required) version packet. */ 325420131375Smrg aub_out(bufmgr_gem, CMD_AUB_HEADER | (13 - 2)); 325520131375Smrg aub_out(bufmgr_gem, 325620131375Smrg (4 << AUB_HEADER_MAJOR_SHIFT) | 325720131375Smrg (0 << AUB_HEADER_MINOR_SHIFT)); 325820131375Smrg for (i = 0; i < 8; i++) { 325920131375Smrg aub_out(bufmgr_gem, 0); /* app name */ 326020131375Smrg } 326120131375Smrg aub_out(bufmgr_gem, 0); /* timestamp */ 326220131375Smrg aub_out(bufmgr_gem, 0); /* timestamp */ 326320131375Smrg aub_out(bufmgr_gem, 0); /* comment len */ 326420131375Smrg 326520131375Smrg /* Set up the GTT. The max we can handle is 256M */ 326620131375Smrg aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | ((bufmgr_gem->gen >= 8 ? 6 : 5) - 2)); 32673c748557Ssnj /* Need to use GTT_ENTRY type for recent emulator */ 32683c748557Ssnj aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_GTT_ENTRY | 0 | AUB_TRACE_OP_DATA_WRITE); 326920131375Smrg aub_out(bufmgr_gem, 0); /* subtype */ 327020131375Smrg aub_out(bufmgr_gem, 0); /* offset */ 327120131375Smrg aub_out(bufmgr_gem, gtt_size); /* size */ 327220131375Smrg if (bufmgr_gem->gen >= 8) 327320131375Smrg aub_out(bufmgr_gem, 0); 327420131375Smrg for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) { 327520131375Smrg aub_out(bufmgr_gem, entry); 327620131375Smrg } 327720131375Smrg} 327820131375Smrg 3279424e9256Smrgdrm_intel_context * 328020131375Smrgdrm_intel_gem_context_create(drm_intel_bufmgr *bufmgr) 328120131375Smrg{ 328220131375Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 328320131375Smrg struct drm_i915_gem_context_create create; 328420131375Smrg drm_intel_context *context = NULL; 328520131375Smrg int ret; 328620131375Smrg 328720131375Smrg context = calloc(1, sizeof(*context)); 328820131375Smrg if (!context) 328920131375Smrg return NULL; 329020131375Smrg 3291424e9256Smrg memclear(create); 329220131375Smrg ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); 329320131375Smrg if (ret != 0) { 329420131375Smrg DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", 329520131375Smrg strerror(errno)); 329620131375Smrg free(context); 329720131375Smrg return NULL; 329820131375Smrg } 329920131375Smrg 330020131375Smrg context->ctx_id = create.ctx_id; 330120131375Smrg context->bufmgr = bufmgr; 330220131375Smrg 330320131375Smrg return context; 330420131375Smrg} 330520131375Smrg 3306424e9256Smrgvoid 330720131375Smrgdrm_intel_gem_context_destroy(drm_intel_context *ctx) 330820131375Smrg{ 330920131375Smrg drm_intel_bufmgr_gem *bufmgr_gem; 331020131375Smrg struct drm_i915_gem_context_destroy destroy; 331120131375Smrg int ret; 331220131375Smrg 331320131375Smrg if (ctx == NULL) 331420131375Smrg return; 331520131375Smrg 3316424e9256Smrg memclear(destroy); 331720131375Smrg 331820131375Smrg bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr; 331920131375Smrg destroy.ctx_id = ctx->ctx_id; 332020131375Smrg ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, 332120131375Smrg &destroy); 332220131375Smrg if (ret != 0) 332320131375Smrg fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n", 332420131375Smrg strerror(errno)); 332520131375Smrg 332620131375Smrg free(ctx); 332720131375Smrg} 332820131375Smrg 3329424e9256Smrgint 333020131375Smrgdrm_intel_get_reset_stats(drm_intel_context *ctx, 333120131375Smrg uint32_t *reset_count, 333220131375Smrg uint32_t *active, 333320131375Smrg uint32_t *pending) 333420131375Smrg{ 333520131375Smrg drm_intel_bufmgr_gem *bufmgr_gem; 333620131375Smrg struct drm_i915_reset_stats stats; 333720131375Smrg int ret; 333820131375Smrg 333920131375Smrg if (ctx == NULL) 334020131375Smrg return -EINVAL; 334120131375Smrg 3342424e9256Smrg memclear(stats); 334320131375Smrg 334420131375Smrg bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr; 334520131375Smrg stats.ctx_id = ctx->ctx_id; 334620131375Smrg ret = drmIoctl(bufmgr_gem->fd, 334720131375Smrg DRM_IOCTL_I915_GET_RESET_STATS, 334820131375Smrg &stats); 334920131375Smrg if (ret == 0) { 335020131375Smrg if (reset_count != NULL) 335120131375Smrg *reset_count = stats.reset_count; 335220131375Smrg 335320131375Smrg if (active != NULL) 335420131375Smrg *active = stats.batch_active; 335520131375Smrg 335620131375Smrg if (pending != NULL) 335720131375Smrg *pending = stats.batch_pending; 335820131375Smrg } 335920131375Smrg 336020131375Smrg return ret; 336120131375Smrg} 336220131375Smrg 3363424e9256Smrgint 336420131375Smrgdrm_intel_reg_read(drm_intel_bufmgr *bufmgr, 336520131375Smrg uint32_t offset, 336620131375Smrg uint64_t *result) 336720131375Smrg{ 336820131375Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 336920131375Smrg struct drm_i915_reg_read reg_read; 337020131375Smrg int ret; 337120131375Smrg 3372424e9256Smrg memclear(reg_read); 337320131375Smrg reg_read.offset = offset; 337420131375Smrg 337520131375Smrg ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, ®_read); 337620131375Smrg 337720131375Smrg *result = reg_read.val; 337820131375Smrg return ret; 337920131375Smrg} 338020131375Smrg 3381424e9256Smrgint 3382424e9256Smrgdrm_intel_get_subslice_total(int fd, unsigned int *subslice_total) 3383424e9256Smrg{ 3384424e9256Smrg drm_i915_getparam_t gp; 3385424e9256Smrg int ret; 3386424e9256Smrg 3387424e9256Smrg memclear(gp); 3388424e9256Smrg gp.value = (int*)subslice_total; 3389424e9256Smrg gp.param = I915_PARAM_SUBSLICE_TOTAL; 3390424e9256Smrg ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); 3391424e9256Smrg if (ret) 3392424e9256Smrg return -errno; 3393424e9256Smrg 3394424e9256Smrg return 0; 3395424e9256Smrg} 3396424e9256Smrg 3397424e9256Smrgint 3398424e9256Smrgdrm_intel_get_eu_total(int fd, unsigned int *eu_total) 3399424e9256Smrg{ 3400424e9256Smrg drm_i915_getparam_t gp; 3401424e9256Smrg int ret; 3402424e9256Smrg 3403424e9256Smrg memclear(gp); 3404424e9256Smrg gp.value = (int*)eu_total; 3405424e9256Smrg gp.param = I915_PARAM_EU_TOTAL; 3406424e9256Smrg ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); 3407424e9256Smrg if (ret) 3408424e9256Smrg return -errno; 3409424e9256Smrg 3410424e9256Smrg return 0; 3411424e9256Smrg} 341220131375Smrg 341320131375Smrg/** 341420131375Smrg * Annotate the given bo for use in aub dumping. 341520131375Smrg * 341620131375Smrg * \param annotations is an array of drm_intel_aub_annotation objects 341720131375Smrg * describing the type of data in various sections of the bo. Each 341820131375Smrg * element of the array specifies the type and subtype of a section of 341920131375Smrg * the bo, and the past-the-end offset of that section. The elements 342020131375Smrg * of \c annotations must be sorted so that ending_offset is 342120131375Smrg * increasing. 342220131375Smrg * 342320131375Smrg * \param count is the number of elements in the \c annotations array. 342420131375Smrg * If \c count is zero, then \c annotations will not be dereferenced. 342520131375Smrg * 342620131375Smrg * Annotations are copied into a private data structure, so caller may 342720131375Smrg * re-use the memory pointed to by \c annotations after the call 342820131375Smrg * returns. 342920131375Smrg * 343020131375Smrg * Annotations are stored for the lifetime of the bo; to reset to the 343120131375Smrg * default state (no annotations), call this function with a \c count 343220131375Smrg * of zero. 343320131375Smrg */ 3434424e9256Smrgvoid 343520131375Smrgdrm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo, 343620131375Smrg drm_intel_aub_annotation *annotations, 343720131375Smrg unsigned count) 343820131375Smrg{ 343920131375Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 344020131375Smrg unsigned size = sizeof(*annotations) * count; 344120131375Smrg drm_intel_aub_annotation *new_annotations = 344220131375Smrg count > 0 ? realloc(bo_gem->aub_annotations, size) : NULL; 344320131375Smrg if (new_annotations == NULL) { 344420131375Smrg free(bo_gem->aub_annotations); 344520131375Smrg bo_gem->aub_annotations = NULL; 344620131375Smrg bo_gem->aub_annotation_count = 0; 344720131375Smrg return; 344820131375Smrg } 344920131375Smrg memcpy(new_annotations, annotations, size); 345020131375Smrg bo_gem->aub_annotations = new_annotations; 345120131375Smrg bo_gem->aub_annotation_count = count; 345220131375Smrg} 345320131375Smrg 3454a884aba1Smrgstatic pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER; 3455a884aba1Smrgstatic drmMMListHead bufmgr_list = { &bufmgr_list, &bufmgr_list }; 3456a884aba1Smrg 3457a884aba1Smrgstatic drm_intel_bufmgr_gem * 3458a884aba1Smrgdrm_intel_bufmgr_gem_find(int fd) 3459a884aba1Smrg{ 3460a884aba1Smrg drm_intel_bufmgr_gem *bufmgr_gem; 3461a884aba1Smrg 3462a884aba1Smrg DRMLISTFOREACHENTRY(bufmgr_gem, &bufmgr_list, managers) { 3463a884aba1Smrg if (bufmgr_gem->fd == fd) { 3464a884aba1Smrg atomic_inc(&bufmgr_gem->refcount); 3465a884aba1Smrg return bufmgr_gem; 3466a884aba1Smrg } 3467a884aba1Smrg } 3468a884aba1Smrg 3469a884aba1Smrg return NULL; 3470a884aba1Smrg} 3471a884aba1Smrg 3472a884aba1Smrgstatic void 3473a884aba1Smrgdrm_intel_bufmgr_gem_unref(drm_intel_bufmgr *bufmgr) 3474a884aba1Smrg{ 3475a884aba1Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3476a884aba1Smrg 3477a884aba1Smrg if (atomic_add_unless(&bufmgr_gem->refcount, -1, 1)) { 3478a884aba1Smrg pthread_mutex_lock(&bufmgr_list_mutex); 3479a884aba1Smrg 3480a884aba1Smrg if (atomic_dec_and_test(&bufmgr_gem->refcount)) { 3481a884aba1Smrg DRMLISTDEL(&bufmgr_gem->managers); 3482a884aba1Smrg drm_intel_bufmgr_gem_destroy(bufmgr); 3483a884aba1Smrg } 3484a884aba1Smrg 3485a884aba1Smrg pthread_mutex_unlock(&bufmgr_list_mutex); 3486a884aba1Smrg } 3487a884aba1Smrg} 3488a884aba1Smrg 348922944501Smrg/** 349022944501Smrg * Initializes the GEM buffer manager, which uses the kernel to allocate, map, 349122944501Smrg * and manage map buffer objections. 349222944501Smrg * 349322944501Smrg * \param fd File descriptor of the opened DRM device. 349422944501Smrg */ 3495424e9256Smrgdrm_intel_bufmgr * 349622944501Smrgdrm_intel_bufmgr_gem_init(int fd, int batch_size) 349722944501Smrg{ 349822944501Smrg drm_intel_bufmgr_gem *bufmgr_gem; 349922944501Smrg struct drm_i915_gem_get_aperture aperture; 350022944501Smrg drm_i915_getparam_t gp; 350120131375Smrg int ret, tmp; 350220131375Smrg bool exec2 = false; 350322944501Smrg 3504a884aba1Smrg pthread_mutex_lock(&bufmgr_list_mutex); 3505a884aba1Smrg 3506a884aba1Smrg bufmgr_gem = drm_intel_bufmgr_gem_find(fd); 3507a884aba1Smrg if (bufmgr_gem) 3508a884aba1Smrg goto exit; 3509a884aba1Smrg 351022944501Smrg bufmgr_gem = calloc(1, sizeof(*bufmgr_gem)); 351122944501Smrg if (bufmgr_gem == NULL) 3512a884aba1Smrg goto exit; 351322944501Smrg 351422944501Smrg bufmgr_gem->fd = fd; 3515a884aba1Smrg atomic_set(&bufmgr_gem->refcount, 1); 351622944501Smrg 351722944501Smrg if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) { 351822944501Smrg free(bufmgr_gem); 3519a884aba1Smrg bufmgr_gem = NULL; 3520a884aba1Smrg goto exit; 352122944501Smrg } 352222944501Smrg 3523424e9256Smrg memclear(aperture); 35246d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, 35256d98c517Smrg DRM_IOCTL_I915_GEM_GET_APERTURE, 35266d98c517Smrg &aperture); 352722944501Smrg 352822944501Smrg if (ret == 0) 352922944501Smrg bufmgr_gem->gtt_size = aperture.aper_available_size; 353022944501Smrg else { 353122944501Smrg fprintf(stderr, "DRM_IOCTL_I915_GEM_APERTURE failed: %s\n", 353222944501Smrg strerror(errno)); 353322944501Smrg bufmgr_gem->gtt_size = 128 * 1024 * 1024; 353422944501Smrg fprintf(stderr, "Assuming %dkB available aperture size.\n" 353522944501Smrg "May lead to reduced performance or incorrect " 353622944501Smrg "rendering.\n", 353722944501Smrg (int)bufmgr_gem->gtt_size / 1024); 353822944501Smrg } 353922944501Smrg 354020131375Smrg bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem); 354122944501Smrg 354220131375Smrg if (IS_GEN2(bufmgr_gem->pci_device)) 354322944501Smrg bufmgr_gem->gen = 2; 354420131375Smrg else if (IS_GEN3(bufmgr_gem->pci_device)) 354522944501Smrg bufmgr_gem->gen = 3; 354620131375Smrg else if (IS_GEN4(bufmgr_gem->pci_device)) 354722944501Smrg bufmgr_gem->gen = 4; 354820131375Smrg else if (IS_GEN5(bufmgr_gem->pci_device)) 354920131375Smrg bufmgr_gem->gen = 5; 355020131375Smrg else if (IS_GEN6(bufmgr_gem->pci_device)) 355122944501Smrg bufmgr_gem->gen = 6; 355220131375Smrg else if (IS_GEN7(bufmgr_gem->pci_device)) 355320131375Smrg bufmgr_gem->gen = 7; 355420131375Smrg else if (IS_GEN8(bufmgr_gem->pci_device)) 355520131375Smrg bufmgr_gem->gen = 8; 35563c748557Ssnj else if (IS_GEN9(bufmgr_gem->pci_device)) 35573c748557Ssnj bufmgr_gem->gen = 9; 355820131375Smrg else { 355920131375Smrg free(bufmgr_gem); 3560a884aba1Smrg bufmgr_gem = NULL; 3561a884aba1Smrg goto exit; 356220131375Smrg } 356320131375Smrg 356420131375Smrg if (IS_GEN3(bufmgr_gem->pci_device) && 356520131375Smrg bufmgr_gem->gtt_size > 256*1024*1024) { 356620131375Smrg /* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't 356720131375Smrg * be used for tiled blits. To simplify the accounting, just 356820131375Smrg * substract the unmappable part (fixed to 256MB on all known 356920131375Smrg * gen3 devices) if the kernel advertises it. */ 357020131375Smrg bufmgr_gem->gtt_size -= 256*1024*1024; 357120131375Smrg } 357220131375Smrg 3573424e9256Smrg memclear(gp); 357420131375Smrg gp.value = &tmp; 357522944501Smrg 357622944501Smrg gp.param = I915_PARAM_HAS_EXECBUF2; 35776d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 357822944501Smrg if (!ret) 357920131375Smrg exec2 = true; 358022944501Smrg 3581aaba2545Smrg gp.param = I915_PARAM_HAS_BSD; 35826d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 35839ce4edccSmrg bufmgr_gem->has_bsd = ret == 0; 35849ce4edccSmrg 35859ce4edccSmrg gp.param = I915_PARAM_HAS_BLT; 35869ce4edccSmrg ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 35879ce4edccSmrg bufmgr_gem->has_blt = ret == 0; 35889ce4edccSmrg 35899ce4edccSmrg gp.param = I915_PARAM_HAS_RELAXED_FENCING; 35909ce4edccSmrg ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 35919ce4edccSmrg bufmgr_gem->has_relaxed_fencing = ret == 0; 3592aaba2545Smrg 3593424e9256Smrg bufmgr_gem->bufmgr.bo_alloc_userptr = check_bo_alloc_userptr; 3594a884aba1Smrg 359520131375Smrg gp.param = I915_PARAM_HAS_WAIT_TIMEOUT; 359620131375Smrg ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 359720131375Smrg bufmgr_gem->has_wait_timeout = ret == 0; 359820131375Smrg 359920131375Smrg gp.param = I915_PARAM_HAS_LLC; 360020131375Smrg ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 360120131375Smrg if (ret != 0) { 360220131375Smrg /* Kernel does not supports HAS_LLC query, fallback to GPU 360320131375Smrg * generation detection and assume that we have LLC on GEN6/7 360420131375Smrg */ 360520131375Smrg bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) | 360620131375Smrg IS_GEN7(bufmgr_gem->pci_device)); 360720131375Smrg } else 360820131375Smrg bufmgr_gem->has_llc = *gp.value; 360920131375Smrg 361020131375Smrg gp.param = I915_PARAM_HAS_VEBOX; 361120131375Smrg ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 361220131375Smrg bufmgr_gem->has_vebox = (ret == 0) & (*gp.value > 0); 361320131375Smrg 361422944501Smrg if (bufmgr_gem->gen < 4) { 361522944501Smrg gp.param = I915_PARAM_NUM_FENCES_AVAIL; 361622944501Smrg gp.value = &bufmgr_gem->available_fences; 36176d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 361822944501Smrg if (ret) { 361922944501Smrg fprintf(stderr, "get fences failed: %d [%d]\n", ret, 362022944501Smrg errno); 362122944501Smrg fprintf(stderr, "param: %d, val: %d\n", gp.param, 362222944501Smrg *gp.value); 362322944501Smrg bufmgr_gem->available_fences = 0; 362422944501Smrg } else { 362522944501Smrg /* XXX The kernel reports the total number of fences, 362622944501Smrg * including any that may be pinned. 362722944501Smrg * 362822944501Smrg * We presume that there will be at least one pinned 362922944501Smrg * fence for the scanout buffer, but there may be more 363022944501Smrg * than one scanout and the user may be manually 363122944501Smrg * pinning buffers. Let's move to execbuffer2 and 363222944501Smrg * thereby forget the insanity of using fences... 363322944501Smrg */ 363422944501Smrg bufmgr_gem->available_fences -= 2; 363522944501Smrg if (bufmgr_gem->available_fences < 0) 363622944501Smrg bufmgr_gem->available_fences = 0; 363722944501Smrg } 363822944501Smrg } 363922944501Smrg 364022944501Smrg /* Let's go with one relocation per every 2 dwords (but round down a bit 364122944501Smrg * since a power of two will mean an extra page allocation for the reloc 364222944501Smrg * buffer). 364322944501Smrg * 364422944501Smrg * Every 4 was too few for the blender benchmark. 364522944501Smrg */ 364622944501Smrg bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2; 364722944501Smrg 364822944501Smrg bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc; 364922944501Smrg bufmgr_gem->bufmgr.bo_alloc_for_render = 365022944501Smrg drm_intel_gem_bo_alloc_for_render; 365122944501Smrg bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled; 365222944501Smrg bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference; 365322944501Smrg bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference; 365422944501Smrg bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map; 365522944501Smrg bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap; 365622944501Smrg bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata; 365722944501Smrg bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata; 365822944501Smrg bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering; 365922944501Smrg bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc; 366022944501Smrg bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence; 366122944501Smrg bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin; 366222944501Smrg bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin; 366322944501Smrg bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling; 366422944501Smrg bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling; 366522944501Smrg bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink; 366622944501Smrg /* Use the new one if available */ 3667aaba2545Smrg if (exec2) { 366822944501Smrg bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2; 36699ce4edccSmrg bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2; 3670aaba2545Smrg } else 367122944501Smrg bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec; 367222944501Smrg bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy; 367322944501Smrg bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise; 3674a884aba1Smrg bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_unref; 367522944501Smrg bufmgr_gem->bufmgr.debug = 0; 367622944501Smrg bufmgr_gem->bufmgr.check_aperture_space = 367722944501Smrg drm_intel_gem_check_aperture_space; 367822944501Smrg bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse; 3679aaba2545Smrg bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable; 368022944501Smrg bufmgr_gem->bufmgr.get_pipe_from_crtc_id = 368122944501Smrg drm_intel_gem_get_pipe_from_crtc_id; 368222944501Smrg bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references; 368322944501Smrg 368420131375Smrg DRMINITLISTHEAD(&bufmgr_gem->named); 3685aaba2545Smrg init_cache_buckets(bufmgr_gem); 368622944501Smrg 368720131375Smrg DRMINITLISTHEAD(&bufmgr_gem->vma_cache); 368820131375Smrg bufmgr_gem->vma_max = -1; /* unlimited by default */ 368920131375Smrg 3690a884aba1Smrg DRMLISTADD(&bufmgr_gem->managers, &bufmgr_list); 3691a884aba1Smrg 3692a884aba1Smrgexit: 3693a884aba1Smrg pthread_mutex_unlock(&bufmgr_list_mutex); 3694a884aba1Smrg 3695a884aba1Smrg return bufmgr_gem != NULL ? &bufmgr_gem->bufmgr : NULL; 369622944501Smrg} 3697