/**************************************************************************
 *
 * Copyright © 2007 Red Hat Inc.
 * Copyright © 2007-2012 Intel Corporation
 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 *
 **************************************************************************/
/*
 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
 *          Eric Anholt <eric@anholt.net>
 *          Dave Airlie <airlied@linux.ie>
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <xf86drm.h>
#include <xf86atomic.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <stddef.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <stdbool.h>

#include "errno.h"
#ifndef ETIME
#define ETIME ETIMEDOUT
#endif
#include "libdrm_lists.h"
#include "intel_bufmgr.h"
#include "intel_bufmgr_priv.h"
#include "intel_chipset.h"
#include "intel_aub.h"
#include "string.h"

#include "i915_drm.h"

#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x)
#endif

#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s)))

#define DBG(...) do {					\
	if (bufmgr_gem->bufmgr.debug)			\
		fprintf(stderr, __VA_ARGS__);		\
} while (0)

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

typedef struct _drm_intel_bo_gem drm_intel_bo_gem;

struct drm_intel_gem_bo_bucket {
	drmMMListHead head;
	unsigned long size;
};

typedef struct _drm_intel_bufmgr_gem {
	drm_intel_bufmgr bufmgr;

	int fd;

	int max_relocs;

	pthread_mutex_t lock;

	struct drm_i915_gem_exec_object *exec_objects;
	struct drm_i915_gem_exec_object2 *exec2_objects;
	drm_intel_bo **exec_bos;
	int exec_size;
	int exec_count;

	/** Array of lists of cached gem objects of power-of-two sizes */
	struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
	int num_buckets;
	time_t time;

	drmMMListHead named;
	drmMMListHead vma_cache;
	int vma_count, vma_open, vma_max;

	uint64_t gtt_size;
	int available_fences;
	int pci_device;
	int gen;
	unsigned int has_bsd : 1;
	unsigned int has_blt : 1;
	unsigned int has_relaxed_fencing : 1;
	unsigned int has_llc : 1;
	unsigned int has_wait_timeout : 1;
	unsigned int bo_reuse : 1;
	unsigned int no_exec : 1;
	unsigned int has_vebox : 1;
	bool fenced_relocs;

	char *aub_filename;
	FILE *aub_file;
	uint32_t aub_offset;
} drm_intel_bufmgr_gem;

#define DRM_INTEL_RELOC_FENCE (1<<0)

typedef struct _drm_intel_reloc_target_info {
	drm_intel_bo *bo;
	int flags;
} drm_intel_reloc_target;

struct _drm_intel_bo_gem {
	drm_intel_bo bo;

	atomic_t refcount;
	uint32_t gem_handle;
	const char *name;

	/**
	 * Kernel-assigned global name for this object
	 *
	 * List contains both flink named and prime fd'd objects
	 */
	unsigned int global_name;
	drmMMListHead name_list;

	/**
	 * Index of the buffer within the validation list while preparing a
	 * batchbuffer execution.
	 */
	int validate_index;

	/**
	 * Current tiling mode
	 */
	uint32_t tiling_mode;
	uint32_t swizzle_mode;
	unsigned long stride;

	time_t free_time;

	/** Array passed to the DRM containing relocation information. */
	struct drm_i915_gem_relocation_entry *relocs;
	/**
	 * Array of info structs corresponding to relocs[i].target_handle etc
	 */
	drm_intel_reloc_target *reloc_target_info;
	/** Number of entries in relocs */
	int reloc_count;
	/** Mapped address for the buffer, saved across map/unmap cycles */
	void *mem_virtual;
	/** GTT virtual address for the buffer, saved across map/unmap cycles */
	void *gtt_virtual;
	int map_count;
	drmMMListHead vma_list;

	/** BO cache list */
	drmMMListHead head;

	/**
	 * Boolean of whether this BO and its children have been included in
	 * the current drm_intel_bufmgr_check_aperture_space() total.
	 */
	bool included_in_check_aperture;

	/**
	 * Boolean of whether this buffer has been used as a relocation
	 * target and had its size accounted for, and thus can't have any
	 * further relocations added to it.
	 */
	bool used_as_reloc_target;

	/**
	 * Boolean of whether we have encountered an error whilst building the relocation tree.
	 */
	bool has_error;

	/**
	 * Boolean of whether this buffer can be re-used
	 */
	bool reusable;

	/**
	 * Boolean of whether the GPU is definitely not accessing the buffer.
	 *
	 * This is only valid when reusable, since non-reusable
	 * buffers are those that have been shared with other
	 * processes, so we don't know their state.
	 */
	bool idle;

	/**
	 * Size in bytes of this buffer and its relocation descendants.
	 *
	 * Used to avoid costly tree walking in
	 * drm_intel_bufmgr_check_aperture in the common case.
	 */
	int reloc_tree_size;

	/**
	 * Number of potential fence registers required by this buffer and its
	 * relocations.
	 */
	int reloc_tree_fences;

	/** Flags that we may need to do the SW_FINISH ioctl on unmap. */
	bool mapped_cpu_write;

	uint32_t aub_offset;

	drm_intel_aub_annotation *aub_annotations;
	unsigned aub_annotation_count;
};

static unsigned int
drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);

static unsigned int
drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count);

static int
drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
			    uint32_t * swizzle_mode);

static int
drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
				     uint32_t tiling_mode,
				     uint32_t stride);

static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
						      time_t time);

static void drm_intel_gem_bo_unreference(drm_intel_bo *bo);

static void drm_intel_gem_bo_free(drm_intel_bo *bo);

static unsigned long
drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
			   uint32_t *tiling_mode)
{
	unsigned long min_size, max_size;
	unsigned long i;

	if (*tiling_mode == I915_TILING_NONE)
		return size;

	/* 965+ just need multiples of page size for tiling */
	if (bufmgr_gem->gen >= 4)
		return ROUND_UP_TO(size, 4096);

	/* Older chips need powers of two, of at least 512k or 1M */
	if (bufmgr_gem->gen == 3) {
		min_size = 1024*1024;
		max_size = 128*1024*1024;
	} else {
		min_size = 512*1024;
		max_size = 64*1024*1024;
	}

	if (size > max_size) {
		*tiling_mode = I915_TILING_NONE;
		return size;
	}

	/* Do we need to allocate every page for the fence? */
	if (bufmgr_gem->has_relaxed_fencing)
		return ROUND_UP_TO(size, 4096);

	for (i = min_size; i < size; i <<= 1)
		;

	return i;
}

/*
 * Round a given pitch up to the minimum required for X tiling on a
 * given chip.  We use 512 as the minimum to allow for a later tiling
 * change.
 */
static unsigned long
drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
			    unsigned long pitch, uint32_t *tiling_mode)
{
	unsigned long tile_width;
	unsigned long i;

	/* If untiled, then just align it so that we can do rendering
	 * to it with the 3D engine.
	 */
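	/* A worked example of the rounding below (illustrative numbers): an
	 * X-tiled surface 1366 pixels wide at 4 bytes per pixel has a byte
	 * pitch of 5464.  On gen4+ that is rounded up to 5632, the next
	 * multiple of the 512 byte tile width; on gen3 it becomes 8192, the
	 * next power of two.
	 */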
	if (*tiling_mode == I915_TILING_NONE)
		return ALIGN(pitch, 64);

	if (*tiling_mode == I915_TILING_X
	    || (IS_915(bufmgr_gem->pci_device)
		&& *tiling_mode == I915_TILING_Y))
		tile_width = 512;
	else
		tile_width = 128;

	/* 965 is flexible */
	if (bufmgr_gem->gen >= 4)
		return ROUND_UP_TO(pitch, tile_width);

	/* The older hardware has a maximum pitch of 8192 with tiled
	 * surfaces, so fallback to untiled if it's too large.
	 */
	if (pitch > 8192) {
		*tiling_mode = I915_TILING_NONE;
		return ALIGN(pitch, 64);
	}

	/* Pre-965 needs power of two tile width */
	for (i = tile_width; i < pitch; i <<= 1)
		;

	return i;
}

static struct drm_intel_gem_bo_bucket *
drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
				 unsigned long size)
{
	int i;

	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
		struct drm_intel_gem_bo_bucket *bucket =
		    &bufmgr_gem->cache_bucket[i];
		if (bucket->size >= size) {
			return bucket;
		}
	}

	return NULL;
}

static void
drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
{
	int i, j;

	for (i = 0; i < bufmgr_gem->exec_count; i++) {
		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

		if (bo_gem->relocs == NULL) {
			DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle,
			    bo_gem->name);
			continue;
		}

		for (j = 0; j < bo_gem->reloc_count; j++) {
			drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo;
			drm_intel_bo_gem *target_gem =
			    (drm_intel_bo_gem *) target_bo;

			DBG("%2d: %d (%s)@0x%08llx -> "
			    "%d (%s)@0x%08lx + 0x%08x\n",
			    i,
			    bo_gem->gem_handle, bo_gem->name,
			    (unsigned long long)bo_gem->relocs[j].offset,
			    target_gem->gem_handle,
			    target_gem->name,
			    target_bo->offset64,
			    bo_gem->relocs[j].delta);
		}
	}
}

static inline void
drm_intel_gem_bo_reference(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	atomic_inc(&bo_gem->refcount);
}

/**
 * Adds the given buffer to the list of buffers to be validated (moved into the
 * appropriate memory type) with the next batch submission.
 *
 * If a buffer is validated multiple times in a batch submission, it ends up
 * with the intersection of the memory type flags and the union of the
 * access flags.
 */
static void
drm_intel_add_validate_buffer(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int index;

	if (bo_gem->validate_index != -1)
		return;

	/* Extend the array of validation entries as necessary. */
	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
		int new_size = bufmgr_gem->exec_size * 2;

		if (new_size == 0)
			new_size = 5;

		bufmgr_gem->exec_objects =
		    realloc(bufmgr_gem->exec_objects,
			    sizeof(*bufmgr_gem->exec_objects) * new_size);
		bufmgr_gem->exec_bos =
		    realloc(bufmgr_gem->exec_bos,
			    sizeof(*bufmgr_gem->exec_bos) * new_size);
		bufmgr_gem->exec_size = new_size;
	}

	index = bufmgr_gem->exec_count;
	bo_gem->validate_index = index;
	/* Fill in array entry */
	bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
	bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
	bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs;
	bufmgr_gem->exec_objects[index].alignment = 0;
	bufmgr_gem->exec_objects[index].offset = 0;
	bufmgr_gem->exec_bos[index] = bo;
	bufmgr_gem->exec_count++;
}

static void
drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
	int index;

	if (bo_gem->validate_index != -1) {
		if (need_fence)
			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
				EXEC_OBJECT_NEEDS_FENCE;
		return;
	}

	/* Extend the array of validation entries as necessary. */
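	/* The validation list grows geometrically: it starts at 5 entries
	 * and doubles each time it fills (5, 10, 20, ...), so adding a
	 * buffer stays amortized constant time.
	 */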
	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
		int new_size = bufmgr_gem->exec_size * 2;

		if (new_size == 0)
			new_size = 5;

		bufmgr_gem->exec2_objects =
			realloc(bufmgr_gem->exec2_objects,
				sizeof(*bufmgr_gem->exec2_objects) * new_size);
		bufmgr_gem->exec_bos =
			realloc(bufmgr_gem->exec_bos,
				sizeof(*bufmgr_gem->exec_bos) * new_size);
		bufmgr_gem->exec_size = new_size;
	}

	index = bufmgr_gem->exec_count;
	bo_gem->validate_index = index;
	/* Fill in array entry */
	bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
	bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
	bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
	bufmgr_gem->exec2_objects[index].alignment = 0;
	bufmgr_gem->exec2_objects[index].offset = 0;
	bufmgr_gem->exec_bos[index] = bo;
	bufmgr_gem->exec2_objects[index].flags = 0;
	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
	if (need_fence) {
		bufmgr_gem->exec2_objects[index].flags |=
			EXEC_OBJECT_NEEDS_FENCE;
	}
	bufmgr_gem->exec_count++;
}

#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
	sizeof(uint32_t))

static void
drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem,
				      drm_intel_bo_gem *bo_gem)
{
	int size;

	assert(!bo_gem->used_as_reloc_target);

	/* The older chipsets are far less flexible in terms of tiling,
	 * and require tiled buffer to be size aligned in the aperture.
	 * This means that in the worst possible case we will need a hole
	 * twice as large as the object in order for it to fit into the
	 * aperture. Optimal packing is for wimps.
	 */
	size = bo_gem->bo.size;
	if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) {
		int min_size;

		if (bufmgr_gem->has_relaxed_fencing) {
			if (bufmgr_gem->gen == 3)
				min_size = 1024*1024;
			else
				min_size = 512*1024;

			while (min_size < size)
				min_size *= 2;
		} else
			min_size = size;

		/* Account for worst-case alignment. */
		size = 2 * min_size;
	}

	bo_gem->reloc_tree_size = size;
}

static int
drm_intel_setup_reloc_list(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	unsigned int max_relocs = bufmgr_gem->max_relocs;

	if (bo->size / 4 < max_relocs)
		max_relocs = bo->size / 4;

	bo_gem->relocs = malloc(max_relocs *
				sizeof(struct drm_i915_gem_relocation_entry));
	bo_gem->reloc_target_info = malloc(max_relocs *
					   sizeof(drm_intel_reloc_target));
	if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) {
		bo_gem->has_error = true;

		free (bo_gem->relocs);
		bo_gem->relocs = NULL;

		free (bo_gem->reloc_target_info);
		bo_gem->reloc_target_info = NULL;

		return 1;
	}

	return 0;
}

static int
drm_intel_gem_bo_busy(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_busy busy;
	int ret;

	if (bo_gem->reusable && bo_gem->idle)
		return false;

	VG_CLEAR(busy);
	busy.handle = bo_gem->gem_handle;

	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
	if (ret == 0) {
		bo_gem->idle = !busy.busy;
		return busy.busy;
	} else {
		return false;
	}
	return (ret == 0 && busy.busy);
}

static int
drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem,
				  drm_intel_bo_gem *bo_gem, int state)
{
	struct drm_i915_gem_madvise madv;

	VG_CLEAR(madv);
	madv.handle = bo_gem->gem_handle;
	madv.madv = state;
	madv.retained = 1;
	drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);

	return madv.retained;
}

static int
drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv)
{
	return drm_intel_gem_bo_madvise_internal
		((drm_intel_bufmgr_gem *) bo->bufmgr,
		 (drm_intel_bo_gem *) bo,
		 madv);
}

/* drop the oldest entries that have been purged by the kernel */
static void
drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
				    struct drm_intel_gem_bo_bucket *bucket)
{
	while (!DRMLISTEMPTY(&bucket->head)) {
		drm_intel_bo_gem *bo_gem;

		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
				      bucket->head.next, head);
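		/* Buffers in a bucket were already marked I915_MADV_DONTNEED
		 * when they were cached; the madvise below re-issues that
		 * hint and reports whether the kernel still retains the
		 * backing pages.  The list is kept in age order, so the
		 * sweep stops at the first buffer that has not been purged.
		 */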
		if (drm_intel_gem_bo_madvise_internal
		    (bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
			break;

		DRMLISTDEL(&bo_gem->head);
		drm_intel_gem_bo_free(&bo_gem->bo);
	}
}

static drm_intel_bo *
drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
				const char *name,
				unsigned long size,
				unsigned long flags,
				uint32_t tiling_mode,
				unsigned long stride)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
	drm_intel_bo_gem *bo_gem;
	unsigned int page_size = getpagesize();
	int ret;
	struct drm_intel_gem_bo_bucket *bucket;
	bool alloc_from_cache;
	unsigned long bo_size;
	bool for_render = false;

	if (flags & BO_ALLOC_FOR_RENDER)
		for_render = true;

	/* Round the allocated size up to a power of two number of pages. */
	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);

	/* If we don't have caching at this size, don't actually round the
	 * allocation up.
	 */
	if (bucket == NULL) {
		bo_size = size;
		if (bo_size < page_size)
			bo_size = page_size;
	} else {
		bo_size = bucket->size;
	}

	pthread_mutex_lock(&bufmgr_gem->lock);
	/* Get a buffer out of the cache if available */
retry:
	alloc_from_cache = false;
	if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
		if (for_render) {
			/* Allocate new render-target BOs from the tail (MRU)
			 * of the list, as it will likely be hot in the GPU
			 * cache and in the aperture for us.
			 */
			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
					      bucket->head.prev, head);
			DRMLISTDEL(&bo_gem->head);
			alloc_from_cache = true;
		} else {
			/* For non-render-target BOs (where we're probably
			 * going to map it first thing in order to fill it
			 * with data), check if the last BO in the cache is
			 * unbusy, and only reuse in that case. Otherwise,
			 * allocating a new buffer is probably faster than
			 * waiting for the GPU to finish.
			 */
			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
					      bucket->head.next, head);
			if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
				alloc_from_cache = true;
				DRMLISTDEL(&bo_gem->head);
			}
		}

		if (alloc_from_cache) {
			if (!drm_intel_gem_bo_madvise_internal
			    (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
				drm_intel_gem_bo_free(&bo_gem->bo);
				drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
								    bucket);
				goto retry;
			}

			if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
								 tiling_mode,
								 stride)) {
				drm_intel_gem_bo_free(&bo_gem->bo);
				goto retry;
			}
		}
	}
	pthread_mutex_unlock(&bufmgr_gem->lock);

	if (!alloc_from_cache) {
		struct drm_i915_gem_create create;

		bo_gem = calloc(1, sizeof(*bo_gem));
		if (!bo_gem)
			return NULL;

		bo_gem->bo.size = bo_size;

		VG_CLEAR(create);
		create.size = bo_size;

		ret = drmIoctl(bufmgr_gem->fd,
			       DRM_IOCTL_I915_GEM_CREATE,
			       &create);
		bo_gem->gem_handle = create.handle;
		bo_gem->bo.handle = bo_gem->gem_handle;
		if (ret != 0) {
			free(bo_gem);
			return NULL;
		}
		bo_gem->bo.bufmgr = bufmgr;

		bo_gem->tiling_mode = I915_TILING_NONE;
		bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
		bo_gem->stride = 0;

		if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
							 tiling_mode,
							 stride)) {
			drm_intel_gem_bo_free(&bo_gem->bo);
			return NULL;
		}

		DRMINITLISTHEAD(&bo_gem->name_list);
		DRMINITLISTHEAD(&bo_gem->vma_list);
	}

	bo_gem->name = name;
	atomic_set(&bo_gem->refcount, 1);
	bo_gem->validate_index = -1;
	bo_gem->reloc_tree_fences = 0;
	bo_gem->used_as_reloc_target = false;
	bo_gem->has_error = false;
	bo_gem->reusable = true;
	bo_gem->aub_annotations = NULL;
	bo_gem->aub_annotation_count = 0;

	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);

	DBG("bo_create: buf %d (%s) %ldb\n",
	    bo_gem->gem_handle, bo_gem->name, size);

	return &bo_gem->bo;
}

static drm_intel_bo *
drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
				  const char *name,
				  unsigned long size,
				  unsigned int alignment)
{
	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
					       BO_ALLOC_FOR_RENDER,
					       I915_TILING_NONE, 0);
}

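/*
 * A minimal usage sketch of the allocation path above, as a client of the
 * public intel_bufmgr.h API would drive it (illustrative only; error
 * handling is omitted and "fd" is assumed to be an open DRM device node):
 *
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 *	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "scratch", 65536, 4096);
 *	drm_intel_bo_map(bo, 1);		// CPU mapping, write enabled
 *	memset(bo->virtual, 0, 65536);
 *	drm_intel_bo_unmap(bo);
 *	drm_intel_bo_unreference(bo);		// may return to the BO cache
 *	drm_intel_bufmgr_destroy(bufmgr);
 */
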
static drm_intel_bo *
drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr,
		       const char *name,
		       unsigned long size,
		       unsigned int alignment)
{
	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0,
					       I915_TILING_NONE, 0);
}

static drm_intel_bo *
drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
			     int x, int y, int cpp, uint32_t *tiling_mode,
			     unsigned long *pitch, unsigned long flags)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
	unsigned long size, stride;
	uint32_t tiling;

	do {
		unsigned long aligned_y, height_alignment;

		tiling = *tiling_mode;

		/* If we're tiled, our allocations are in 8 or 32-row blocks,
		 * so failure to align our height means that we won't allocate
		 * enough pages.
		 *
		 * If we're untiled, we still have to align to 2 rows high
		 * because the data port accesses 2x2 blocks even if the
		 * bottom row isn't to be rendered, so failure to align means
		 * we could walk off the end of the GTT and fault.  This is
		 * documented on 965, and may be the case on older chipsets
		 * too so we try to be careful.
		 */
		aligned_y = y;
		height_alignment = 2;

		if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE)
			height_alignment = 16;
		else if (tiling == I915_TILING_X
			|| (IS_915(bufmgr_gem->pci_device)
			    && tiling == I915_TILING_Y))
			height_alignment = 8;
		else if (tiling == I915_TILING_Y)
			height_alignment = 32;
		aligned_y = ALIGN(y, height_alignment);

		stride = x * cpp;
		stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode);
		size = stride * aligned_y;
		size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
	} while (*tiling_mode != tiling);
	*pitch = stride;

	if (tiling == I915_TILING_NONE)
		stride = 0;

	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags,
					       tiling, stride);
}

/**
 * Returns a drm_intel_bo wrapping the given buffer object handle.
 *
 * This can be used when one application needs to pass a buffer object
 * to another.
 */
drm_intel_bo *
drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr,
				  const char *name,
				  unsigned int handle)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
	drm_intel_bo_gem *bo_gem;
	int ret;
	struct drm_gem_open open_arg;
	struct drm_i915_gem_get_tiling get_tiling;
	drmMMListHead *list;

	/* At the moment most applications only have a few named bo.
	 * For instance, in a DRI client only the render buffers passed
	 * between X and the client are named. And since X returns the
	 * alternating names for the front/back buffer a linear search
	 * provides a sufficiently fast match.
	 */
	for (list = bufmgr_gem->named.next;
	     list != &bufmgr_gem->named;
	     list = list->next) {
		bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list);
		if (bo_gem->global_name == handle) {
			drm_intel_gem_bo_reference(&bo_gem->bo);
			return &bo_gem->bo;
		}
	}

	VG_CLEAR(open_arg);
	open_arg.name = handle;
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_GEM_OPEN,
		       &open_arg);
	if (ret != 0) {
		DBG("Couldn't reference %s handle 0x%08x: %s\n",
		    name, handle, strerror(errno));
		return NULL;
	}
	/* Now see if someone has used a prime handle to get this
	 * object from the kernel before by looking through the list
	 * again for a matching gem_handle
	 */
	for (list = bufmgr_gem->named.next;
	     list != &bufmgr_gem->named;
	     list = list->next) {
		bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list);
		if (bo_gem->gem_handle == open_arg.handle) {
			drm_intel_gem_bo_reference(&bo_gem->bo);
			return &bo_gem->bo;
		}
	}

	bo_gem = calloc(1, sizeof(*bo_gem));
	if (!bo_gem)
		return NULL;

	bo_gem->bo.size = open_arg.size;
	bo_gem->bo.offset = 0;
	bo_gem->bo.offset64 = 0;
	bo_gem->bo.virtual = NULL;
	bo_gem->bo.bufmgr = bufmgr;
	bo_gem->name = name;
	atomic_set(&bo_gem->refcount, 1);
	bo_gem->validate_index = -1;
	bo_gem->gem_handle = open_arg.handle;
	bo_gem->bo.handle = open_arg.handle;
	bo_gem->global_name = handle;
	bo_gem->reusable = false;

	VG_CLEAR(get_tiling);
	get_tiling.handle = bo_gem->gem_handle;
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_GET_TILING,
		       &get_tiling);
	if (ret != 0) {
		drm_intel_gem_bo_unreference(&bo_gem->bo);
		return NULL;
	}
	bo_gem->tiling_mode = get_tiling.tiling_mode;
	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
	/* XXX stride is unknown */
	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);

	DRMINITLISTHEAD(&bo_gem->vma_list);
	DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
	DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);

	return &bo_gem->bo;
}

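/*
 * Sketch of how the flink import above pairs with an export elsewhere
 * (illustrative; the two processes are hypothetical and must open the
 * same DRM device):
 *
 *	// exporting process
 *	uint32_t name;
 *	drm_intel_bo_flink(bo, &name);	// publish a global name for the BO
 *	// ... pass "name" to the other process over some IPC channel ...
 *
 *	// importing process
 *	drm_intel_bo *shared =
 *		drm_intel_bo_gem_create_from_name(bufmgr, "shared", name);
 */
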
static void
drm_intel_gem_bo_free(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_gem_close close;
	int ret;

	DRMLISTDEL(&bo_gem->vma_list);
	if (bo_gem->mem_virtual) {
		VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0));
		munmap(bo_gem->mem_virtual, bo_gem->bo.size);
		bufmgr_gem->vma_count--;
	}
	if (bo_gem->gtt_virtual) {
		munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
		bufmgr_gem->vma_count--;
	}

	/* Close this object */
	VG_CLEAR(close);
	close.handle = bo_gem->gem_handle;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close);
	if (ret != 0) {
		DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
		    bo_gem->gem_handle, bo_gem->name, strerror(errno));
	}
	free(bo_gem->aub_annotations);
	free(bo);
}

static void
drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo)
{
#if HAVE_VALGRIND
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	if (bo_gem->mem_virtual)
		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size);

	if (bo_gem->gtt_virtual)
		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size);
#endif
}

/** Frees all cached buffers significantly older than @time. */
static void
drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
{
	int i;

	if (bufmgr_gem->time == time)
		return;

	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
		struct drm_intel_gem_bo_bucket *bucket =
		    &bufmgr_gem->cache_bucket[i];

		while (!DRMLISTEMPTY(&bucket->head)) {
			drm_intel_bo_gem *bo_gem;

			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
					      bucket->head.next, head);
			if (time - bo_gem->free_time <= 1)
				break;

			DRMLISTDEL(&bo_gem->head);

			drm_intel_gem_bo_free(&bo_gem->bo);
		}
	}

	bufmgr_gem->time = time;
}

static void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem)
{
	int limit;

	DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__,
	    bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max);

	if (bufmgr_gem->vma_max < 0)
		return;

	/* We may need to evict a few entries in order to create new mmaps */
	limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open;
	if (limit < 0)
		limit = 0;

	while (bufmgr_gem->vma_count > limit) {
		drm_intel_bo_gem *bo_gem;

		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
				      bufmgr_gem->vma_cache.next,
				      vma_list);
		assert(bo_gem->map_count == 0);
		DRMLISTDELINIT(&bo_gem->vma_list);

		if (bo_gem->mem_virtual) {
			munmap(bo_gem->mem_virtual, bo_gem->bo.size);
			bo_gem->mem_virtual = NULL;
			bufmgr_gem->vma_count--;
		}
		if (bo_gem->gtt_virtual) {
			munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
			bo_gem->gtt_virtual = NULL;
			bufmgr_gem->vma_count--;
		}
	}
}

static void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem,
				       drm_intel_bo_gem *bo_gem)
{
	bufmgr_gem->vma_open--;
	DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache);
	if (bo_gem->mem_virtual)
		bufmgr_gem->vma_count++;
	if (bo_gem->gtt_virtual)
		bufmgr_gem->vma_count++;
	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
}

static void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem,
				      drm_intel_bo_gem *bo_gem)
{
	bufmgr_gem->vma_open++;
	DRMLISTDEL(&bo_gem->vma_list);
	if (bo_gem->mem_virtual)
		bufmgr_gem->vma_count--;
	if (bo_gem->gtt_virtual)
		bufmgr_gem->vma_count--;
	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
}

static void
drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_intel_gem_bo_bucket *bucket;
	int i;

	/* Unreference all the target buffers */
	for (i = 0; i < bo_gem->reloc_count; i++) {
		if (bo_gem->reloc_target_info[i].bo != bo) {
			drm_intel_gem_bo_unreference_locked_timed(bo_gem->
								  reloc_target_info[i].bo,
								  time);
		}
	}
	bo_gem->reloc_count = 0;
	bo_gem->used_as_reloc_target = false;

	DBG("bo_unreference final: %d (%s)\n",
	    bo_gem->gem_handle, bo_gem->name);

	/* release memory associated with this object */
	if (bo_gem->reloc_target_info) {
		free(bo_gem->reloc_target_info);
		bo_gem->reloc_target_info = NULL;
	}
	if (bo_gem->relocs) {
		free(bo_gem->relocs);
		bo_gem->relocs = NULL;
	}

	/* Clear any left-over mappings */
	if (bo_gem->map_count) {
		DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count);
		bo_gem->map_count = 0;
		drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
	}

	DRMLISTDEL(&bo_gem->name_list);

	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
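	/* A buffer is only cached if reuse is enabled, it was never shared
	 * with another process (sharing clears "reusable"), a size bucket
	 * exists for it, and the kernel reports its pages as still retained
	 * after the DONTNEED hint; otherwise it is freed immediately.
	 */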
	/* Put the buffer into our internal cache for reuse if we can. */
	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
	    drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
					      I915_MADV_DONTNEED)) {
		bo_gem->free_time = time;

		bo_gem->name = NULL;
		bo_gem->validate_index = -1;

		DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
	} else {
		drm_intel_gem_bo_free(bo);
	}
}

static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
						      time_t time)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	assert(atomic_read(&bo_gem->refcount) > 0);
	if (atomic_dec_and_test(&bo_gem->refcount))
		drm_intel_gem_bo_unreference_final(bo, time);
}

static void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	assert(atomic_read(&bo_gem->refcount) > 0);
	if (atomic_dec_and_test(&bo_gem->refcount)) {
		drm_intel_bufmgr_gem *bufmgr_gem =
		    (drm_intel_bufmgr_gem *) bo->bufmgr;
		struct timespec time;

		clock_gettime(CLOCK_MONOTONIC, &time);

		pthread_mutex_lock(&bufmgr_gem->lock);
		drm_intel_gem_bo_unreference_final(bo, time.tv_sec);
		drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec);
		pthread_mutex_unlock(&bufmgr_gem->lock);
	}
}

static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_set_domain set_domain;
	int ret;

	pthread_mutex_lock(&bufmgr_gem->lock);

	if (bo_gem->map_count++ == 0)
		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);

	if (!bo_gem->mem_virtual) {
		struct drm_i915_gem_mmap mmap_arg;

		DBG("bo_map: %d (%s), map_count=%d\n",
		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);

		VG_CLEAR(mmap_arg);
		mmap_arg.handle = bo_gem->gem_handle;
		mmap_arg.offset = 0;
		mmap_arg.size = bo->size;
		ret = drmIoctl(bufmgr_gem->fd,
			       DRM_IOCTL_I915_GEM_MMAP,
			       &mmap_arg);
		if (ret != 0) {
			ret = -errno;
			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
			    __FILE__, __LINE__, bo_gem->gem_handle,
			    bo_gem->name, strerror(errno));
			if (--bo_gem->map_count == 0)
				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
			pthread_mutex_unlock(&bufmgr_gem->lock);
			return ret;
		}
		VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr,
					     mmap_arg.size, 0, 1));
		bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
	}
	DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
	    bo_gem->mem_virtual);
	bo->virtual = bo_gem->mem_virtual;

	VG_CLEAR(set_domain);
	set_domain.handle = bo_gem->gem_handle;
	set_domain.read_domains = I915_GEM_DOMAIN_CPU;
	if (write_enable)
		set_domain.write_domain = I915_GEM_DOMAIN_CPU;
	else
		set_domain.write_domain = 0;
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
		       &set_domain);
	if (ret != 0) {
		DBG("%s:%d: Error setting to CPU domain %d: %s\n",
		    __FILE__, __LINE__, bo_gem->gem_handle,
		    strerror(errno));
	}

	if (write_enable)
		bo_gem->mapped_cpu_write = true;

	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size));
	pthread_mutex_unlock(&bufmgr_gem->lock);

	return 0;
}

static int
map_gtt(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int ret;

	if (bo_gem->map_count++ == 0)
		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);

	/* Get a mapping of the buffer if we haven't before. */
	if (bo_gem->gtt_virtual == NULL) {
		struct drm_i915_gem_mmap_gtt mmap_arg;

		DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);

		VG_CLEAR(mmap_arg);
		mmap_arg.handle = bo_gem->gem_handle;

		/* Get the fake offset back... */
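		/* GTT mappings are set up in two steps: the MMAP_GTT ioctl
		 * below only returns a magic offset into the DRM device
		 * file, and the drmMap() that follows maps the device fd at
		 * that offset to create the actual CPU view through the
		 * aperture.
		 */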
		ret = drmIoctl(bufmgr_gem->fd,
			       DRM_IOCTL_I915_GEM_MMAP_GTT,
			       &mmap_arg);
		if (ret != 0) {
			ret = -errno;
			DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
			    __FILE__, __LINE__,
			    bo_gem->gem_handle, bo_gem->name,
			    strerror(errno));
			if (--bo_gem->map_count == 0)
				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
			return ret;
		}

		/* and mmap it */
		ret = drmMap(bufmgr_gem->fd, mmap_arg.offset, bo->size,
			     &bo_gem->gtt_virtual);
		if (ret) {
			bo_gem->gtt_virtual = NULL;
			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
			    __FILE__, __LINE__,
			    bo_gem->gem_handle, bo_gem->name,
			    strerror(errno));
			if (--bo_gem->map_count == 0)
				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
			return ret;
		}
	}

	bo->virtual = bo_gem->gtt_virtual;

	DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
	    bo_gem->gtt_virtual);

	return 0;
}

int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_set_domain set_domain;
	int ret;

	pthread_mutex_lock(&bufmgr_gem->lock);

	ret = map_gtt(bo);
	if (ret) {
		pthread_mutex_unlock(&bufmgr_gem->lock);
		return ret;
	}

	/* Now move it to the GTT domain so that the GPU and CPU
	 * caches are flushed and the GPU isn't actively using the
	 * buffer.
	 *
	 * The pagefault handler does this domain change for us when
	 * it has unbound the BO from the GTT, but it's up to us to
	 * tell it when we're about to use things if we had done
	 * rendering and it still happens to be bound to the GTT.
	 */
	VG_CLEAR(set_domain);
	set_domain.handle = bo_gem->gem_handle;
	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
	set_domain.write_domain = I915_GEM_DOMAIN_GTT;
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
		       &set_domain);
	if (ret != 0) {
		DBG("%s:%d: Error setting domain %d: %s\n",
		    __FILE__, __LINE__, bo_gem->gem_handle,
		    strerror(errno));
	}

	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
	pthread_mutex_unlock(&bufmgr_gem->lock);

	return 0;
}

/**
 * Performs a mapping of the buffer object like the normal GTT
 * mapping, but avoids waiting for the GPU to be done reading from or
 * rendering to the buffer.
 *
 * This is used in the implementation of GL_ARB_map_buffer_range: The
 * user asks to create a buffer, then does a mapping, fills some
 * space, runs a drawing command, then asks to map it again without
 * synchronizing because it guarantees that it won't write over the
 * data that the GPU is busy using (or, more specifically, that if it
 * does write over the data, it acknowledges that rendering is
 * undefined).
 */

int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
#ifdef HAVE_VALGRIND
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
#endif
	int ret;

	/* If the CPU cache isn't coherent with the GTT, then use a
	 * regular synchronized mapping.  The problem is that we don't
	 * track where the buffer was last used on the CPU side in
	 * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so
	 * we would potentially corrupt the buffer even when the user
	 * does reasonable things.
	 */
	if (!bufmgr_gem->has_llc)
		return drm_intel_gem_bo_map_gtt(bo);

	pthread_mutex_lock(&bufmgr_gem->lock);

	ret = map_gtt(bo);
	if (ret == 0) {
		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
		VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
	}

	pthread_mutex_unlock(&bufmgr_gem->lock);

	return ret;
}

static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int ret = 0;

	if (bo == NULL)
		return 0;

	pthread_mutex_lock(&bufmgr_gem->lock);

	if (bo_gem->map_count <= 0) {
		DBG("attempted to unmap an unmapped bo\n");
		pthread_mutex_unlock(&bufmgr_gem->lock);
		/* Preserve the old behaviour of just treating this as a
		 * no-op rather than reporting the error.
		 */
		return 0;
	}

	if (bo_gem->mapped_cpu_write) {
		struct drm_i915_gem_sw_finish sw_finish;

		/* Cause a flush to happen if the buffer's pinned for
		 * scanout, so the results show up in a timely manner.
		 * Unlike GTT set domains, this only does work if the
		 * buffer should be scanout-related.
		 */
		VG_CLEAR(sw_finish);
		sw_finish.handle = bo_gem->gem_handle;
		ret = drmIoctl(bufmgr_gem->fd,
			       DRM_IOCTL_I915_GEM_SW_FINISH,
			       &sw_finish);
		ret = ret == -1 ? -errno : 0;

		bo_gem->mapped_cpu_write = false;
	}

	/* We need to unmap after every invocation as we cannot track
	 * an open vma for every bo as that will exhaust the system
	 * limits and cause later failures.
142620131375Smrg */ 142720131375Smrg if (--bo_gem->map_count == 0) { 142820131375Smrg drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 142920131375Smrg drm_intel_gem_bo_mark_mmaps_incoherent(bo); 143020131375Smrg bo->virtual = NULL; 143120131375Smrg } 143222944501Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 143322944501Smrg 143422944501Smrg return ret; 143522944501Smrg} 143622944501Smrg 143720131375Smrgint drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo) 143820131375Smrg{ 143920131375Smrg return drm_intel_gem_bo_unmap(bo); 144020131375Smrg} 144120131375Smrg 144222944501Smrgstatic int 144322944501Smrgdrm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset, 144422944501Smrg unsigned long size, const void *data) 144522944501Smrg{ 144622944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 144722944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 144822944501Smrg struct drm_i915_gem_pwrite pwrite; 144922944501Smrg int ret; 145022944501Smrg 145120131375Smrg VG_CLEAR(pwrite); 145222944501Smrg pwrite.handle = bo_gem->gem_handle; 145322944501Smrg pwrite.offset = offset; 145422944501Smrg pwrite.size = size; 145522944501Smrg pwrite.data_ptr = (uint64_t) (uintptr_t) data; 14566d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, 14576d98c517Smrg DRM_IOCTL_I915_GEM_PWRITE, 14586d98c517Smrg &pwrite); 145922944501Smrg if (ret != 0) { 146022944501Smrg ret = -errno; 14619ce4edccSmrg DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n", 14629ce4edccSmrg __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 14639ce4edccSmrg (int)size, strerror(errno)); 146422944501Smrg } 146522944501Smrg 146622944501Smrg return ret; 146722944501Smrg} 146822944501Smrg 146922944501Smrgstatic int 147022944501Smrgdrm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id) 147122944501Smrg{ 147222944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 147322944501Smrg struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id; 147422944501Smrg int ret; 147522944501Smrg 147620131375Smrg VG_CLEAR(get_pipe_from_crtc_id); 147722944501Smrg get_pipe_from_crtc_id.crtc_id = crtc_id; 14786d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, 14796d98c517Smrg DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID, 14806d98c517Smrg &get_pipe_from_crtc_id); 148122944501Smrg if (ret != 0) { 148222944501Smrg /* We return -1 here to signal that we don't 148322944501Smrg * know which pipe is associated with this crtc. 
148422944501Smrg * This lets the caller know that this information 148522944501Smrg * isn't available; using the wrong pipe for 148622944501Smrg * vblank waiting can cause the chipset to lock up 148722944501Smrg */ 148822944501Smrg return -1; 148922944501Smrg } 149022944501Smrg 149122944501Smrg return get_pipe_from_crtc_id.pipe; 149222944501Smrg} 149322944501Smrg 149422944501Smrgstatic int 149522944501Smrgdrm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, 149622944501Smrg unsigned long size, void *data) 149722944501Smrg{ 149822944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 149922944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 150022944501Smrg struct drm_i915_gem_pread pread; 150122944501Smrg int ret; 150222944501Smrg 150320131375Smrg VG_CLEAR(pread); 150422944501Smrg pread.handle = bo_gem->gem_handle; 150522944501Smrg pread.offset = offset; 150622944501Smrg pread.size = size; 150722944501Smrg pread.data_ptr = (uint64_t) (uintptr_t) data; 15086d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, 15096d98c517Smrg DRM_IOCTL_I915_GEM_PREAD, 15106d98c517Smrg &pread); 151122944501Smrg if (ret != 0) { 151222944501Smrg ret = -errno; 15139ce4edccSmrg DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n", 15149ce4edccSmrg __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 15159ce4edccSmrg (int)size, strerror(errno)); 151622944501Smrg } 151722944501Smrg 151822944501Smrg return ret; 151922944501Smrg} 152022944501Smrg 15219ce4edccSmrg/** Waits for all GPU rendering with the object to have completed. */ 152222944501Smrgstatic void 152322944501Smrgdrm_intel_gem_bo_wait_rendering(drm_intel_bo *bo) 152422944501Smrg{ 15259ce4edccSmrg drm_intel_gem_bo_start_gtt_access(bo, 1); 152622944501Smrg} 152722944501Smrg 152820131375Smrg/** 152920131375Smrg * Waits on a BO for the given amount of time. 153020131375Smrg * 153120131375Smrg * @bo: buffer object to wait for 153220131375Smrg * @timeout_ns: amount of time to wait in nanoseconds. 153320131375Smrg * If value is less than 0, an infinite wait will occur. 153420131375Smrg * 153520131375Smrg * Returns 0 if the wait was successful, i.e. the last batch referencing the 153620131375Smrg * object has completed within the allotted time. Otherwise a negative return 153720131375Smrg * value describes the error. Of particular interest is -ETIME when the wait has 153820131375Smrg * failed to yield the desired result. 153920131375Smrg * 154020131375Smrg * Similar to drm_intel_gem_bo_wait_rendering except a timeout parameter allows 154120131375Smrg * the operation to give up after a certain amount of time. Another subtle 154220131375Smrg * difference is that the internal locking semantics are different (this variant does 154320131375Smrg * not hold the lock for the duration of the wait). This makes the wait subject 154420131375Smrg * to a larger userspace race window. 154520131375Smrg * 154620131375Smrg * The implementation shall wait until the object is no longer actively 154720131375Smrg * referenced within a batch buffer at the time of the call. The wait will 154820131375Smrg * not guard against the buffer being re-issued via another thread, or via a flinked 154920131375Smrg * handle. Userspace must make sure this race does not occur if such precision 155020131375Smrg * is important.
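 *
 * Example (a sketch; the 100ms budget is arbitrary): a caller that only
 * wants to block for a bounded time can do
 *
 *   ret = drm_intel_gem_bo_wait(bo, 100 * 1000 * 1000);
 *
 * and treat -ETIME as "still busy", 0 as "idle", and any other negative
 * value as an error; passing a negative timeout_ns waits indefinitely.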
155120131375Smrg */ 155220131375Smrgint drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns) 155320131375Smrg{ 155420131375Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 155520131375Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 155620131375Smrg struct drm_i915_gem_wait wait; 155720131375Smrg int ret; 155820131375Smrg 155920131375Smrg if (!bufmgr_gem->has_wait_timeout) { 156020131375Smrg DBG("%s:%d: Timed wait is not supported. Falling back to " 156120131375Smrg "infinite wait\n", __FILE__, __LINE__); 156220131375Smrg if (timeout_ns) { 156320131375Smrg drm_intel_gem_bo_wait_rendering(bo); 156420131375Smrg return 0; 156520131375Smrg } else { 156620131375Smrg return drm_intel_gem_bo_busy(bo) ? -ETIME : 0; 156720131375Smrg } 156820131375Smrg } 156920131375Smrg 157020131375Smrg wait.bo_handle = bo_gem->gem_handle; 157120131375Smrg wait.timeout_ns = timeout_ns; 157220131375Smrg wait.flags = 0; 157320131375Smrg ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); 157420131375Smrg if (ret == -1) 157520131375Smrg return -errno; 157620131375Smrg 157720131375Smrg return ret; 157820131375Smrg} 157920131375Smrg 158022944501Smrg/** 158122944501Smrg * Sets the object to the GTT read and possibly write domain, used by the X 158222944501Smrg * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt(). 158322944501Smrg * 158422944501Smrg * In combination with drm_intel_gem_bo_pin() and manual fence management, we 158522944501Smrg * can do tiled pixmaps this way. 158622944501Smrg */ 158722944501Smrgvoid 158822944501Smrgdrm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable) 158922944501Smrg{ 159022944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 159122944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 159222944501Smrg struct drm_i915_gem_set_domain set_domain; 159322944501Smrg int ret; 159422944501Smrg 159520131375Smrg VG_CLEAR(set_domain); 159622944501Smrg set_domain.handle = bo_gem->gem_handle; 159722944501Smrg set_domain.read_domains = I915_GEM_DOMAIN_GTT; 159822944501Smrg set_domain.write_domain = write_enable ? 
I915_GEM_DOMAIN_GTT : 0; 15996d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, 16006d98c517Smrg DRM_IOCTL_I915_GEM_SET_DOMAIN, 16016d98c517Smrg &set_domain); 160222944501Smrg if (ret != 0) { 16039ce4edccSmrg DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n", 16049ce4edccSmrg __FILE__, __LINE__, bo_gem->gem_handle, 16059ce4edccSmrg set_domain.read_domains, set_domain.write_domain, 16069ce4edccSmrg strerror(errno)); 160722944501Smrg } 160822944501Smrg} 160922944501Smrg 161022944501Smrgstatic void 161122944501Smrgdrm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr) 161222944501Smrg{ 161322944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 161422944501Smrg int i; 161522944501Smrg 161622944501Smrg free(bufmgr_gem->exec2_objects); 161722944501Smrg free(bufmgr_gem->exec_objects); 161822944501Smrg free(bufmgr_gem->exec_bos); 161920131375Smrg free(bufmgr_gem->aub_filename); 162022944501Smrg 162122944501Smrg pthread_mutex_destroy(&bufmgr_gem->lock); 162222944501Smrg 162322944501Smrg /* Free any cached buffer objects we were going to reuse */ 1624aaba2545Smrg for (i = 0; i < bufmgr_gem->num_buckets; i++) { 162522944501Smrg struct drm_intel_gem_bo_bucket *bucket = 162622944501Smrg &bufmgr_gem->cache_bucket[i]; 162722944501Smrg drm_intel_bo_gem *bo_gem; 162822944501Smrg 162922944501Smrg while (!DRMLISTEMPTY(&bucket->head)) { 163022944501Smrg bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 163122944501Smrg bucket->head.next, head); 163222944501Smrg DRMLISTDEL(&bo_gem->head); 163322944501Smrg 163422944501Smrg drm_intel_gem_bo_free(&bo_gem->bo); 163522944501Smrg } 163622944501Smrg } 163722944501Smrg 163822944501Smrg free(bufmgr); 163922944501Smrg} 164022944501Smrg 164122944501Smrg/** 164222944501Smrg * Adds the target buffer to the validation list and adds the relocation 164322944501Smrg * to the reloc_buffer's relocation list. 164422944501Smrg * 164522944501Smrg * The relocation entry at the given offset must already contain the 164622944501Smrg * precomputed relocation value, because the kernel will optimize out 164722944501Smrg * the relocation entry write when the buffer hasn't moved from the 164822944501Smrg * last known offset in target_bo. 
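 *
 * For illustration (a sketch; batch, n and delta are placeholder names),
 * a caller first writes the presumed address into the batch and then
 * records the relocation through the public wrapper:
 *
 *   batch[n] = target_bo->offset64 + delta;
 *   drm_intel_bo_emit_reloc(batch_bo, n * 4, target_bo, delta,
 *                           I915_GEM_DOMAIN_RENDER, 0);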
164922944501Smrg */ 165022944501Smrgstatic int 165122944501Smrgdo_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 165222944501Smrg drm_intel_bo *target_bo, uint32_t target_offset, 165322944501Smrg uint32_t read_domains, uint32_t write_domain, 165420131375Smrg bool need_fence) 165522944501Smrg{ 165622944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 165722944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 165822944501Smrg drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 165920131375Smrg bool fenced_command; 166022944501Smrg 166122944501Smrg if (bo_gem->has_error) 166222944501Smrg return -ENOMEM; 166322944501Smrg 166422944501Smrg if (target_bo_gem->has_error) { 166520131375Smrg bo_gem->has_error = true; 166622944501Smrg return -ENOMEM; 166722944501Smrg } 166822944501Smrg 166922944501Smrg /* We never use HW fences for rendering on 965+ */ 167022944501Smrg if (bufmgr_gem->gen >= 4) 167120131375Smrg need_fence = false; 167222944501Smrg 16739ce4edccSmrg fenced_command = need_fence; 16749ce4edccSmrg if (target_bo_gem->tiling_mode == I915_TILING_NONE) 167520131375Smrg need_fence = false; 16769ce4edccSmrg 167722944501Smrg /* Create a new relocation list if needed */ 167822944501Smrg if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo)) 167922944501Smrg return -ENOMEM; 168022944501Smrg 168122944501Smrg /* Check overflow */ 168222944501Smrg assert(bo_gem->reloc_count < bufmgr_gem->max_relocs); 168322944501Smrg 168422944501Smrg /* Check args */ 168522944501Smrg assert(offset <= bo->size - 4); 168622944501Smrg assert((write_domain & (write_domain - 1)) == 0); 168722944501Smrg 168822944501Smrg /* Make sure that we're not adding a reloc to something whose size has 168922944501Smrg * already been accounted for. 169022944501Smrg */ 169122944501Smrg assert(!bo_gem->used_as_reloc_target); 1692aaba2545Smrg if (target_bo_gem != bo_gem) { 169320131375Smrg target_bo_gem->used_as_reloc_target = true; 1694aaba2545Smrg bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size; 1695aaba2545Smrg } 169622944501Smrg /* An object needing a fence is a tiled buffer, so it won't have 169722944501Smrg * relocs to other buffers. 
169822944501Smrg */ 169922944501Smrg if (need_fence) 170022944501Smrg target_bo_gem->reloc_tree_fences = 1; 170122944501Smrg bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences; 170222944501Smrg 170322944501Smrg bo_gem->relocs[bo_gem->reloc_count].offset = offset; 170422944501Smrg bo_gem->relocs[bo_gem->reloc_count].delta = target_offset; 170522944501Smrg bo_gem->relocs[bo_gem->reloc_count].target_handle = 170622944501Smrg target_bo_gem->gem_handle; 170722944501Smrg bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; 170822944501Smrg bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; 170920131375Smrg bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64; 171022944501Smrg 171122944501Smrg bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo; 1712aaba2545Smrg if (target_bo != bo) 1713aaba2545Smrg drm_intel_gem_bo_reference(target_bo); 17149ce4edccSmrg if (fenced_command) 171522944501Smrg bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 171622944501Smrg DRM_INTEL_RELOC_FENCE; 171722944501Smrg else 171822944501Smrg bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0; 171922944501Smrg 172022944501Smrg bo_gem->reloc_count++; 172122944501Smrg 172222944501Smrg return 0; 172322944501Smrg} 172422944501Smrg 172522944501Smrgstatic int 172622944501Smrgdrm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 172722944501Smrg drm_intel_bo *target_bo, uint32_t target_offset, 172822944501Smrg uint32_t read_domains, uint32_t write_domain) 172922944501Smrg{ 173022944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 173122944501Smrg 173222944501Smrg return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 173322944501Smrg read_domains, write_domain, 173422944501Smrg !bufmgr_gem->fenced_relocs); 173522944501Smrg} 173622944501Smrg 173722944501Smrgstatic int 173822944501Smrgdrm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset, 173922944501Smrg drm_intel_bo *target_bo, 174022944501Smrg uint32_t target_offset, 174122944501Smrg uint32_t read_domains, uint32_t write_domain) 174222944501Smrg{ 174322944501Smrg return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 174420131375Smrg read_domains, write_domain, true); 174520131375Smrg} 174620131375Smrg 174720131375Smrgint 174820131375Smrgdrm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo) 174920131375Smrg{ 175020131375Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 175120131375Smrg 175220131375Smrg return bo_gem->reloc_count; 175320131375Smrg} 175420131375Smrg 175520131375Smrg/** 175620131375Smrg * Removes existing relocation entries in the BO after "start". 175720131375Smrg * 175820131375Smrg * This allows a user to avoid a two-step process for state setup with 175920131375Smrg * counting up all the buffer objects and doing a 176020131375Smrg * drm_intel_bufmgr_check_aperture_space() before emitting any of the 176120131375Smrg * relocations for the state setup. Instead, save the state of the 176220131375Smrg * batchbuffer including drm_intel_gem_get_reloc_count(), emit all the 176320131375Smrg * state, and then check if it still fits in the aperture. 176420131375Smrg * 176520131375Smrg * Any further drm_intel_bufmgr_check_aperture_space() queries 176620131375Smrg * involving this buffer in the tree are undefined after this call. 
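 *
 * Sketch of the intended pattern (illustrative names; error handling
 * omitted):
 *
 *   int saved = drm_intel_gem_bo_get_reloc_count(batch_bo);
 *   ... emit state and its relocations into batch_bo ...
 *   if (drm_intel_bufmgr_check_aperture_space(&batch_bo, 1) != 0) {
 *       drm_intel_gem_bo_clear_relocs(batch_bo, saved);
 *       ... flush the batch, then re-emit the state ...
 *   }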
176720131375Smrg */ 176820131375Smrgvoid 176920131375Smrgdrm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start) 177020131375Smrg{ 177120131375Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 177220131375Smrg int i; 177320131375Smrg struct timespec time; 177420131375Smrg 177520131375Smrg clock_gettime(CLOCK_MONOTONIC, &time); 177620131375Smrg 177720131375Smrg assert(bo_gem->reloc_count >= start); 177820131375Smrg /* Unreference the cleared target buffers */ 177920131375Smrg for (i = start; i < bo_gem->reloc_count; i++) { 178020131375Smrg drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo; 178120131375Smrg if (&target_bo_gem->bo != bo) { 178220131375Smrg bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences; 178320131375Smrg drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, 178420131375Smrg time.tv_sec); 178520131375Smrg } 178620131375Smrg } 178720131375Smrg bo_gem->reloc_count = start; 178822944501Smrg} 178922944501Smrg 179022944501Smrg/** 179122944501Smrg * Walk the tree of relocations rooted at BO and accumulate the list of 179222944501Smrg * validations to be performed and update the relocation buffers with 179322944501Smrg * index values into the validation list. 179422944501Smrg */ 179522944501Smrgstatic void 179622944501Smrgdrm_intel_gem_bo_process_reloc(drm_intel_bo *bo) 179722944501Smrg{ 179822944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 179922944501Smrg int i; 180022944501Smrg 180122944501Smrg if (bo_gem->relocs == NULL) 180222944501Smrg return; 180322944501Smrg 180422944501Smrg for (i = 0; i < bo_gem->reloc_count; i++) { 180522944501Smrg drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 180622944501Smrg 1807aaba2545Smrg if (target_bo == bo) 1808aaba2545Smrg continue; 1809aaba2545Smrg 181020131375Smrg drm_intel_gem_bo_mark_mmaps_incoherent(bo); 181120131375Smrg 181222944501Smrg /* Continue walking the tree depth-first. */ 181322944501Smrg drm_intel_gem_bo_process_reloc(target_bo); 181422944501Smrg 181522944501Smrg /* Add the target to the validate list */ 181622944501Smrg drm_intel_add_validate_buffer(target_bo); 181722944501Smrg } 181822944501Smrg} 181922944501Smrg 182022944501Smrgstatic void 182122944501Smrgdrm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) 182222944501Smrg{ 182322944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 182422944501Smrg int i; 182522944501Smrg 182622944501Smrg if (bo_gem->relocs == NULL) 182722944501Smrg return; 182822944501Smrg 182922944501Smrg for (i = 0; i < bo_gem->reloc_count; i++) { 183022944501Smrg drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 183122944501Smrg int need_fence; 183222944501Smrg 1833aaba2545Smrg if (target_bo == bo) 1834aaba2545Smrg continue; 1835aaba2545Smrg 183620131375Smrg drm_intel_gem_bo_mark_mmaps_incoherent(bo); 183720131375Smrg 183822944501Smrg /* Continue walking the tree depth-first. 
*/ 183922944501Smrg drm_intel_gem_bo_process_reloc2(target_bo); 184022944501Smrg 184122944501Smrg need_fence = (bo_gem->reloc_target_info[i].flags & 184222944501Smrg DRM_INTEL_RELOC_FENCE); 184322944501Smrg 184422944501Smrg /* Add the target to the validate list */ 184522944501Smrg drm_intel_add_validate_buffer2(target_bo, need_fence); 184622944501Smrg } 184722944501Smrg} 184822944501Smrg 184922944501Smrg 185022944501Smrgstatic void 185122944501Smrgdrm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem) 185222944501Smrg{ 185322944501Smrg int i; 185422944501Smrg 185522944501Smrg for (i = 0; i < bufmgr_gem->exec_count; i++) { 185622944501Smrg drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 185722944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 185822944501Smrg 185922944501Smrg /* Update the buffer offset */ 186020131375Smrg if (bufmgr_gem->exec_objects[i].offset != bo->offset64) { 186122944501Smrg DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", 186220131375Smrg bo_gem->gem_handle, bo_gem->name, bo->offset64, 186322944501Smrg (unsigned long long)bufmgr_gem->exec_objects[i]. 186422944501Smrg offset); 186520131375Smrg bo->offset64 = bufmgr_gem->exec_objects[i].offset; 186622944501Smrg bo->offset = bufmgr_gem->exec_objects[i].offset; 186722944501Smrg } 186822944501Smrg } 186922944501Smrg} 187022944501Smrg 187122944501Smrgstatic void 187222944501Smrgdrm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem) 187322944501Smrg{ 187422944501Smrg int i; 187522944501Smrg 187622944501Smrg for (i = 0; i < bufmgr_gem->exec_count; i++) { 187722944501Smrg drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 187822944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 187922944501Smrg 188022944501Smrg /* Update the buffer offset */ 188120131375Smrg if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) { 188222944501Smrg DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", 188320131375Smrg bo_gem->gem_handle, bo_gem->name, bo->offset64, 188422944501Smrg (unsigned long long)bufmgr_gem->exec2_objects[i].offset); 188520131375Smrg bo->offset64 = bufmgr_gem->exec2_objects[i].offset; 188622944501Smrg bo->offset = bufmgr_gem->exec2_objects[i].offset; 188722944501Smrg } 188822944501Smrg } 188922944501Smrg} 189022944501Smrg 189120131375Smrgstatic void 189220131375Smrgaub_out(drm_intel_bufmgr_gem *bufmgr_gem, uint32_t data) 189320131375Smrg{ 189420131375Smrg fwrite(&data, 1, 4, bufmgr_gem->aub_file); 189520131375Smrg} 189620131375Smrg 189720131375Smrgstatic void 189820131375Smrgaub_out_data(drm_intel_bufmgr_gem *bufmgr_gem, void *data, size_t size) 189920131375Smrg{ 190020131375Smrg fwrite(data, 1, size, bufmgr_gem->aub_file); 190120131375Smrg} 190220131375Smrg 190320131375Smrgstatic void 190420131375Smrgaub_write_bo_data(drm_intel_bo *bo, uint32_t offset, uint32_t size) 190522944501Smrg{ 190622944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 190722944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 190820131375Smrg uint32_t *data; 190920131375Smrg unsigned int i; 191022944501Smrg 191120131375Smrg data = malloc(bo->size); 191220131375Smrg drm_intel_bo_get_subdata(bo, offset, size, data); 191322944501Smrg 191420131375Smrg /* Easy mode: write out bo with no relocations */ 191520131375Smrg if (!bo_gem->reloc_count) { 191620131375Smrg aub_out_data(bufmgr_gem, data, size); 191720131375Smrg free(data); 191820131375Smrg return; 191920131375Smrg } 192022944501Smrg 192120131375Smrg /* Otherwise, handle the relocations while writing. 
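 * For each dword in the block we check whether a relocation entry points
 * at it; if so, the target's AUB address plus the relocation delta is
 * emitted (and patched into the local copy) instead of the raw buffer
 * contents, so the dump contains addresses valid within the AUB address
 * space.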
*/ 192220131375Smrg for (i = 0; i < size / 4; i++) { 192320131375Smrg int r; 192420131375Smrg for (r = 0; r < bo_gem->reloc_count; r++) { 192520131375Smrg struct drm_i915_gem_relocation_entry *reloc; 192620131375Smrg drm_intel_reloc_target *info; 192722944501Smrg 192820131375Smrg reloc = &bo_gem->relocs[r]; 192920131375Smrg info = &bo_gem->reloc_target_info[r]; 193022944501Smrg 193120131375Smrg if (reloc->offset == offset + i * 4) { 193220131375Smrg drm_intel_bo_gem *target_gem; 193320131375Smrg uint32_t val; 193422944501Smrg 193520131375Smrg target_gem = (drm_intel_bo_gem *)info->bo; 193622944501Smrg 193720131375Smrg val = reloc->delta; 193820131375Smrg val += target_gem->aub_offset; 193922944501Smrg 194020131375Smrg aub_out(bufmgr_gem, val); 194120131375Smrg data[i] = val; 194220131375Smrg break; 194320131375Smrg } 194420131375Smrg } 194520131375Smrg if (r == bo_gem->reloc_count) { 194620131375Smrg /* no relocation, just the data */ 194720131375Smrg aub_out(bufmgr_gem, data[i]); 194820131375Smrg } 194922944501Smrg } 195022944501Smrg 195120131375Smrg free(data); 195222944501Smrg} 195322944501Smrg 195420131375Smrgstatic void 195520131375Smrgaub_bo_get_address(drm_intel_bo *bo) 195622944501Smrg{ 195720131375Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 195820131375Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 195922944501Smrg 196020131375Smrg /* Give the object a graphics address in the AUB file. We 196120131375Smrg * don't just use the GEM object address because we do AUB 196220131375Smrg * dumping before execution -- we want to successfully log 196320131375Smrg * when the hardware might hang, and we might even want to aub 196420131375Smrg * capture for a driver trying to execute on a different 196520131375Smrg * generation of hardware by disabling the actual kernel exec 196620131375Smrg * call. 196720131375Smrg */ 196820131375Smrg bo_gem->aub_offset = bufmgr_gem->aub_offset; 196920131375Smrg bufmgr_gem->aub_offset += bo->size; 197020131375Smrg /* XXX: Handle aperture overflow. */ 197120131375Smrg assert(bufmgr_gem->aub_offset < 256 * 1024 * 1024); 197220131375Smrg} 197320131375Smrg 197420131375Smrgstatic void 197520131375Smrgaub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype, 197620131375Smrg uint32_t offset, uint32_t size) 197720131375Smrg{ 197820131375Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 197920131375Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 198020131375Smrg 198120131375Smrg aub_out(bufmgr_gem, 198220131375Smrg CMD_AUB_TRACE_HEADER_BLOCK | 198320131375Smrg ((bufmgr_gem->gen >= 8 ? 6 : 5) - 2)); 198420131375Smrg aub_out(bufmgr_gem, 198520131375Smrg AUB_TRACE_MEMTYPE_GTT | type | AUB_TRACE_OP_DATA_WRITE); 198620131375Smrg aub_out(bufmgr_gem, subtype); 198720131375Smrg aub_out(bufmgr_gem, bo_gem->aub_offset + offset); 198820131375Smrg aub_out(bufmgr_gem, size); 198920131375Smrg if (bufmgr_gem->gen >= 8) 199020131375Smrg aub_out(bufmgr_gem, 0); 199120131375Smrg aub_write_bo_data(bo, offset, size); 199220131375Smrg} 199320131375Smrg 199420131375Smrg/** 199520131375Smrg * Break up large objects into multiple writes. Otherwise a 128kb VBO 199620131375Smrg * would overflow the 16 bits of size field in the packet header and 199720131375Smrg * everything goes badly after that. 
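 * The loop below therefore emits at most 8 * 4096 = 32kB per trace
 * block, comfortably below the 64kB limit implied by the 16-bit size
 * field.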
199820131375Smrg */ 199920131375Smrgstatic void 200020131375Smrgaub_write_large_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype, 200120131375Smrg uint32_t offset, uint32_t size) 200220131375Smrg{ 200320131375Smrg uint32_t block_size; 200420131375Smrg uint32_t sub_offset; 200520131375Smrg 200620131375Smrg for (sub_offset = 0; sub_offset < size; sub_offset += block_size) { 200720131375Smrg block_size = size - sub_offset; 200820131375Smrg 200920131375Smrg if (block_size > 8 * 4096) 201020131375Smrg block_size = 8 * 4096; 201120131375Smrg 201220131375Smrg aub_write_trace_block(bo, type, subtype, offset + sub_offset, 201320131375Smrg block_size); 201420131375Smrg } 201520131375Smrg} 201620131375Smrg 201720131375Smrgstatic void 201820131375Smrgaub_write_bo(drm_intel_bo *bo) 201920131375Smrg{ 202020131375Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 202120131375Smrg uint32_t offset = 0; 202220131375Smrg unsigned i; 202320131375Smrg 202420131375Smrg aub_bo_get_address(bo); 202520131375Smrg 202620131375Smrg /* Write out each annotated section separately. */ 202720131375Smrg for (i = 0; i < bo_gem->aub_annotation_count; ++i) { 202820131375Smrg drm_intel_aub_annotation *annotation = 202920131375Smrg &bo_gem->aub_annotations[i]; 203020131375Smrg uint32_t ending_offset = annotation->ending_offset; 203120131375Smrg if (ending_offset > bo->size) 203220131375Smrg ending_offset = bo->size; 203320131375Smrg if (ending_offset > offset) { 203420131375Smrg aub_write_large_trace_block(bo, annotation->type, 203520131375Smrg annotation->subtype, 203620131375Smrg offset, 203720131375Smrg ending_offset - offset); 203820131375Smrg offset = ending_offset; 203920131375Smrg } 204020131375Smrg } 204120131375Smrg 204220131375Smrg /* Write out any remaining unannotated data */ 204320131375Smrg if (offset < bo->size) { 204420131375Smrg aub_write_large_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0, 204520131375Smrg offset, bo->size - offset); 204620131375Smrg } 204720131375Smrg} 204820131375Smrg 204920131375Smrg/* 205020131375Smrg * Make a ringbuffer on fly and dump it 205120131375Smrg */ 205220131375Smrgstatic void 205320131375Smrgaub_build_dump_ringbuffer(drm_intel_bufmgr_gem *bufmgr_gem, 205420131375Smrg uint32_t batch_buffer, int ring_flag) 205520131375Smrg{ 205620131375Smrg uint32_t ringbuffer[4096]; 205720131375Smrg int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */ 205820131375Smrg int ring_count = 0; 205920131375Smrg 206020131375Smrg if (ring_flag == I915_EXEC_BSD) 206120131375Smrg ring = AUB_TRACE_TYPE_RING_PRB1; 206220131375Smrg else if (ring_flag == I915_EXEC_BLT) 206320131375Smrg ring = AUB_TRACE_TYPE_RING_PRB2; 206420131375Smrg 206520131375Smrg /* Make a ring buffer to execute our batchbuffer. */ 206620131375Smrg memset(ringbuffer, 0, sizeof(ringbuffer)); 206720131375Smrg if (bufmgr_gem->gen >= 8) { 206820131375Smrg ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START | (3 - 2); 206920131375Smrg ringbuffer[ring_count++] = batch_buffer; 207020131375Smrg ringbuffer[ring_count++] = 0; 207120131375Smrg } else { 207220131375Smrg ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START; 207320131375Smrg ringbuffer[ring_count++] = batch_buffer; 207420131375Smrg } 207520131375Smrg 207620131375Smrg /* Write out the ring. This appears to trigger execution of 207720131375Smrg * the ring in the simulator. 207820131375Smrg */ 207920131375Smrg aub_out(bufmgr_gem, 208020131375Smrg CMD_AUB_TRACE_HEADER_BLOCK | 208120131375Smrg ((bufmgr_gem->gen >= 8 ? 
6 : 5) - 2)); 208220131375Smrg aub_out(bufmgr_gem, 208320131375Smrg AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE); 208420131375Smrg aub_out(bufmgr_gem, 0); /* general/surface subtype */ 208520131375Smrg aub_out(bufmgr_gem, bufmgr_gem->aub_offset); 208620131375Smrg aub_out(bufmgr_gem, ring_count * 4); 208720131375Smrg if (bufmgr_gem->gen >= 8) 208820131375Smrg aub_out(bufmgr_gem, 0); 208920131375Smrg 209020131375Smrg /* FIXME: Need some flush operations here? */ 209120131375Smrg aub_out_data(bufmgr_gem, ringbuffer, ring_count * 4); 209220131375Smrg 209320131375Smrg /* Update offset pointer */ 209420131375Smrg bufmgr_gem->aub_offset += 4096; 209520131375Smrg} 209620131375Smrg 209720131375Smrgvoid 209820131375Smrgdrm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo, 209920131375Smrg int x1, int y1, int width, int height, 210020131375Smrg enum aub_dump_bmp_format format, 210120131375Smrg int pitch, int offset) 210220131375Smrg{ 210320131375Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 210420131375Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 210520131375Smrg uint32_t cpp; 210620131375Smrg 210720131375Smrg switch (format) { 210820131375Smrg case AUB_DUMP_BMP_FORMAT_8BIT: 210920131375Smrg cpp = 1; 211020131375Smrg break; 211120131375Smrg case AUB_DUMP_BMP_FORMAT_ARGB_4444: 211220131375Smrg cpp = 2; 211320131375Smrg break; 211420131375Smrg case AUB_DUMP_BMP_FORMAT_ARGB_0888: 211520131375Smrg case AUB_DUMP_BMP_FORMAT_ARGB_8888: 211620131375Smrg cpp = 4; 211720131375Smrg break; 211820131375Smrg default: 211920131375Smrg printf("Unknown AUB dump format %d\n", format); 212020131375Smrg return; 212120131375Smrg } 212220131375Smrg 212320131375Smrg if (!bufmgr_gem->aub_file) 212420131375Smrg return; 212520131375Smrg 212620131375Smrg aub_out(bufmgr_gem, CMD_AUB_DUMP_BMP | 4); 212720131375Smrg aub_out(bufmgr_gem, (y1 << 16) | x1); 212820131375Smrg aub_out(bufmgr_gem, 212920131375Smrg (format << 24) | 213020131375Smrg (cpp << 19) | 213120131375Smrg pitch / 4); 213220131375Smrg aub_out(bufmgr_gem, (height << 16) | width); 213320131375Smrg aub_out(bufmgr_gem, bo_gem->aub_offset + offset); 213420131375Smrg aub_out(bufmgr_gem, 213520131375Smrg ((bo_gem->tiling_mode != I915_TILING_NONE) ? (1 << 2) : 0) | 213620131375Smrg ((bo_gem->tiling_mode == I915_TILING_Y) ? (1 << 3) : 0)); 213720131375Smrg} 213820131375Smrg 213920131375Smrgstatic void 214020131375Smrgaub_exec(drm_intel_bo *bo, int ring_flag, int used) 214120131375Smrg{ 214220131375Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 214320131375Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 214420131375Smrg int i; 214520131375Smrg bool batch_buffer_needs_annotations; 214620131375Smrg 214720131375Smrg if (!bufmgr_gem->aub_file) 214820131375Smrg return; 214920131375Smrg 215020131375Smrg /* If batch buffer is not annotated, annotate it the best we 215120131375Smrg * can. 
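 * The default annotation used below marks the first 'used' bytes as
 * AUB_TRACE_TYPE_BATCH and the remainder of the buffer as
 * AUB_TRACE_TYPE_NOTYPE.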
215220131375Smrg */ 215320131375Smrg batch_buffer_needs_annotations = bo_gem->aub_annotation_count == 0; 215420131375Smrg if (batch_buffer_needs_annotations) { 215520131375Smrg drm_intel_aub_annotation annotations[2] = { 215620131375Smrg { AUB_TRACE_TYPE_BATCH, 0, used }, 215720131375Smrg { AUB_TRACE_TYPE_NOTYPE, 0, bo->size } 215820131375Smrg }; 215920131375Smrg drm_intel_bufmgr_gem_set_aub_annotations(bo, annotations, 2); 216020131375Smrg } 216120131375Smrg 216220131375Smrg /* Write out all buffers to AUB memory */ 216320131375Smrg for (i = 0; i < bufmgr_gem->exec_count; i++) { 216420131375Smrg aub_write_bo(bufmgr_gem->exec_bos[i]); 216520131375Smrg } 216620131375Smrg 216720131375Smrg /* Remove any annotations we added */ 216820131375Smrg if (batch_buffer_needs_annotations) 216920131375Smrg drm_intel_bufmgr_gem_set_aub_annotations(bo, NULL, 0); 217020131375Smrg 217120131375Smrg /* Dump ring buffer */ 217220131375Smrg aub_build_dump_ringbuffer(bufmgr_gem, bo_gem->aub_offset, ring_flag); 217320131375Smrg 217420131375Smrg fflush(bufmgr_gem->aub_file); 217520131375Smrg 217620131375Smrg /* 217720131375Smrg * One frame has been dumped. So reset the aub_offset for the next frame. 217820131375Smrg * 217920131375Smrg * FIXME: Can we do this? 218020131375Smrg */ 218120131375Smrg bufmgr_gem->aub_offset = 0x10000; 218220131375Smrg} 218320131375Smrg 218420131375Smrgstatic int 218520131375Smrgdrm_intel_gem_bo_exec(drm_intel_bo *bo, int used, 218620131375Smrg drm_clip_rect_t * cliprects, int num_cliprects, int DR4) 218720131375Smrg{ 218820131375Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 218920131375Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 219020131375Smrg struct drm_i915_gem_execbuffer execbuf; 219120131375Smrg int ret, i; 219220131375Smrg 219320131375Smrg if (bo_gem->has_error) 219420131375Smrg return -ENOMEM; 219520131375Smrg 219620131375Smrg pthread_mutex_lock(&bufmgr_gem->lock); 219720131375Smrg /* Update indices and set up the validate list. */ 219820131375Smrg drm_intel_gem_bo_process_reloc(bo); 219920131375Smrg 220020131375Smrg /* Add the batch buffer to the validation list. There are no 220120131375Smrg * relocations pointing to it. 220220131375Smrg */ 220320131375Smrg drm_intel_add_validate_buffer(bo); 220420131375Smrg 220520131375Smrg VG_CLEAR(execbuf); 220620131375Smrg execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects; 220720131375Smrg execbuf.buffer_count = bufmgr_gem->exec_count; 220820131375Smrg execbuf.batch_start_offset = 0; 220920131375Smrg execbuf.batch_len = used; 221020131375Smrg execbuf.cliprects_ptr = (uintptr_t) cliprects; 221120131375Smrg execbuf.num_cliprects = num_cliprects; 221220131375Smrg execbuf.DR1 = 0; 221320131375Smrg execbuf.DR4 = DR4; 221420131375Smrg 221520131375Smrg ret = drmIoctl(bufmgr_gem->fd, 221620131375Smrg DRM_IOCTL_I915_GEM_EXECBUFFER, 221720131375Smrg &execbuf); 221820131375Smrg if (ret != 0) { 221920131375Smrg ret = -errno; 222020131375Smrg if (errno == ENOSPC) { 222120131375Smrg DBG("Execbuffer fails to pin. " 222220131375Smrg "Estimate: %u. Actual: %u. 
Available: %u\n", 222320131375Smrg drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 222420131375Smrg bufmgr_gem-> 222520131375Smrg exec_count), 222620131375Smrg drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 222720131375Smrg bufmgr_gem-> 222820131375Smrg exec_count), 222920131375Smrg (unsigned int)bufmgr_gem->gtt_size); 223020131375Smrg } 223120131375Smrg } 223220131375Smrg drm_intel_update_buffer_offsets(bufmgr_gem); 223320131375Smrg 223420131375Smrg if (bufmgr_gem->bufmgr.debug) 223520131375Smrg drm_intel_gem_dump_validation_list(bufmgr_gem); 223620131375Smrg 223720131375Smrg for (i = 0; i < bufmgr_gem->exec_count; i++) { 223820131375Smrg drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 223920131375Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 224020131375Smrg 224120131375Smrg bo_gem->idle = false; 224220131375Smrg 224320131375Smrg /* Disconnect the buffer from the validate list */ 224420131375Smrg bo_gem->validate_index = -1; 224520131375Smrg bufmgr_gem->exec_bos[i] = NULL; 224620131375Smrg } 224720131375Smrg bufmgr_gem->exec_count = 0; 224820131375Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 224920131375Smrg 225020131375Smrg return ret; 225120131375Smrg} 225220131375Smrg 225320131375Smrgstatic int 225420131375Smrgdo_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx, 225520131375Smrg drm_clip_rect_t *cliprects, int num_cliprects, int DR4, 225620131375Smrg unsigned int flags) 225720131375Smrg{ 225820131375Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 225920131375Smrg struct drm_i915_gem_execbuffer2 execbuf; 226020131375Smrg int ret = 0; 226120131375Smrg int i; 226220131375Smrg 226320131375Smrg switch (flags & 0x7) { 226420131375Smrg default: 226520131375Smrg return -EINVAL; 226620131375Smrg case I915_EXEC_BLT: 22679ce4edccSmrg if (!bufmgr_gem->has_blt) 22689ce4edccSmrg return -EINVAL; 22699ce4edccSmrg break; 22709ce4edccSmrg case I915_EXEC_BSD: 22719ce4edccSmrg if (!bufmgr_gem->has_bsd) 22729ce4edccSmrg return -EINVAL; 22739ce4edccSmrg break; 227420131375Smrg case I915_EXEC_VEBOX: 227520131375Smrg if (!bufmgr_gem->has_vebox) 227620131375Smrg return -EINVAL; 227720131375Smrg break; 22789ce4edccSmrg case I915_EXEC_RENDER: 22799ce4edccSmrg case I915_EXEC_DEFAULT: 22809ce4edccSmrg break; 22819ce4edccSmrg } 2282aaba2545Smrg 228322944501Smrg pthread_mutex_lock(&bufmgr_gem->lock); 228422944501Smrg /* Update indices and set up the validate list. */ 228522944501Smrg drm_intel_gem_bo_process_reloc2(bo); 228622944501Smrg 228722944501Smrg /* Add the batch buffer to the validation list. There are no relocations 228822944501Smrg * pointing to it. 
228922944501Smrg */ 229022944501Smrg drm_intel_add_validate_buffer2(bo, 0); 229122944501Smrg 229220131375Smrg VG_CLEAR(execbuf); 229322944501Smrg execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects; 229422944501Smrg execbuf.buffer_count = bufmgr_gem->exec_count; 229522944501Smrg execbuf.batch_start_offset = 0; 229622944501Smrg execbuf.batch_len = used; 229722944501Smrg execbuf.cliprects_ptr = (uintptr_t)cliprects; 229822944501Smrg execbuf.num_cliprects = num_cliprects; 229922944501Smrg execbuf.DR1 = 0; 230022944501Smrg execbuf.DR4 = DR4; 230120131375Smrg execbuf.flags = flags; 230220131375Smrg if (ctx == NULL) 230320131375Smrg i915_execbuffer2_set_context_id(execbuf, 0); 230420131375Smrg else 230520131375Smrg i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id); 230622944501Smrg execbuf.rsvd2 = 0; 230722944501Smrg 230820131375Smrg aub_exec(bo, flags, used); 230920131375Smrg 231020131375Smrg if (bufmgr_gem->no_exec) 231120131375Smrg goto skip_execution; 231220131375Smrg 23136d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, 23146d98c517Smrg DRM_IOCTL_I915_GEM_EXECBUFFER2, 23156d98c517Smrg &execbuf); 231622944501Smrg if (ret != 0) { 231722944501Smrg ret = -errno; 23186d98c517Smrg if (ret == -ENOSPC) { 23199ce4edccSmrg DBG("Execbuffer fails to pin. " 23209ce4edccSmrg "Estimate: %u. Actual: %u. Available: %u\n", 23219ce4edccSmrg drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 23229ce4edccSmrg bufmgr_gem->exec_count), 23239ce4edccSmrg drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 23249ce4edccSmrg bufmgr_gem->exec_count), 23259ce4edccSmrg (unsigned int) bufmgr_gem->gtt_size); 232622944501Smrg } 232722944501Smrg } 232822944501Smrg drm_intel_update_buffer_offsets2(bufmgr_gem); 232922944501Smrg 233020131375Smrgskip_execution: 233122944501Smrg if (bufmgr_gem->bufmgr.debug) 233222944501Smrg drm_intel_gem_dump_validation_list(bufmgr_gem); 233322944501Smrg 233422944501Smrg for (i = 0; i < bufmgr_gem->exec_count; i++) { 233522944501Smrg drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 233622944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 233722944501Smrg 233820131375Smrg bo_gem->idle = false; 233920131375Smrg 234022944501Smrg /* Disconnect the buffer from the validate list */ 234122944501Smrg bo_gem->validate_index = -1; 234222944501Smrg bufmgr_gem->exec_bos[i] = NULL; 234322944501Smrg } 234422944501Smrg bufmgr_gem->exec_count = 0; 234522944501Smrg pthread_mutex_unlock(&bufmgr_gem->lock); 234622944501Smrg 234722944501Smrg return ret; 234822944501Smrg} 234922944501Smrg 2350aaba2545Smrgstatic int 2351aaba2545Smrgdrm_intel_gem_bo_exec2(drm_intel_bo *bo, int used, 2352aaba2545Smrg drm_clip_rect_t *cliprects, int num_cliprects, 2353aaba2545Smrg int DR4) 2354aaba2545Smrg{ 235520131375Smrg return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, 235620131375Smrg I915_EXEC_RENDER); 235720131375Smrg} 235820131375Smrg 235920131375Smrgstatic int 236020131375Smrgdrm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, 236120131375Smrg drm_clip_rect_t *cliprects, int num_cliprects, int DR4, 236220131375Smrg unsigned int flags) 236320131375Smrg{ 236420131375Smrg return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, 236520131375Smrg flags); 236620131375Smrg} 236720131375Smrg 236820131375Smrgint 236920131375Smrgdrm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx, 237020131375Smrg int used, unsigned int flags) 237120131375Smrg{ 237220131375Smrg return do_exec2(bo, used, ctx, NULL, 0, 0, flags); 2373aaba2545Smrg} 2374aaba2545Smrg 237522944501Smrgstatic int 
237622944501Smrgdrm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment) 237722944501Smrg{ 237822944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 237922944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 238022944501Smrg struct drm_i915_gem_pin pin; 238122944501Smrg int ret; 238222944501Smrg 238320131375Smrg VG_CLEAR(pin); 238422944501Smrg pin.handle = bo_gem->gem_handle; 238522944501Smrg pin.alignment = alignment; 238622944501Smrg 23876d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, 23886d98c517Smrg DRM_IOCTL_I915_GEM_PIN, 23896d98c517Smrg &pin); 239022944501Smrg if (ret != 0) 239122944501Smrg return -errno; 239222944501Smrg 239320131375Smrg bo->offset64 = pin.offset; 239422944501Smrg bo->offset = pin.offset; 239522944501Smrg return 0; 239622944501Smrg} 239722944501Smrg 239822944501Smrgstatic int 239922944501Smrgdrm_intel_gem_bo_unpin(drm_intel_bo *bo) 240022944501Smrg{ 240122944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 240222944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 240322944501Smrg struct drm_i915_gem_unpin unpin; 240422944501Smrg int ret; 240522944501Smrg 240620131375Smrg VG_CLEAR(unpin); 240722944501Smrg unpin.handle = bo_gem->gem_handle; 240822944501Smrg 24096d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin); 241022944501Smrg if (ret != 0) 241122944501Smrg return -errno; 241222944501Smrg 241322944501Smrg return 0; 241422944501Smrg} 241522944501Smrg 241622944501Smrgstatic int 24176d98c517Smrgdrm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, 24186d98c517Smrg uint32_t tiling_mode, 24196d98c517Smrg uint32_t stride) 242022944501Smrg{ 242122944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 242222944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 242322944501Smrg struct drm_i915_gem_set_tiling set_tiling; 242422944501Smrg int ret; 242522944501Smrg 24266d98c517Smrg if (bo_gem->global_name == 0 && 24276d98c517Smrg tiling_mode == bo_gem->tiling_mode && 24286d98c517Smrg stride == bo_gem->stride) 242922944501Smrg return 0; 243022944501Smrg 243122944501Smrg memset(&set_tiling, 0, sizeof(set_tiling)); 243222944501Smrg do { 24336d98c517Smrg /* set_tiling is slightly broken and overwrites the 24346d98c517Smrg * input on the error path, so we have to open code 24356d98c517Smrg * rmIoctl. 24366d98c517Smrg */ 24376d98c517Smrg set_tiling.handle = bo_gem->gem_handle; 24386d98c517Smrg set_tiling.tiling_mode = tiling_mode; 243922944501Smrg set_tiling.stride = stride; 244022944501Smrg 244122944501Smrg ret = ioctl(bufmgr_gem->fd, 244222944501Smrg DRM_IOCTL_I915_GEM_SET_TILING, 244322944501Smrg &set_tiling); 24446d98c517Smrg } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); 24456d98c517Smrg if (ret == -1) 24466d98c517Smrg return -errno; 24476d98c517Smrg 24486d98c517Smrg bo_gem->tiling_mode = set_tiling.tiling_mode; 24496d98c517Smrg bo_gem->swizzle_mode = set_tiling.swizzle_mode; 24506d98c517Smrg bo_gem->stride = set_tiling.stride; 24516d98c517Smrg return 0; 24526d98c517Smrg} 24536d98c517Smrg 24546d98c517Smrgstatic int 24556d98c517Smrgdrm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 24566d98c517Smrg uint32_t stride) 24576d98c517Smrg{ 24586d98c517Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 24596d98c517Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 24606d98c517Smrg int ret; 24616d98c517Smrg 24626d98c517Smrg /* Linear buffers have no stride. 
By ensuring that we only ever use 24636d98c517Smrg * stride 0 with linear buffers, we simplify our code. 24646d98c517Smrg */ 24656d98c517Smrg if (*tiling_mode == I915_TILING_NONE) 24666d98c517Smrg stride = 0; 24676d98c517Smrg 24686d98c517Smrg ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride); 24696d98c517Smrg if (ret == 0) 2470aaba2545Smrg drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 247122944501Smrg 247222944501Smrg *tiling_mode = bo_gem->tiling_mode; 2473aaba2545Smrg return ret; 247422944501Smrg} 247522944501Smrg 247622944501Smrgstatic int 247722944501Smrgdrm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 247822944501Smrg uint32_t * swizzle_mode) 247922944501Smrg{ 248022944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 248122944501Smrg 248222944501Smrg *tiling_mode = bo_gem->tiling_mode; 248322944501Smrg *swizzle_mode = bo_gem->swizzle_mode; 248422944501Smrg return 0; 248522944501Smrg} 248622944501Smrg 248720131375Smrgdrm_intel_bo * 248820131375Smrgdrm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size) 248920131375Smrg{ 249020131375Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 249120131375Smrg int ret; 249220131375Smrg uint32_t handle; 249320131375Smrg drm_intel_bo_gem *bo_gem; 249420131375Smrg struct drm_i915_gem_get_tiling get_tiling; 249520131375Smrg drmMMListHead *list; 249620131375Smrg 249720131375Smrg ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle); 249820131375Smrg 249920131375Smrg /* 250020131375Smrg * See if the kernel has already returned this buffer to us. Just as 250120131375Smrg * for named buffers, we must not create two bo's pointing at the same 250220131375Smrg * kernel object 250320131375Smrg */ 250420131375Smrg for (list = bufmgr_gem->named.next; 250520131375Smrg list != &bufmgr_gem->named; 250620131375Smrg list = list->next) { 250720131375Smrg bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list); 250820131375Smrg if (bo_gem->gem_handle == handle) { 250920131375Smrg drm_intel_gem_bo_reference(&bo_gem->bo); 251020131375Smrg return &bo_gem->bo; 251120131375Smrg } 251220131375Smrg } 251320131375Smrg 251420131375Smrg if (ret) { 251520131375Smrg fprintf(stderr,"ret is %d %d\n", ret, errno); 251620131375Smrg return NULL; 251720131375Smrg } 251820131375Smrg 251920131375Smrg bo_gem = calloc(1, sizeof(*bo_gem)); 252020131375Smrg if (!bo_gem) 252120131375Smrg return NULL; 252220131375Smrg 252320131375Smrg /* Determine size of bo. The fd-to-handle ioctl really should 252420131375Smrg * return the size, but it doesn't. If we have kernel 3.12 or 252520131375Smrg * later, we can lseek on the prime fd to get the size. Older 252620131375Smrg * kernels will just fail, in which case we fall back to the 252720131375Smrg * provided (estimated or guess size). 
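 *
 * Typical sharing pattern (a sketch; error handling omitted): the
 * exporting side calls
 *
 *   int prime_fd;
 *   drm_intel_bo_gem_export_to_prime(bo, &prime_fd);
 *
 * and the importing side, often in another process, calls
 *
 *   imported_bo = drm_intel_bo_gem_create_from_prime(bufmgr, prime_fd, size);
 *
 * where the size argument only matters on kernels older than 3.12.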
*/ 252820131375Smrg ret = lseek(prime_fd, 0, SEEK_END); 252920131375Smrg if (ret != -1) 253020131375Smrg bo_gem->bo.size = ret; 253120131375Smrg else 253220131375Smrg bo_gem->bo.size = size; 253320131375Smrg 253420131375Smrg bo_gem->bo.handle = handle; 253520131375Smrg bo_gem->bo.bufmgr = bufmgr; 253620131375Smrg 253720131375Smrg bo_gem->gem_handle = handle; 253820131375Smrg 253920131375Smrg atomic_set(&bo_gem->refcount, 1); 254020131375Smrg 254120131375Smrg bo_gem->name = "prime"; 254220131375Smrg bo_gem->validate_index = -1; 254320131375Smrg bo_gem->reloc_tree_fences = 0; 254420131375Smrg bo_gem->used_as_reloc_target = false; 254520131375Smrg bo_gem->has_error = false; 254620131375Smrg bo_gem->reusable = false; 254720131375Smrg 254820131375Smrg DRMINITLISTHEAD(&bo_gem->vma_list); 254920131375Smrg DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); 255020131375Smrg 255120131375Smrg VG_CLEAR(get_tiling); 255220131375Smrg get_tiling.handle = bo_gem->gem_handle; 255320131375Smrg ret = drmIoctl(bufmgr_gem->fd, 255420131375Smrg DRM_IOCTL_I915_GEM_GET_TILING, 255520131375Smrg &get_tiling); 255620131375Smrg if (ret != 0) { 255720131375Smrg drm_intel_gem_bo_unreference(&bo_gem->bo); 255820131375Smrg return NULL; 255920131375Smrg } 256020131375Smrg bo_gem->tiling_mode = get_tiling.tiling_mode; 256120131375Smrg bo_gem->swizzle_mode = get_tiling.swizzle_mode; 256220131375Smrg /* XXX stride is unknown */ 256320131375Smrg drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 256420131375Smrg 256520131375Smrg return &bo_gem->bo; 256620131375Smrg} 256720131375Smrg 256820131375Smrgint 256920131375Smrgdrm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd) 257020131375Smrg{ 257120131375Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 257220131375Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 257320131375Smrg 257420131375Smrg if (DRMLISTEMPTY(&bo_gem->name_list)) 257520131375Smrg DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); 257620131375Smrg 257720131375Smrg if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle, 257820131375Smrg DRM_CLOEXEC, prime_fd) != 0) 257920131375Smrg return -errno; 258020131375Smrg 258120131375Smrg bo_gem->reusable = false; 258220131375Smrg 258320131375Smrg return 0; 258420131375Smrg} 258520131375Smrg 258622944501Smrgstatic int 258722944501Smrgdrm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name) 258822944501Smrg{ 258922944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 259022944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 259122944501Smrg int ret; 259222944501Smrg 259322944501Smrg if (!bo_gem->global_name) { 259420131375Smrg struct drm_gem_flink flink; 259520131375Smrg 259620131375Smrg VG_CLEAR(flink); 259722944501Smrg flink.handle = bo_gem->gem_handle; 259822944501Smrg 25996d98c517Smrg ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink); 260022944501Smrg if (ret != 0) 260122944501Smrg return -errno; 260220131375Smrg 260322944501Smrg bo_gem->global_name = flink.name; 260420131375Smrg bo_gem->reusable = false; 260520131375Smrg 260620131375Smrg if (DRMLISTEMPTY(&bo_gem->name_list)) 260720131375Smrg DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); 260822944501Smrg } 260922944501Smrg 261022944501Smrg *name = bo_gem->global_name; 261122944501Smrg return 0; 261222944501Smrg} 261322944501Smrg 261422944501Smrg/** 261522944501Smrg * Enables unlimited caching of buffer objects for reuse. 
261622944501Smrg * 261722944501Smrg * This is potentially very memory expensive, as the cache at each bucket 261822944501Smrg * size is only bounded by how many buffers of that size we've managed to have 261922944501Smrg * in flight at once. 262022944501Smrg */ 262122944501Smrgvoid 262222944501Smrgdrm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr) 262322944501Smrg{ 262422944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 262522944501Smrg 262620131375Smrg bufmgr_gem->bo_reuse = true; 262722944501Smrg} 262822944501Smrg 262922944501Smrg/** 263022944501Smrg * Enable use of fenced reloc type. 263122944501Smrg * 263222944501Smrg * New code should enable this to avoid unnecessary fence register 263322944501Smrg * allocation. If this option is not enabled, all relocs will have fence 263422944501Smrg * register allocated. 263522944501Smrg */ 263622944501Smrgvoid 263722944501Smrgdrm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr) 263822944501Smrg{ 263922944501Smrg drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 264022944501Smrg 264122944501Smrg if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2) 264220131375Smrg bufmgr_gem->fenced_relocs = true; 264322944501Smrg} 264422944501Smrg 264522944501Smrg/** 264622944501Smrg * Return the additional aperture space required by the tree of buffer objects 264722944501Smrg * rooted at bo. 264822944501Smrg */ 264922944501Smrgstatic int 265022944501Smrgdrm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo) 265122944501Smrg{ 265222944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 265322944501Smrg int i; 265422944501Smrg int total = 0; 265522944501Smrg 265622944501Smrg if (bo == NULL || bo_gem->included_in_check_aperture) 265722944501Smrg return 0; 265822944501Smrg 265922944501Smrg total += bo->size; 266020131375Smrg bo_gem->included_in_check_aperture = true; 266122944501Smrg 266222944501Smrg for (i = 0; i < bo_gem->reloc_count; i++) 266322944501Smrg total += 266422944501Smrg drm_intel_gem_bo_get_aperture_space(bo_gem-> 266522944501Smrg reloc_target_info[i].bo); 266622944501Smrg 266722944501Smrg return total; 266822944501Smrg} 266922944501Smrg 267022944501Smrg/** 267122944501Smrg * Count the number of buffers in this list that need a fence reg 267222944501Smrg * 267322944501Smrg * If the count is greater than the number of available regs, we'll have 267422944501Smrg * to ask the caller to resubmit a batch with fewer tiled buffers. 267522944501Smrg * 267622944501Smrg * This function over-counts if the same buffer is used multiple times. 267722944501Smrg */ 267822944501Smrgstatic unsigned int 267922944501Smrgdrm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count) 268022944501Smrg{ 268122944501Smrg int i; 268222944501Smrg unsigned int total = 0; 268322944501Smrg 268422944501Smrg for (i = 0; i < count; i++) { 268522944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 268622944501Smrg 268722944501Smrg if (bo_gem == NULL) 268822944501Smrg continue; 268922944501Smrg 269022944501Smrg total += bo_gem->reloc_tree_fences; 269122944501Smrg } 269222944501Smrg return total; 269322944501Smrg} 269422944501Smrg 269522944501Smrg/** 269622944501Smrg * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready 269722944501Smrg * for the next drm_intel_bufmgr_check_aperture_space() call. 
269822944501Smrg */ 269922944501Smrgstatic void 270022944501Smrgdrm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo) 270122944501Smrg{ 270222944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 270322944501Smrg int i; 270422944501Smrg 270522944501Smrg if (bo == NULL || !bo_gem->included_in_check_aperture) 270622944501Smrg return; 270722944501Smrg 270820131375Smrg bo_gem->included_in_check_aperture = false; 270922944501Smrg 271022944501Smrg for (i = 0; i < bo_gem->reloc_count; i++) 271122944501Smrg drm_intel_gem_bo_clear_aperture_space_flag(bo_gem-> 271222944501Smrg reloc_target_info[i].bo); 271322944501Smrg} 271422944501Smrg 271522944501Smrg/** 271622944501Smrg * Return a conservative estimate for the amount of aperture required 271722944501Smrg * for a collection of buffers. This may double-count some buffers. 271822944501Smrg */ 271922944501Smrgstatic unsigned int 272022944501Smrgdrm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count) 272122944501Smrg{ 272222944501Smrg int i; 272322944501Smrg unsigned int total = 0; 272422944501Smrg 272522944501Smrg for (i = 0; i < count; i++) { 272622944501Smrg drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 272722944501Smrg if (bo_gem != NULL) 272822944501Smrg total += bo_gem->reloc_tree_size; 272922944501Smrg } 273022944501Smrg return total; 273122944501Smrg} 273222944501Smrg 273322944501Smrg/** 273422944501Smrg * Return the amount of aperture needed for a collection of buffers. 273522944501Smrg * This avoids double counting any buffers, at the cost of looking 273622944501Smrg * at every buffer in the set. 273722944501Smrg */ 273822944501Smrgstatic unsigned int 273922944501Smrgdrm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count) 274022944501Smrg{ 274122944501Smrg int i; 274222944501Smrg unsigned int total = 0; 274322944501Smrg 274422944501Smrg for (i = 0; i < count; i++) { 274522944501Smrg total += drm_intel_gem_bo_get_aperture_space(bo_array[i]); 274622944501Smrg /* For the first buffer object in the array, we get an 274722944501Smrg * accurate count back for its reloc_tree size (since nothing 274822944501Smrg * had been flagged as being counted yet). We can save that 274922944501Smrg * value out as a more conservative reloc_tree_size that 275022944501Smrg * avoids double-counting target buffers. Since the first 275122944501Smrg * buffer happens to usually be the batch buffer in our 275222944501Smrg * callers, this can pull us back from doing the tree 275322944501Smrg * walk on every new batch emit. 275422944501Smrg */ 275522944501Smrg if (i == 0) { 275622944501Smrg drm_intel_bo_gem *bo_gem = 275722944501Smrg (drm_intel_bo_gem *) bo_array[i]; 275822944501Smrg bo_gem->reloc_tree_size = total; 275922944501Smrg } 276022944501Smrg } 276122944501Smrg 276222944501Smrg for (i = 0; i < count; i++) 276322944501Smrg drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]); 276422944501Smrg return total; 276522944501Smrg} 276622944501Smrg 276722944501Smrg/** 276822944501Smrg * Return -1 if the batchbuffer should be flushed before attempting to 276922944501Smrg * emit rendering referencing the buffers pointed to by bo_array. 277022944501Smrg * 277122944501Smrg * This is required because if we try to emit a batchbuffer with relocations 277222944501Smrg * to a tree of buffers that won't simultaneously fit in the aperture, 277322944501Smrg * the rendering will return an error at a point where the software is not 277422944501Smrg * prepared to recover from it. 
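 *
 * For example (a sketch), a batchbuffer implementation typically does
 *
 *   if (drm_intel_bufmgr_check_aperture_space(bo_array, count) != 0)
 *       ... flush the current batch and start a new one ...
 *
 * before emitting commands that reference the buffers in bo_array.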
 *
 * However, we also want to emit the batchbuffer significantly before we reach
 * the limit, as a series of batchbuffers each of which references buffers
 * covering almost all of the aperture means that at each emit we end up
 * waiting to evict a buffer from the last rendering, and we get synchronous
 * performance.  By emitting smaller batchbuffers, we eat some CPU overhead to
 * get better parallelism.
 */
static int
drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
{
	drm_intel_bufmgr_gem *bufmgr_gem =
	    (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr;
	unsigned int total = 0;
	unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
	int total_fences;

	/* Check for fence reg constraints if necessary */
	if (bufmgr_gem->available_fences) {
		total_fences = drm_intel_gem_total_fences(bo_array, count);
		if (total_fences > bufmgr_gem->available_fences)
			return -ENOSPC;
	}

	total = drm_intel_gem_estimate_batch_space(bo_array, count);

	if (total > threshold)
		total = drm_intel_gem_compute_batch_space(bo_array, count);

	if (total > threshold) {
		DBG("check_space: overflowed available aperture, "
		    "%dkb vs %dkb\n",
		    total / 1024, (int)bufmgr_gem->gtt_size / 1024);
		return -ENOSPC;
	} else {
		DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024,
		    (int)bufmgr_gem->gtt_size / 1024);
		return 0;
	}
}
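
/*
 * A rough sketch of how callers use this check through the public wrapper
 * drm_intel_bufmgr_check_aperture_space() (names such as "batch_bo",
 * "target_bo" and "flush_current_batch" are illustrative placeholders):
 * before adding another relocation to the batch, ask whether the referenced
 * buffers would still fit, and if not, flush what has been built so far.
 *
 *	drm_intel_bo *check[2] = { batch_bo, target_bo };
 *
 *	if (drm_intel_bufmgr_check_aperture_space(check, 2) != 0)
 *		flush_current_batch();
 */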

/*
 * Disable buffer reuse for objects which are shared with the kernel
 * as scanout buffers
 */
static int
drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	bo_gem->reusable = false;
	return 0;
}

static int
drm_intel_gem_bo_is_reusable(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	return bo_gem->reusable;
}

static int
_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int i;

	for (i = 0; i < bo_gem->reloc_count; i++) {
		if (bo_gem->reloc_target_info[i].bo == target_bo)
			return 1;
		if (bo == bo_gem->reloc_target_info[i].bo)
			continue;
		if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo,
						 target_bo))
			return 1;
	}

	return 0;
}

/** Return true if target_bo is referenced by bo's relocation tree. */
static int
drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
{
	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;

	if (bo == NULL || target_bo == NULL)
		return 0;
	if (target_bo_gem->used_as_reloc_target)
		return _drm_intel_gem_bo_references(bo, target_bo);
	return 0;
}

static void
add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
{
	unsigned int i = bufmgr_gem->num_buckets;

	assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));

	DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
	bufmgr_gem->cache_bucket[i].size = size;
	bufmgr_gem->num_buckets++;
}

static void
init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
{
	unsigned long size, cache_max_size = 64 * 1024 * 1024;

	/* OK, so power of two buckets was too wasteful of memory.
	 * Give 3 other sizes between each power of two, to hopefully
	 * cover things accurately enough.  (The alternative is
	 * probably to just go for exact matching of sizes, and assume
	 * that for things like composited window resize the tiled
	 * width/height alignment and rounding of sizes to pages will
	 * get us useful cache hit rates anyway)
	 */
	add_bucket(bufmgr_gem, 4096);
	add_bucket(bufmgr_gem, 4096 * 2);
	add_bucket(bufmgr_gem, 4096 * 3);

	/* Initialize the linked lists for BO reuse cache. */
	for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
		add_bucket(bufmgr_gem, size);

		add_bucket(bufmgr_gem, size + size * 1 / 4);
		add_bucket(bufmgr_gem, size + size * 2 / 4);
		add_bucket(bufmgr_gem, size + size * 3 / 4);
	}
}
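
/*
 * For reference, a quick enumeration of what the loop above produces: the
 * bucket sizes are 4 KiB, 8 KiB and 12 KiB, and then, for each power of two
 * from 16 KiB through 64 MiB, the power of two itself plus three evenly
 * spaced intermediate sizes (16, 20, 24, 28 KiB; 32, 40, 48, 56 KiB;
 * 64, 80, 96, 112 KiB; and so on).  The final iteration at 64 MiB also adds
 * 80, 96 and 112 MiB buckets, giving 55 buckets in total.
 */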

void
drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;

	bufmgr_gem->vma_max = limit;

	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
}

/**
 * Get the PCI ID for the device.  This can be overridden by setting the
 * INTEL_DEVID_OVERRIDE environment variable to the desired ID.
 */
static int
get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem)
{
	char *devid_override;
	int devid;
	int ret;
	drm_i915_getparam_t gp;

	if (geteuid() == getuid()) {
		devid_override = getenv("INTEL_DEVID_OVERRIDE");
		if (devid_override) {
			bufmgr_gem->no_exec = true;
			return strtod(devid_override, NULL);
		}
	}

	VG_CLEAR(devid);
	VG_CLEAR(gp);
	gp.param = I915_PARAM_CHIPSET_ID;
	gp.value = &devid;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	if (ret) {
		fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
		fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
	}
	return devid;
}

int
drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;

	return bufmgr_gem->pci_device;
}
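
/*
 * A sketch of how the override might be used, e.g. to exercise code paths for
 * hardware that is not present (the device ID shown is only an example; any
 * ID the parser in get_pci_device_id() accepts will do).  Because no_exec is
 * set, batchbuffers are still built but their execution is skipped.
 *
 *	setenv("INTEL_DEVID_OVERRIDE", "0x0152", 1);
 *	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 *	devid = drm_intel_bufmgr_gem_get_devid(bufmgr);
 */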

/**
 * Sets the AUB filename.
 *
 * This function has to be called before drm_intel_bufmgr_gem_set_aub_dump()
 * for it to have any effect.
 */
void
drm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr,
				      const char *filename)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;

	free(bufmgr_gem->aub_filename);
	bufmgr_gem->aub_filename = NULL;
	if (filename)
		bufmgr_gem->aub_filename = strdup(filename);
}

/**
 * Sets up AUB dumping.
 *
 * This is a trace file format that can be used with the simulator.
 * Packets are emitted in a format somewhat like GPU command packets.
 * You can set up a GTT and upload your objects into the referenced
 * space, then send off batchbuffers and get BMPs out the other end.
 */
void
drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
	int entry = 0x200003;
	int i;
	int gtt_size = 0x10000;
	const char *filename;

	if (!enable) {
		if (bufmgr_gem->aub_file) {
			fclose(bufmgr_gem->aub_file);
			bufmgr_gem->aub_file = NULL;
		}
		return;
	}

	if (geteuid() != getuid())
		return;

	if (bufmgr_gem->aub_filename)
		filename = bufmgr_gem->aub_filename;
	else
		filename = "intel.aub";
	bufmgr_gem->aub_file = fopen(filename, "w+");
	if (!bufmgr_gem->aub_file)
		return;

	/* Start allocating objects from just after the GTT. */
	bufmgr_gem->aub_offset = gtt_size;

	/* Start with a (required) version packet. */
	aub_out(bufmgr_gem, CMD_AUB_HEADER | (13 - 2));
	aub_out(bufmgr_gem,
		(4 << AUB_HEADER_MAJOR_SHIFT) |
		(0 << AUB_HEADER_MINOR_SHIFT));
	for (i = 0; i < 8; i++) {
		aub_out(bufmgr_gem, 0); /* app name */
	}
	aub_out(bufmgr_gem, 0); /* timestamp */
	aub_out(bufmgr_gem, 0); /* timestamp */
	aub_out(bufmgr_gem, 0); /* comment len */

	/* Set up the GTT. The max we can handle is 256M */
	aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK |
		((bufmgr_gem->gen >= 8 ? 6 : 5) - 2));
	aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_NONLOCAL | 0 | AUB_TRACE_OP_DATA_WRITE);
	aub_out(bufmgr_gem, 0); /* subtype */
	aub_out(bufmgr_gem, 0); /* offset */
	aub_out(bufmgr_gem, gtt_size); /* size */
	if (bufmgr_gem->gen >= 8)
		aub_out(bufmgr_gem, 0);
	for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) {
		aub_out(bufmgr_gem, entry);
	}
}
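
/*
 * Typical use, sketched (the filename is arbitrary): pick a trace file name,
 * enable dumping before submitting the batches that should be captured, then
 * disable dumping so the file is closed.
 *
 *	drm_intel_bufmgr_gem_set_aub_filename(bufmgr, "workload.aub");
 *	drm_intel_bufmgr_gem_set_aub_dump(bufmgr, 1);
 *	... build and submit batchbuffers ...
 *	drm_intel_bufmgr_gem_set_aub_dump(bufmgr, 0);
 */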

drm_intel_context *
drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
	struct drm_i915_gem_context_create create;
	drm_intel_context *context = NULL;
	int ret;

	context = calloc(1, sizeof(*context));
	if (!context)
		return NULL;

	VG_CLEAR(create);
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
	if (ret != 0) {
		DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n",
		    strerror(errno));
		free(context);
		return NULL;
	}

	context->ctx_id = create.ctx_id;
	context->bufmgr = bufmgr;

	return context;
}

void
drm_intel_gem_context_destroy(drm_intel_context *ctx)
{
	drm_intel_bufmgr_gem *bufmgr_gem;
	struct drm_i915_gem_context_destroy destroy;
	int ret;

	if (ctx == NULL)
		return;

	VG_CLEAR(destroy);

	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
	destroy.ctx_id = ctx->ctx_id;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY,
		       &destroy);
	if (ret != 0)
		fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
			strerror(errno));

	free(ctx);
}
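
/*
 * A brief sketch of per-context submission, assuming "batch_bo" holds an
 * already-built batchbuffer of "used" bytes and using the public
 * drm_intel_gem_bo_context_exec() entry point from intel_bufmgr.h: hardware
 * contexts keep per-client GPU state separate across submissions.
 *
 *	drm_intel_context *ctx = drm_intel_gem_context_create(bufmgr);
 *	if (ctx != NULL) {
 *		drm_intel_gem_bo_context_exec(batch_bo, ctx, used, 0);
 *		drm_intel_gem_context_destroy(ctx);
 *	}
 */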

int
drm_intel_get_reset_stats(drm_intel_context *ctx,
			  uint32_t *reset_count,
			  uint32_t *active,
			  uint32_t *pending)
{
	drm_intel_bufmgr_gem *bufmgr_gem;
	struct drm_i915_reset_stats stats;
	int ret;

	if (ctx == NULL)
		return -EINVAL;

	memset(&stats, 0, sizeof(stats));

	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
	stats.ctx_id = ctx->ctx_id;
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GET_RESET_STATS,
		       &stats);
	if (ret == 0) {
		if (reset_count != NULL)
			*reset_count = stats.reset_count;

		if (active != NULL)
			*active = stats.batch_active;

		if (pending != NULL)
			*pending = stats.batch_pending;
	}

	return ret;
}
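
/*
 * Sketch of how a robustness-aware client might poll this ("ctx" comes from
 * drm_intel_gem_context_create(); "last_resets" and "handle_context_loss()"
 * are placeholders for client-side state and recovery): a growing
 * reset_count indicates a GPU reset happened since the last check, and
 * active/pending report how many of this context's batches were involved.
 *
 *	uint32_t resets, active, pending;
 *
 *	if (drm_intel_get_reset_stats(ctx, &resets, &active, &pending) == 0 &&
 *	    resets != last_resets) {
 *		last_resets = resets;
 *		handle_context_loss();
 *	}
 */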

int
drm_intel_reg_read(drm_intel_bufmgr *bufmgr,
		   uint32_t offset,
		   uint64_t *result)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
	struct drm_i915_reg_read reg_read;
	int ret;

	VG_CLEAR(reg_read);
	reg_read.offset = offset;

	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, &reg_read);

	*result = reg_read.val;
	return ret;
}

/**
 * Annotate the given bo for use in aub dumping.
 *
 * \param annotations is an array of drm_intel_aub_annotation objects
 * describing the type of data in various sections of the bo.  Each
 * element of the array specifies the type and subtype of a section of
 * the bo, and the past-the-end offset of that section.  The elements
 * of \c annotations must be sorted so that ending_offset is
 * increasing.
 *
 * \param count is the number of elements in the \c annotations array.
 * If \c count is zero, then \c annotations will not be dereferenced.
 *
 * Annotations are copied into a private data structure, so the caller may
 * re-use the memory pointed to by \c annotations after the call
 * returns.
 *
 * Annotations are stored for the lifetime of the bo; to reset to the
 * default state (no annotations), call this function with a \c count
 * of zero.
 */
void
drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo,
					 drm_intel_aub_annotation *annotations,
					 unsigned count)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	unsigned size = sizeof(*annotations) * count;
	drm_intel_aub_annotation *new_annotations =
		count > 0 ? realloc(bo_gem->aub_annotations, size) : NULL;
	if (new_annotations == NULL) {
		free(bo_gem->aub_annotations);
		bo_gem->aub_annotations = NULL;
		bo_gem->aub_annotation_count = 0;
		return;
	}
	memcpy(new_annotations, annotations, size);
	bo_gem->aub_annotations = new_annotations;
	bo_gem->aub_annotation_count = count;
}

/**
 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
 * and manage buffer objects.
 *
 * \param fd File descriptor of the opened DRM device.
 */
drm_intel_bufmgr *
drm_intel_bufmgr_gem_init(int fd, int batch_size)
{
	drm_intel_bufmgr_gem *bufmgr_gem;
	struct drm_i915_gem_get_aperture aperture;
	drm_i915_getparam_t gp;
	int ret, tmp;
	bool exec2 = false;

	bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
	if (bufmgr_gem == NULL)
		return NULL;

	bufmgr_gem->fd = fd;

	if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
		free(bufmgr_gem);
		return NULL;
	}

	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_GET_APERTURE,
		       &aperture);

	if (ret == 0)
		bufmgr_gem->gtt_size = aperture.aper_available_size;
	else {
		fprintf(stderr, "DRM_IOCTL_I915_GEM_APERTURE failed: %s\n",
			strerror(errno));
		bufmgr_gem->gtt_size = 128 * 1024 * 1024;
		fprintf(stderr, "Assuming %dkB available aperture size.\n"
			"May lead to reduced performance or incorrect "
			"rendering.\n",
			(int)bufmgr_gem->gtt_size / 1024);
	}

	bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem);

	if (IS_GEN2(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 2;
	else if (IS_GEN3(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 3;
	else if (IS_GEN4(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 4;
	else if (IS_GEN5(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 5;
	else if (IS_GEN6(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 6;
	else if (IS_GEN7(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 7;
	else if (IS_GEN8(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 8;
	else {
		free(bufmgr_gem);
		return NULL;
	}

	if (IS_GEN3(bufmgr_gem->pci_device) &&
	    bufmgr_gem->gtt_size > 256*1024*1024) {
		/* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't
		 * be used for tiled blits.  To simplify the accounting, just
		 * subtract the unmappable part (fixed to 256MB on all known
		 * gen3 devices) if the kernel advertises it.
		 */
		bufmgr_gem->gtt_size -= 256*1024*1024;
	}

	VG_CLEAR(gp);
	gp.value = &tmp;

	gp.param = I915_PARAM_HAS_EXECBUF2;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	if (!ret)
		exec2 = true;

	gp.param = I915_PARAM_HAS_BSD;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	bufmgr_gem->has_bsd = ret == 0;

	gp.param = I915_PARAM_HAS_BLT;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	bufmgr_gem->has_blt = ret == 0;

	gp.param = I915_PARAM_HAS_RELAXED_FENCING;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	bufmgr_gem->has_relaxed_fencing = ret == 0;

	gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	bufmgr_gem->has_wait_timeout = ret == 0;

	gp.param = I915_PARAM_HAS_LLC;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	if (ret != 0) {
		/* Kernel does not support the HAS_LLC query; fall back to GPU
		 * generation detection and assume that we have LLC on GEN6/7.
		 */
		bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) |
				IS_GEN7(bufmgr_gem->pci_device));
	} else
		bufmgr_gem->has_llc = *gp.value;

	gp.param = I915_PARAM_HAS_VEBOX;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	bufmgr_gem->has_vebox = (ret == 0) & (*gp.value > 0);

	if (bufmgr_gem->gen < 4) {
		gp.param = I915_PARAM_NUM_FENCES_AVAIL;
		gp.value = &bufmgr_gem->available_fences;
		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
		if (ret) {
			fprintf(stderr, "get fences failed: %d [%d]\n", ret,
				errno);
			fprintf(stderr, "param: %d, val: %d\n", gp.param,
				*gp.value);
			bufmgr_gem->available_fences = 0;
		} else {
			/* XXX The kernel reports the total number of fences,
			 * including any that may be pinned.
			 *
			 * We presume that there will be at least one pinned
			 * fence for the scanout buffer, but there may be more
			 * than one scanout and the user may be manually
			 * pinning buffers.  Let's move to execbuffer2 and
			 * thereby forget the insanity of using fences...
			 */
			bufmgr_gem->available_fences -= 2;
			if (bufmgr_gem->available_fences < 0)
				bufmgr_gem->available_fences = 0;
		}
	}

	/* Let's go with one relocation per every 2 dwords (but round down a bit
	 * since a power of two will mean an extra page allocation for the reloc
	 * buffer).
	 *
	 * Every 4 was too few for the blender benchmark.
	 */
	bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;

	bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
	bufmgr_gem->bufmgr.bo_alloc_for_render =
	    drm_intel_gem_bo_alloc_for_render;
	bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
	bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
	bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
	bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
	bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
	bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
	bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
	bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
	bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
	bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
	/* Use the new one if available */
	if (exec2) {
		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2;
		bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2;
	} else
		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
	bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy;
	bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise;
	bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy;
	bufmgr_gem->bufmgr.debug = 0;
	bufmgr_gem->bufmgr.check_aperture_space =
	    drm_intel_gem_check_aperture_space;
	bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
	bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable;
	bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
	    drm_intel_gem_get_pipe_from_crtc_id;
	bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;

	DRMINITLISTHEAD(&bufmgr_gem->named);
	init_cache_buckets(bufmgr_gem);

	DRMINITLISTHEAD(&bufmgr_gem->vma_cache);
	bufmgr_gem->vma_max = -1; /* unlimited by default */

	return &bufmgr_gem->bufmgr;
}
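
/*
 * End-to-end sketch of bringing up the buffer manager (the device path and
 * batch size are only examples; real clients usually get the fd from the X
 * server, from drmOpen(), or from a render node):
 *
 *	int fd = open("/dev/dri/card0", O_RDWR);
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 *
 *	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 *	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "scratch", 4096, 4096);
 *
 *	drm_intel_bo_unreference(bo);
 *	drm_intel_bufmgr_destroy(bufmgr);
 *	close(fd);
 */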