iris_bufmgr.h revision 7ec681f3

/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef IRIS_BUFMGR_H
#define IRIS_BUFMGR_H

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
#include "c11/threads.h"
#include "util/macros.h"
#include "util/u_atomic.h"
#include "util/u_dynarray.h"
#include "util/list.h"
#include "util/simple_mtx.h"
#include "pipe/p_defines.h"
#include "pipebuffer/pb_slab.h"

struct intel_device_info;
struct pipe_debug_callback;
struct isl_surf;
struct iris_syncobj;

/**
 * Memory zones.  When allocating a buffer, you can request that it is
 * placed into a specific region of the virtual address space (PPGTT).
 *
 * Most buffers can go anywhere (IRIS_MEMZONE_OTHER).  Some buffers are
 * accessed via an offset from a base address.  STATE_BASE_ADDRESS has
 * a maximum 4GB size for each region, so we need to restrict those
 * buffers to be within 4GB of the base.  Each memory zone corresponds
 * to a particular base address.
 *
 * We lay out the virtual address space as follows:
 *
 * - [0, 4K):   Nothing             (empty page for null address)
 * - [4K, 4G):  Shaders             (Instruction Base Address)
 * - [4G, 8G):  Surfaces & Binders  (Surface State Base Address, Bindless ...)
 * - [8G, 12G): Dynamic             (Dynamic State Base Address)
 * - [12G, *):  Other               (everything else in the full 48-bit VMA)
 *
 * A special buffer for border color lives at the start of the dynamic state
 * memory zone.  This unfortunately has to be handled specially because the
 * SAMPLER_STATE "Indirect State Pointer" field is only a 24-bit pointer.
 *
 * Each GL context uses a separate GEM context, which technically gives them
 * each a separate VMA.  However, we assign addresses globally, so buffers
 * will have the same address in all GEM contexts.  This lets us have a
 * single BO field for the address, which is easy and cheap.
 */
enum iris_memory_zone {
   IRIS_MEMZONE_SHADER,
   IRIS_MEMZONE_BINDER,
   IRIS_MEMZONE_BINDLESS,
   IRIS_MEMZONE_SURFACE,
   IRIS_MEMZONE_DYNAMIC,
   IRIS_MEMZONE_OTHER,

   IRIS_MEMZONE_BORDER_COLOR_POOL,
};

/* Intentionally exclude single buffer "zones" */
#define IRIS_MEMZONE_COUNT (IRIS_MEMZONE_OTHER + 1)

#define IRIS_BINDER_SIZE (64 * 1024)
#define IRIS_MAX_BINDERS 100
#define IRIS_BINDLESS_SIZE (8 * 1024 * 1024)

#define IRIS_MEMZONE_SHADER_START    (0ull * (1ull << 32))
#define IRIS_MEMZONE_BINDER_START    (1ull * (1ull << 32))
#define IRIS_MEMZONE_BINDLESS_START  (IRIS_MEMZONE_BINDER_START + IRIS_MAX_BINDERS * IRIS_BINDER_SIZE)
#define IRIS_MEMZONE_SURFACE_START   (IRIS_MEMZONE_BINDLESS_START + IRIS_BINDLESS_SIZE)
#define IRIS_MEMZONE_DYNAMIC_START   (2ull * (1ull << 32))
#define IRIS_MEMZONE_OTHER_START     (3ull * (1ull << 32))

#define IRIS_BORDER_COLOR_POOL_ADDRESS IRIS_MEMZONE_DYNAMIC_START
#define IRIS_BORDER_COLOR_POOL_SIZE (64 * 1024)
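
/* For reference, the zone start macros above work out to the following
 * addresses.  This is an illustrative calculation derived from the
 * definitions, not an additional set of #defines:
 *
 *    IRIS_MEMZONE_SHADER_START   = 0x000000000
 *    IRIS_MEMZONE_BINDER_START   = 0x100000000                  (4 GiB)
 *    IRIS_MEMZONE_BINDLESS_START = 0x100000000 + 100 * 64 KiB = 0x100640000
 *    IRIS_MEMZONE_SURFACE_START  = 0x100640000 + 8 MiB        = 0x100e40000
 *    IRIS_MEMZONE_DYNAMIC_START  = 0x200000000                  (8 GiB)
 *    IRIS_MEMZONE_OTHER_START    = 0x300000000                  (12 GiB)
 *
 * so binders, bindless surfaces, and surface states all share the 4GB region
 * starting at 4GB, matching the layout comment above.
 */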

/**
 * Classification of the various incoherent caches of the GPU into a number of
 * caching domains.
 */
enum iris_domain {
   /** Render color cache. */
   IRIS_DOMAIN_RENDER_WRITE = 0,
   /** (Hi)Z/stencil cache. */
   IRIS_DOMAIN_DEPTH_WRITE,
   /** Data port (HDC) cache. */
   IRIS_DOMAIN_DATA_WRITE,
   /** Any other read-write cache. */
   IRIS_DOMAIN_OTHER_WRITE,
   /** Vertex cache. */
   IRIS_DOMAIN_VF_READ,
   /** Any other read-only cache. */
   IRIS_DOMAIN_OTHER_READ,
   /** Number of caching domains. */
   NUM_IRIS_DOMAINS,
   /** Not a real cache, used to opt out of the cache tracking mechanism. */
   IRIS_DOMAIN_NONE = NUM_IRIS_DOMAINS
};

/**
 * Whether a caching domain is guaranteed not to write any data to memory.
 */
static inline bool
iris_domain_is_read_only(enum iris_domain access)
{
   return access == IRIS_DOMAIN_OTHER_READ ||
          access == IRIS_DOMAIN_VF_READ;
}

enum iris_mmap_mode {
   IRIS_MMAP_NONE, /**< Cannot be mapped */
   IRIS_MMAP_UC,   /**< Fully uncached memory map */
   IRIS_MMAP_WC,   /**< Write-combining map with no caching of reads */
   IRIS_MMAP_WB,   /**< Write-back mapping with CPU caches enabled */
};

#define IRIS_BATCH_COUNT 2

struct iris_bo_screen_deps {
   struct iris_syncobj *write_syncobjs[IRIS_BATCH_COUNT];
   struct iris_syncobj *read_syncobjs[IRIS_BATCH_COUNT];
};

struct iris_bo {
   /**
    * Size in bytes of the buffer object.
    *
    * The size may be larger than the size originally requested for the
    * allocation, such as being aligned to page size.
    */
   uint64_t size;

   /** Buffer manager context associated with this buffer object */
   struct iris_bufmgr *bufmgr;

   /** Pre-computed hash using _mesa_hash_pointer for cache tracking sets */
   uint32_t hash;

   /** The GEM handle for this buffer object. */
   uint32_t gem_handle;

   /**
    * Virtual address of the buffer inside the PPGTT (Per-Process Graphics
    * Translation Table).
    *
    * Although each hardware context has its own VMA, we assign BOs to the
    * same address in all contexts, for simplicity.
    */
   uint64_t address;

   /**
    * If non-zero, then this bo has an aux-map translation to this address.
    */
   uint64_t aux_map_address;

   /**
    * If this BO is referenced by a batch, this _may_ be the index into the
    * batch->exec_bos[] list.
    *
    * Note that a single buffer may be used by multiple batches/contexts,
    * and thus appear in multiple lists, but we only track one index here.
    * In the common case one can guess that batch->exec_bos[bo->index] == bo
    * and double check if that's true to avoid a linear list walk (see the
    * illustrative lookup sketch following this struct).
    *
    * XXX: this is not ideal now that we have more than one batch per context,
    * XXX: as the index will flop back and forth between the render index and
    * XXX: compute index...
    */
   unsigned index;

   int refcount;
   const char *name;

   /** BO cache list */
   struct list_head head;

   /**
    * Synchronization sequence number of the most recent access of this BO
    * from each caching domain.
    *
    * Although this is a global field, use in multiple contexts should be
    * safe; see iris_emit_buffer_barrier_for() for details.
    *
    * Also align it to 64 bits.  This will make atomic operations faster on
    * 32-bit platforms.
    */
   uint64_t last_seqnos[NUM_IRIS_DOMAINS] __attribute__ ((aligned (8)));

   /** Up to one per screen; may need realloc. */
   struct iris_bo_screen_deps *deps;
   int deps_size;

   /**
    * Boolean of whether the GPU is definitely not accessing the buffer.
    *
    * This is only valid when reusable, since non-reusable buffers are those
    * that have been shared with other processes, so we don't know their
    * state.
    */
   bool idle;

   union {
      struct {
         uint64_t kflags;

         time_t free_time;

         /** Mapped address for the buffer, saved across map/unmap cycles */
         void *map;

         /** List of GEM handle exports of this buffer (bo_export) */
         struct list_head exports;

         /**
          * Kernel-assigned global name for this object
          *
          * List contains both flink named and prime fd'd objects
          */
         unsigned global_name;

         /** The mmap coherency mode selected at BO allocation time */
         enum iris_mmap_mode mmap_mode;

         /** Was this buffer imported from an external client? */
         bool imported;

         /** Has this buffer been exported to external clients? */
         bool exported;

         /** Boolean of whether this buffer can be re-used */
         bool reusable;

         /** Boolean of whether this buffer points into user memory */
         bool userptr;

         /** Boolean of whether this was allocated from local memory */
         bool local;
      } real;
      struct {
         struct pb_slab_entry entry;
         struct iris_bo *real;
      } slab;
   };
};
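
/* An illustrative sketch of the bo->index guess-and-verify pattern described
 * in the comment above.  The exec_bos array and exec_count stand in for a
 * batch's execbuf list (kept in iris_batch, not in this header) and are
 * assumptions made only for the example:
 *
 *    static int
 *    find_exec_index(struct iris_bo **exec_bos, int exec_count,
 *                    struct iris_bo *bo)
 *    {
 *       if (bo->index < (unsigned) exec_count && exec_bos[bo->index] == bo)
 *          return bo->index;               // fast path: cached guess was right
 *
 *       for (int i = 0; i < exec_count; i++) {
 *          if (exec_bos[i] == bo)
 *             return i;                    // slow path: linear list walk
 *       }
 *
 *       return -1;                         // BO is not in this batch yet
 *    }
 */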

#define BO_ALLOC_ZEROED      (1<<0)
#define BO_ALLOC_COHERENT    (1<<1)
#define BO_ALLOC_SMEM        (1<<2)
#define BO_ALLOC_SCANOUT     (1<<3)
#define BO_ALLOC_NO_SUBALLOC (1<<4)

/**
 * Allocate a buffer object.
 *
 * Buffer objects are not necessarily initially mapped into CPU virtual
 * address space or graphics device aperture.  They must be mapped
 * using iris_bo_map() to be used by the CPU.
 */
struct iris_bo *iris_bo_alloc(struct iris_bufmgr *bufmgr,
                              const char *name,
                              uint64_t size,
                              uint32_t alignment,
                              enum iris_memory_zone memzone,
                              unsigned flags);

struct iris_bo *
iris_bo_create_userptr(struct iris_bufmgr *bufmgr, const char *name,
                       void *ptr, size_t size,
                       enum iris_memory_zone memzone);

/** Takes a reference on a buffer object */
static inline void
iris_bo_reference(struct iris_bo *bo)
{
   p_atomic_inc(&bo->refcount);
}

/**
 * Releases a reference on a buffer object, freeing the data if
 * no references remain.
 */
void iris_bo_unreference(struct iris_bo *bo);

#define MAP_READ          PIPE_MAP_READ
#define MAP_WRITE         PIPE_MAP_WRITE
#define MAP_ASYNC         PIPE_MAP_UNSYNCHRONIZED
#define MAP_PERSISTENT    PIPE_MAP_PERSISTENT
#define MAP_COHERENT      PIPE_MAP_COHERENT
/* internal */
#define MAP_RAW           (PIPE_MAP_DRV_PRV << 0)
#define MAP_INTERNAL_MASK (MAP_RAW)

#define MAP_FLAGS (MAP_READ | MAP_WRITE | MAP_ASYNC | \
                   MAP_PERSISTENT | MAP_COHERENT | MAP_INTERNAL_MASK)

/**
 * Maps the buffer into userspace.
 *
 * This function will block waiting for any existing execution on the
 * buffer to complete, first.  The resulting mapping is returned.
 */
MUST_CHECK void *iris_bo_map(struct pipe_debug_callback *dbg,
                             struct iris_bo *bo, unsigned flags);

/**
 * Reduces the refcount on the userspace mapping of the buffer
 * object.
 */
static inline int iris_bo_unmap(struct iris_bo *bo) { return 0; }
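
/* A minimal usage sketch of the allocation and mapping API declared above.
 * The bufmgr pointer, buffer name, and data to upload are assumptions for
 * the example; NULL is passed for the debug callback for brevity, and
 * <string.h> is assumed for memcpy():
 *
 *    static void
 *    upload_example(struct iris_bufmgr *bufmgr, const void *data, size_t size)
 *    {
 *       struct iris_bo *bo =
 *          iris_bo_alloc(bufmgr, "example upload", size, 4096,
 *                        IRIS_MEMZONE_OTHER, 0);
 *       if (!bo)
 *          return;
 *
 *       void *map = iris_bo_map(NULL, bo, MAP_WRITE);
 *       if (map)
 *          memcpy(map, data, size);
 *
 *       // No explicit unmap is required (iris_bo_unmap() is a no-op);
 *       // simply drop the reference when done with the buffer.
 *       iris_bo_unreference(bo);
 *    }
 */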

/**
 * Waits for rendering to an object by the GPU to have completed.
 *
 * This is not required for any access to the BO by bo_map,
 * bo_subdata, etc.  It is merely a way for the driver to implement
 * glFinish.
 */
void iris_bo_wait_rendering(struct iris_bo *bo);

/**
 * Unref a buffer manager instance.
 */
void iris_bufmgr_unref(struct iris_bufmgr *bufmgr);

/**
 * Create a visible name for a buffer which can be used by other apps
 *
 * \param bo Buffer to create a name for
 * \param name Returned name
 */
int iris_bo_flink(struct iris_bo *bo, uint32_t *name);

/**
 * Returns true if the BO is backed by a real GEM object, false if it's
 * a wrapper that's suballocated from a larger BO.
 */
static inline bool
iris_bo_is_real(struct iris_bo *bo)
{
   return bo->gem_handle != 0;
}

/**
 * Unwrap any slab-allocated wrapper BOs to get the BO for the underlying
 * backing storage, which is a real BO associated with a GEM object.
 */
static inline struct iris_bo *
iris_get_backing_bo(struct iris_bo *bo)
{
   if (!iris_bo_is_real(bo))
      bo = bo->slab.real;

   /* We only allow one level of wrapping. */
   assert(iris_bo_is_real(bo));

   return bo;
}

/**
 * Is this buffer shared with external clients (imported or exported)?
 */
static inline bool
iris_bo_is_external(const struct iris_bo *bo)
{
   bo = iris_get_backing_bo((struct iris_bo *) bo);
   return bo->real.exported || bo->real.imported;
}

static inline bool
iris_bo_is_imported(const struct iris_bo *bo)
{
   bo = iris_get_backing_bo((struct iris_bo *) bo);
   return bo->real.imported;
}

static inline bool
iris_bo_is_exported(const struct iris_bo *bo)
{
   bo = iris_get_backing_bo((struct iris_bo *) bo);
   return bo->real.exported;
}

static inline enum iris_mmap_mode
iris_bo_mmap_mode(const struct iris_bo *bo)
{
   bo = iris_get_backing_bo((struct iris_bo *) bo);
   return bo->real.mmap_mode;
}

/**
 * Mark a buffer as being shared with other external clients.
 */
void iris_bo_mark_exported(struct iris_bo *bo);

/**
 * Returns true if mapping the buffer for write could cause the process
 * to block, due to the object being active in the GPU.
 */
bool iris_bo_busy(struct iris_bo *bo);

/**
 * Specify the volatility of the buffer.
 * \param bo Buffer to mark as purgeable or required
 * \param madv The purgeable status
 *
 * Use I915_MADV_DONTNEED to mark the buffer as purgeable, and it will be
 * reclaimed under memory pressure.  If you subsequently require the buffer,
 * then you must pass I915_MADV_WILLNEED to mark the buffer as required.
 *
 * Returns 1 if the buffer was retained, or 0 if it was discarded whilst
 * marked as I915_MADV_DONTNEED.
 */
int iris_bo_madvise(struct iris_bo *bo, int madv);

struct iris_bufmgr *iris_bufmgr_get_for_fd(struct intel_device_info *devinfo,
                                           int fd, bool bo_reuse);
int iris_bufmgr_get_fd(struct iris_bufmgr *bufmgr);

struct iris_bo *iris_bo_gem_create_from_name(struct iris_bufmgr *bufmgr,
                                             const char *name,
                                             unsigned handle);

void *iris_bufmgr_get_aux_map_context(struct iris_bufmgr *bufmgr);

int iris_bo_wait(struct iris_bo *bo, int64_t timeout_ns);

uint32_t iris_create_hw_context(struct iris_bufmgr *bufmgr);
uint32_t iris_clone_hw_context(struct iris_bufmgr *bufmgr, uint32_t ctx_id);

#define IRIS_CONTEXT_LOW_PRIORITY    ((I915_CONTEXT_MIN_USER_PRIORITY - 1) / 2)
#define IRIS_CONTEXT_MEDIUM_PRIORITY (I915_CONTEXT_DEFAULT_PRIORITY)
#define IRIS_CONTEXT_HIGH_PRIORITY   ((I915_CONTEXT_MAX_USER_PRIORITY + 1) / 2)

int iris_hw_context_set_priority(struct iris_bufmgr *bufmgr,
                                 uint32_t ctx_id, int priority);

void iris_destroy_hw_context(struct iris_bufmgr *bufmgr, uint32_t ctx_id);

int iris_gem_get_tiling(struct iris_bo *bo, uint32_t *tiling);
int iris_gem_set_tiling(struct iris_bo *bo, const struct isl_surf *surf);

int iris_bo_export_dmabuf(struct iris_bo *bo, int *prime_fd);
struct iris_bo *iris_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd);
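
/* A brief sketch of sharing a buffer through the dma-buf helpers above:
 * export a BO to a prime fd, hand the fd to another process or API, and/or
 * import it again to get a BO referencing the same memory.  close() assumes
 * <unistd.h>; everything else is declared in this header:
 *
 *    static struct iris_bo *
 *    reimport_example(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
 *    {
 *       int prime_fd = -1;
 *       if (iris_bo_export_dmabuf(bo, &prime_fd) != 0)
 *          return NULL;
 *
 *       struct iris_bo *imported = iris_bo_import_dmabuf(bufmgr, prime_fd);
 *       close(prime_fd);   // the fd is no longer needed once imported
 *       return imported;
 *    }
 */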

/**
 * Exports a bo as a GEM handle into a given DRM file descriptor
 * \param bo Buffer to export
 * \param drm_fd File descriptor where the new handle is created
 * \param out_handle Pointer to store the new handle
 *
 * Returns 0 if the buffer was successfully exported, a non-zero error code
 * otherwise.
 */
int iris_bo_export_gem_handle_for_device(struct iris_bo *bo, int drm_fd,
                                         uint32_t *out_handle);

uint32_t iris_bo_export_gem_handle(struct iris_bo *bo);

int iris_reg_read(struct iris_bufmgr *bufmgr, uint32_t offset, uint64_t *out);

/**
 * Returns the BO's address relative to the appropriate base address.
 *
 * All of our base addresses are programmed to the start of a 4GB region,
 * so simply returning the bottom 32 bits of the BO address will give us
 * the offset from whatever base address corresponds to that memory region.
 */
static inline uint32_t
iris_bo_offset_from_base_address(struct iris_bo *bo)
{
   /* This only works for buffers in the memory zones corresponding to a
    * base address - the top, unbounded memory zone doesn't have a base.
    */
   assert(bo->address < IRIS_MEMZONE_OTHER_START);
   return bo->address;
}
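
/* Worked example for iris_bo_offset_from_base_address(): a BO allocated in
 * IRIS_MEMZONE_DYNAMIC at, say, address 0x200004000 lives in the 4GB region
 * whose base address is programmed to IRIS_MEMZONE_DYNAMIC_START
 * (0x200000000), so truncating to the low 32 bits yields the offset
 * 0x00004000 from that base.  The specific address is illustrative only.
 */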

/**
 * Track access of a BO from the specified caching domain and sequence number.
 *
 * Can be used without locking.  Only the most recent access (i.e. highest
 * seqno) is tracked.
 */
static inline void
iris_bo_bump_seqno(struct iris_bo *bo, uint64_t seqno,
                   enum iris_domain type)
{
   uint64_t *const last_seqno = &bo->last_seqnos[type];
   uint64_t tmp, prev_seqno = p_atomic_read(last_seqno);

   /* Atomically raise last_seqnos[type] to seqno, retrying if another thread
    * updated it concurrently, and never lowering an already-newer value.
    */
   while (prev_seqno < seqno &&
          prev_seqno != (tmp = p_atomic_cmpxchg(last_seqno, prev_seqno, seqno)))
      prev_seqno = tmp;
}

enum iris_memory_zone iris_memzone_for_address(uint64_t address);

int iris_bufmgr_create_screen_id(struct iris_bufmgr *bufmgr);

simple_mtx_t *iris_bufmgr_get_bo_deps_lock(struct iris_bufmgr *bufmgr);

#endif /* IRIS_BUFMGR_H */