/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef IRIS_BUFMGR_H
#define IRIS_BUFMGR_H

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
#include "c11/threads.h"
#include "util/macros.h"
#include "util/u_atomic.h"
#include "util/u_dynarray.h"
#include "util/list.h"
#include "util/simple_mtx.h"
#include "pipe/p_defines.h"
#include "pipebuffer/pb_slab.h"

struct intel_device_info;
struct pipe_debug_callback;
struct isl_surf;
struct iris_syncobj;

/**
 * Memory zones.  When allocating a buffer, you can request that it be
 * placed into a specific region of the virtual address space (PPGTT).
 *
 * Most buffers can go anywhere (IRIS_MEMZONE_OTHER).  Some buffers are
 * accessed via an offset from a base address.  STATE_BASE_ADDRESS has
 * a maximum 4GB size for each region, so we need to restrict those
 * buffers to be within 4GB of the base.  Each memory zone corresponds
 * to a particular base address.
 *
 * We lay out the virtual address space as follows:
 *
 * - [0,   4K): Nothing            (empty page for null address)
 * - [4K,  4G): Shaders            (Instruction Base Address)
 * - [4G,  8G): Surfaces & Binders (Surface State Base Address, Bindless ...)
 * - [8G, 12G): Dynamic            (Dynamic State Base Address)
 * - [12G, *):  Other              (everything else in the full 48-bit VMA)
 *
 * A special buffer for border color lives at the start of the dynamic state
 * memory zone.  This unfortunately has to be handled specially because the
 * SAMPLER_STATE "Indirect State Pointer" field is only a 24-bit pointer.
 *
 * Each GL context uses a separate GEM context, which technically gives them
 * each a separate VMA.  However, we assign addresses globally, so buffers
 * will have the same address in all GEM contexts.  This lets us have a
 * single BO field for the address, which is easy and cheap.
 */
enum iris_memory_zone {
   IRIS_MEMZONE_SHADER,
   IRIS_MEMZONE_BINDER,
   IRIS_MEMZONE_BINDLESS,
   IRIS_MEMZONE_SURFACE,
   IRIS_MEMZONE_DYNAMIC,
   IRIS_MEMZONE_OTHER,

   IRIS_MEMZONE_BORDER_COLOR_POOL,
};

/* Intentionally exclude single buffer "zones" */
#define IRIS_MEMZONE_COUNT (IRIS_MEMZONE_OTHER + 1)

#define IRIS_BINDER_SIZE (64 * 1024)
#define IRIS_MAX_BINDERS 100
#define IRIS_BINDLESS_SIZE (8 * 1024 * 1024)

#define IRIS_MEMZONE_SHADER_START     (0ull * (1ull << 32))
#define IRIS_MEMZONE_BINDER_START     (1ull * (1ull << 32))
#define IRIS_MEMZONE_BINDLESS_START   (IRIS_MEMZONE_BINDER_START + IRIS_MAX_BINDERS * IRIS_BINDER_SIZE)
#define IRIS_MEMZONE_SURFACE_START    (IRIS_MEMZONE_BINDLESS_START + IRIS_BINDLESS_SIZE)
#define IRIS_MEMZONE_DYNAMIC_START    (2ull * (1ull << 32))
#define IRIS_MEMZONE_OTHER_START      (3ull * (1ull << 32))

#define IRIS_BORDER_COLOR_POOL_ADDRESS IRIS_MEMZONE_DYNAMIC_START
#define IRIS_BORDER_COLOR_POOL_SIZE (64 * 1024)

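/* A quick worked check of the layout above (arithmetic, not code):
 *
 *    IRIS_MEMZONE_BINDER_START   = 1 * 2^32              = 4 GiB
 *    IRIS_MEMZONE_BINDLESS_START = 4 GiB + 100 * 64 KiB  = 4 GiB + 6.25 MiB
 *    IRIS_MEMZONE_SURFACE_START  = 4 GiB + 6.25 MiB + 8 MiB
 *                                = 4 GiB + 14.25 MiB
 *
 * So the binder and bindless regions are carved out of the bottom of the
 * [4G, 8G) range, and the surface zone occupies the remainder, keeping all
 * three within a single 4GB region as STATE_BASE_ADDRESS requires.
 */
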
/**
 * Classification of the various incoherent caches of the GPU into a number of
 * caching domains.
 */
enum iris_domain {
   /** Render color cache. */
   IRIS_DOMAIN_RENDER_WRITE = 0,
   /** (Hi)Z/stencil cache. */
   IRIS_DOMAIN_DEPTH_WRITE,
   /** Data port (HDC) cache. */
   IRIS_DOMAIN_DATA_WRITE,
   /** Any other read-write cache. */
   IRIS_DOMAIN_OTHER_WRITE,
   /** Vertex cache. */
   IRIS_DOMAIN_VF_READ,
   /** Any other read-only cache. */
   IRIS_DOMAIN_OTHER_READ,
   /** Number of caching domains. */
   NUM_IRIS_DOMAINS,
   /** Not a real cache, used to opt out of the cache tracking mechanism. */
   IRIS_DOMAIN_NONE = NUM_IRIS_DOMAINS
};

/**
 * Whether a caching domain is guaranteed not to write any data to memory.
 */
static inline bool
iris_domain_is_read_only(enum iris_domain access)
{
   return access == IRIS_DOMAIN_OTHER_READ ||
          access == IRIS_DOMAIN_VF_READ;
}

enum iris_mmap_mode {
   IRIS_MMAP_NONE, /**< Cannot be mapped */
   IRIS_MMAP_UC, /**< Fully uncached memory map */
   IRIS_MMAP_WC, /**< Write-combining map with no caching of reads */
   IRIS_MMAP_WB, /**< Write-back mapping with CPU caches enabled */
};

#define IRIS_BATCH_COUNT 2

struct iris_bo_screen_deps {
   struct iris_syncobj *write_syncobjs[IRIS_BATCH_COUNT];
   struct iris_syncobj *read_syncobjs[IRIS_BATCH_COUNT];
};

struct iris_bo {
   /**
    * Size in bytes of the buffer object.
    *
    * The size may be larger than the size originally requested for the
    * allocation, such as being aligned to page size.
    */
   uint64_t size;

   /** Buffer manager context associated with this buffer object */
   struct iris_bufmgr *bufmgr;

   /** Pre-computed hash using _mesa_hash_pointer for cache tracking sets */
   uint32_t hash;

   /** The GEM handle for this buffer object. */
   uint32_t gem_handle;

   /**
    * Virtual address of the buffer inside the PPGTT (Per-Process Graphics
    * Translation Table).
    *
    * Although each hardware context has its own VMA, we assign BOs to the
    * same address in all contexts, for simplicity.
    */
   uint64_t address;

   /**
    * If non-zero, then this bo has an aux-map translation to this address.
    */
   uint64_t aux_map_address;

   /**
    * If this BO is referenced by a batch, this _may_ be the index into the
    * batch->exec_bos[] list.
    *
    * Note that a single buffer may be used by multiple batches/contexts,
    * and thus appear in multiple lists, but we only track one index here.
    * In the common case one can guess that batch->exec_bos[bo->index] == bo
    * and double check if that's true to avoid a linear list walk.
    *
    * XXX: this is not ideal now that we have more than one batch per context,
    * XXX: as the index will flop back and forth between the render index and
    * XXX: compute index...
    */
   unsigned index;

   int refcount;
   const char *name;

   /** BO cache list */
   struct list_head head;

   /**
    * Synchronization sequence number of the most recent access of this BO
    * from each caching domain.
    *
    * Although this is a global field, use in multiple contexts should be
    * safe, see iris_emit_buffer_barrier_for() for details.
    *
    * Also align it to 64 bits.  This will make atomic operations faster on
    * 32-bit platforms.
    */
   uint64_t last_seqnos[NUM_IRIS_DOMAINS] __attribute__ ((aligned (8)));

   /** Up to one per screen, may need realloc. */
   struct iris_bo_screen_deps *deps;
   int deps_size;

   /**
    * Boolean of whether the GPU is definitely not accessing the buffer.
    *
    * This is only valid when reusable, since non-reusable
    * buffers are those that have been shared with other
    * processes, so we don't know their state.
    */
   bool idle;

   union {
      struct {
         uint64_t kflags;

         time_t free_time;

         /** Mapped address for the buffer, saved across map/unmap cycles */
         void *map;

         /** List of GEM handle exports of this buffer (bo_export) */
         struct list_head exports;

         /**
          * Kernel-assigned global name for this object
          *
          * List contains both flink named and prime fd'd objects
          */
         unsigned global_name;

         /** The mmap coherency mode selected at BO allocation time */
         enum iris_mmap_mode mmap_mode;

         /** Was this buffer imported from an external client? */
         bool imported;

         /** Has this buffer been exported to external clients? */
         bool exported;

         /** Boolean of whether this buffer can be re-used */
         bool reusable;

         /** Boolean of whether this buffer points into user memory */
         bool userptr;

         /** Boolean of whether this was allocated from local memory */
         bool local;
      } real;
      struct {
         struct pb_slab_entry entry;
         struct iris_bo *real;
      } slab;
   };
};

#define BO_ALLOC_ZEROED      (1<<0)
#define BO_ALLOC_COHERENT    (1<<1)
#define BO_ALLOC_SMEM        (1<<2)
#define BO_ALLOC_SCANOUT     (1<<3)
#define BO_ALLOC_NO_SUBALLOC (1<<4)

/**
 * Allocate a buffer object.
 *
 * Buffer objects are not necessarily initially mapped into CPU virtual
 * address space or graphics device aperture.  They must be mapped
 * using iris_bo_map() to be used by the CPU.
 */
struct iris_bo *iris_bo_alloc(struct iris_bufmgr *bufmgr,
                              const char *name,
                              uint64_t size,
                              uint32_t alignment,
                              enum iris_memory_zone memzone,
                              unsigned flags);

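/* A minimal usage sketch (illustrative only; 'bufmgr' is assumed to be a
 * valid buffer manager obtained from iris_bufmgr_get_for_fd()):
 *
 *    struct iris_bo *bo =
 *       iris_bo_alloc(bufmgr, "scratch data", 4096, 1,
 *                     IRIS_MEMZONE_OTHER, BO_ALLOC_ZEROED);
 *    if (!bo)
 *       return;                  // allocation can fail
 *    ...
 *    iris_bo_unreference(bo);    // drop our reference when done
 */
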
struct iris_bo *
iris_bo_create_userptr(struct iris_bufmgr *bufmgr, const char *name,
                       void *ptr, size_t size,
                       enum iris_memory_zone memzone);

/** Takes a reference on a buffer object */
static inline void
iris_bo_reference(struct iris_bo *bo)
{
   p_atomic_inc(&bo->refcount);
}

/**
 * Releases a reference on a buffer object, freeing the data if
 * no references remain.
 */
void iris_bo_unreference(struct iris_bo *bo);

#define MAP_READ          PIPE_MAP_READ
#define MAP_WRITE         PIPE_MAP_WRITE
#define MAP_ASYNC         PIPE_MAP_UNSYNCHRONIZED
#define MAP_PERSISTENT    PIPE_MAP_PERSISTENT
#define MAP_COHERENT      PIPE_MAP_COHERENT
/* internal */
#define MAP_RAW           (PIPE_MAP_DRV_PRV << 0)
#define MAP_INTERNAL_MASK (MAP_RAW)

#define MAP_FLAGS         (MAP_READ | MAP_WRITE | MAP_ASYNC | \
                           MAP_PERSISTENT | MAP_COHERENT | MAP_INTERNAL_MASK)

/**
 * Maps the buffer into userspace.
 *
 * This function will first block waiting for any existing execution on the
 * buffer to complete.  The resulting mapping is returned.
 */
MUST_CHECK void *iris_bo_map(struct pipe_debug_callback *dbg,
                             struct iris_bo *bo, unsigned flags);

/**
 * Releases the userspace mapping of the buffer object.
 *
 * This is currently a no-op: the mapped address is saved across map/unmap
 * cycles (see iris_bo::real.map) and lives until the buffer is freed.
 */
static inline int iris_bo_unmap(struct iris_bo *bo) { return 0; }

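/* A map-and-write sketch (illustrative only; 'data' and 'data_size' are
 * hypothetical):
 *
 *    void *ptr = iris_bo_map(NULL, bo, MAP_WRITE);
 *    if (ptr) {
 *       memcpy(ptr, data, data_size);
 *       iris_bo_unmap(bo);
 *    }
 *
 * Adding MAP_ASYNC would skip the implicit wait for in-flight GPU work,
 * which is only safe if the caller knows the buffer is idle or handles
 * synchronization itself.
 */
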
/**
 * Waits for rendering to an object by the GPU to have completed.
 *
 * This is not required for any access to the BO by bo_map,
 * bo_subdata, etc.  It is merely a way for the driver to implement
 * glFinish.
 */
void iris_bo_wait_rendering(struct iris_bo *bo);


/**
 * Unref a buffer manager instance.
 */
void iris_bufmgr_unref(struct iris_bufmgr *bufmgr);

/**
 * Create a visible name for a buffer which can be used by other apps
 *
 * \param bo Buffer to create a name for
 * \param name Returned name
 */
int iris_bo_flink(struct iris_bo *bo, uint32_t *name);

/**
 * Returns true if the BO is backed by a real GEM object, false if it's
 * a wrapper that's suballocated from a larger BO.
 */
static inline bool
iris_bo_is_real(struct iris_bo *bo)
{
   return bo->gem_handle != 0;
}

/**
 * Unwrap any slab-allocated wrapper BOs to get the BO for the underlying
 * backing storage, which is a real BO associated with a GEM object.
 */
static inline struct iris_bo *
iris_get_backing_bo(struct iris_bo *bo)
{
   if (!iris_bo_is_real(bo))
      bo = bo->slab.real;

   /* We only allow one level of wrapping. */
   assert(iris_bo_is_real(bo));

   return bo;
}

/**
 * Is this buffer shared with external clients (imported or exported)?
 */
static inline bool
iris_bo_is_external(const struct iris_bo *bo)
{
   bo = iris_get_backing_bo((struct iris_bo *) bo);
   return bo->real.exported || bo->real.imported;
}

static inline bool
iris_bo_is_imported(const struct iris_bo *bo)
{
   bo = iris_get_backing_bo((struct iris_bo *) bo);
   return bo->real.imported;
}

static inline bool
iris_bo_is_exported(const struct iris_bo *bo)
{
   bo = iris_get_backing_bo((struct iris_bo *) bo);
   return bo->real.exported;
}

static inline enum iris_mmap_mode
iris_bo_mmap_mode(const struct iris_bo *bo)
{
   bo = iris_get_backing_bo((struct iris_bo *) bo);
   return bo->real.mmap_mode;
}

/**
 * Mark a buffer as being shared with other external clients.
 */
void iris_bo_mark_exported(struct iris_bo *bo);

/**
 * Returns true if mapping the buffer for write could cause the process
 * to block, due to the object being active in the GPU.
 */
bool iris_bo_busy(struct iris_bo *bo);

/**
 * Specify the volatility of the buffer.
 * \param bo Buffer whose purgeable status is to be changed
 * \param madv The purgeable status
 *
 * Use I915_MADV_DONTNEED to mark the buffer as purgeable, and it will be
 * reclaimed under memory pressure. If you subsequently require the buffer,
 * then you must pass I915_MADV_WILLNEED to mark the buffer as required.
 *
 * Returns 1 if the buffer was retained, or 0 if it was discarded whilst
 * marked as I915_MADV_DONTNEED.
 */
int iris_bo_madvise(struct iris_bo *bo, int madv);

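/* A purge/reclaim sketch (illustrative only):
 *
 *    iris_bo_madvise(bo, I915_MADV_DONTNEED);   // kernel may reclaim pages
 *    ...
 *    if (!iris_bo_madvise(bo, I915_MADV_WILLNEED)) {
 *       // Contents were discarded under memory pressure; the data must be
 *       // regenerated (or the buffer reallocated) before use.
 *    }
 */
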
struct iris_bufmgr *iris_bufmgr_get_for_fd(struct intel_device_info *devinfo,
                                           int fd, bool bo_reuse);
int iris_bufmgr_get_fd(struct iris_bufmgr *bufmgr);

struct iris_bo *iris_bo_gem_create_from_name(struct iris_bufmgr *bufmgr,
                                             const char *name,
                                             unsigned handle);

void* iris_bufmgr_get_aux_map_context(struct iris_bufmgr *bufmgr);

int iris_bo_wait(struct iris_bo *bo, int64_t timeout_ns);

uint32_t iris_create_hw_context(struct iris_bufmgr *bufmgr);
uint32_t iris_clone_hw_context(struct iris_bufmgr *bufmgr, uint32_t ctx_id);

#define IRIS_CONTEXT_LOW_PRIORITY    ((I915_CONTEXT_MIN_USER_PRIORITY-1)/2)
#define IRIS_CONTEXT_MEDIUM_PRIORITY (I915_CONTEXT_DEFAULT_PRIORITY)
#define IRIS_CONTEXT_HIGH_PRIORITY   ((I915_CONTEXT_MAX_USER_PRIORITY+1)/2)

int iris_hw_context_set_priority(struct iris_bufmgr *bufmgr,
                                 uint32_t ctx_id, int priority);

void iris_destroy_hw_context(struct iris_bufmgr *bufmgr, uint32_t ctx_id);

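/* A context lifecycle sketch (illustrative only; assumes a valid 'bufmgr'):
 *
 *    uint32_t ctx_id = iris_create_hw_context(bufmgr);
 *    iris_hw_context_set_priority(bufmgr, ctx_id,
 *                                 IRIS_CONTEXT_HIGH_PRIORITY);
 *    ...
 *    // Cloning produces a fresh context with equivalent settings,
 *    // e.g. to replace a context after a GPU hang.
 *    uint32_t clone_id = iris_clone_hw_context(bufmgr, ctx_id);
 *    iris_destroy_hw_context(bufmgr, ctx_id);
 *    ...
 *    iris_destroy_hw_context(bufmgr, clone_id);
 */
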
int iris_gem_get_tiling(struct iris_bo *bo, uint32_t *tiling);
int iris_gem_set_tiling(struct iris_bo *bo, const struct isl_surf *surf);

int iris_bo_export_dmabuf(struct iris_bo *bo, int *prime_fd);
struct iris_bo *iris_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd);

/**
 * Exports a bo as a GEM handle into a given DRM file descriptor
 * \param bo Buffer to export
 * \param drm_fd File descriptor where the new handle is created
 * \param out_handle Pointer to store the new handle
 *
 * Returns 0 if the buffer was successfully exported, a non-zero error code
 * otherwise.
 */
int iris_bo_export_gem_handle_for_device(struct iris_bo *bo, int drm_fd,
                                         uint32_t *out_handle);

uint32_t iris_bo_export_gem_handle(struct iris_bo *bo);

int iris_reg_read(struct iris_bufmgr *bufmgr, uint32_t offset, uint64_t *out);

/**
 * Returns the BO's address relative to the appropriate base address.
 *
 * All of our base addresses are programmed to the start of a 4GB region,
 * so simply returning the bottom 32 bits of the BO address will give us
 * the offset from whatever base address corresponds to that memory region.
 */
static inline uint32_t
iris_bo_offset_from_base_address(struct iris_bo *bo)
{
   /* This only works for buffers in the memory zones corresponding to a
    * base address - the top, unbounded memory zone doesn't have a base.
    */
   assert(bo->address < IRIS_MEMZONE_OTHER_START);
   return bo->address;
}

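/* Worked example: a BO placed in the dynamic zone at address
 * IRIS_MEMZONE_DYNAMIC_START + 0x1000 (i.e. 0x200001000) truncates to
 * 0x1000, which is exactly its offset from Dynamic State Base Address,
 * since that base is programmed to the 4GB-aligned start of the zone.
 */
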
/**
 * Track access of a BO from the specified caching domain and sequence number.
 *
 * Can be used without locking.  Only the most recent access (i.e. highest
 * seqno) is tracked.
 */
static inline void
iris_bo_bump_seqno(struct iris_bo *bo, uint64_t seqno,
                   enum iris_domain type)
{
   uint64_t *const last_seqno = &bo->last_seqnos[type];
   uint64_t tmp, prev_seqno = p_atomic_read(last_seqno);

   /* Atomically replace the stored seqno with ours, but only while ours is
    * newer.  If the cmpxchg fails, another thread raced with us: reload the
    * current value and retry, giving up once the stored seqno is >= ours.
    */
   while (prev_seqno < seqno &&
          prev_seqno != (tmp = p_atomic_cmpxchg(last_seqno, prev_seqno, seqno)))
      prev_seqno = tmp;
}

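/* A usage sketch (illustrative only): after emitting a draw that renders to
 * a color buffer, the batch would record that write so later barriers can
 * order against it:
 *
 *    iris_bo_bump_seqno(render_target_bo, batch_next_seqno,
 *                       IRIS_DOMAIN_RENDER_WRITE);
 *
 * Here 'batch_next_seqno' stands in for a monotonically increasing counter;
 * the real per-batch bookkeeping lives in the .c files.
 */
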
enum iris_memory_zone iris_memzone_for_address(uint64_t address);

int iris_bufmgr_create_screen_id(struct iris_bufmgr *bufmgr);

simple_mtx_t *iris_bufmgr_get_bo_deps_lock(struct iris_bufmgr *bufmgr);

#endif /* IRIS_BUFMGR_H */