/**************************************************************************
 *
 * Copyright 2018-2019 Alyssa Rosenzweig
 * Copyright 2018-2019 Collabora, Ltd.
 * Copyright © 2015 Intel Corporation
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
297ec681f3Smrg
307ec681f3Smrg#ifndef PAN_DEVICE_H
317ec681f3Smrg#define PAN_DEVICE_H
327ec681f3Smrg
337ec681f3Smrg#include <xf86drm.h>
347ec681f3Smrg#include "renderonly/renderonly.h"
357ec681f3Smrg#include "util/u_dynarray.h"
367ec681f3Smrg#include "util/bitset.h"
377ec681f3Smrg#include "util/list.h"
387ec681f3Smrg#include "util/sparse_array.h"
397ec681f3Smrg
407ec681f3Smrg#include "panfrost/util/pan_ir.h"
417ec681f3Smrg#include "pan_pool.h"
427ec681f3Smrg#include "pan_util.h"
437ec681f3Smrg
447ec681f3Smrg#include <genxml/gen_macros.h>
457ec681f3Smrg
467ec681f3Smrg#if defined(__cplusplus)
477ec681f3Smrgextern "C" {
487ec681f3Smrg#endif
497ec681f3Smrg
507ec681f3Smrg/* Driver limits */
517ec681f3Smrg#define PAN_MAX_CONST_BUFFERS 16
527ec681f3Smrg
537ec681f3Smrg/* How many power-of-two levels in the BO cache do we want? 2^12
547ec681f3Smrg * minimum chosen as it is the page size that all allocations are
557ec681f3Smrg * rounded to */
567ec681f3Smrg
577ec681f3Smrg#define MIN_BO_CACHE_BUCKET (12) /* 2^12 = 4KB */
587ec681f3Smrg#define MAX_BO_CACHE_BUCKET (22) /* 2^22 = 4MB */
597ec681f3Smrg
607ec681f3Smrg/* Fencepost problem, hence the off-by-one */
617ec681f3Smrg#define NR_BO_CACHE_BUCKETS (MAX_BO_CACHE_BUCKET - MIN_BO_CACHE_BUCKET + 1)
627ec681f3Smrg
/* Device-wide caches used to implement blits. Shader binaries and the
 * renderer state descriptors (RSDs) referencing them are compiled/built on
 * demand and cached; each cache carries its own mutex since multiple
 * contexts may hit the caches concurrently. */
struct pan_blitter {
        struct {
                /* Pool the blit shader binaries are allocated from */
                struct pan_pool *pool;
                /* Cache of compiled blit shaders — presumably keyed by the
                 * blit operation description; confirm against users */
                struct hash_table *blit;
                /* Cache of blend shaders used by blit operations */
                struct hash_table *blend;
                /* Guards the two hash tables and pool allocations above */
                pthread_mutex_t lock;
        } shaders;
        struct {
                /* Pool the RSDs are allocated from */
                struct pan_pool *pool;
                /* Cache of blit renderer state descriptors */
                struct hash_table *rsds;
                /* Guards the rsds table and pool allocations above */
                pthread_mutex_t lock;
        } rsds;
};
767ec681f3Smrg
/* Device-wide cache of compiled blend shaders. The lock must be held while
 * looking up or inserting into the table, since several contexts may
 * compile blend shaders concurrently. */
struct pan_blend_shaders {
        struct hash_table *shaders;
        pthread_mutex_t lock;
};
817ec681f3Smrg
/* Key describing an indirect draw compute-shader variant.
 *
 * Bits 0-1 encode the index size, bits 2-4 are independent feature flags.
 * Values above PAN_INDIRECT_DRAW_FLAGS_MASK are not flag combinations but
 * sequential IDs for the dedicated min/max index-search shaders. The whole
 * enum doubles as an index into pan_indirect_draw_shaders::shaders. */
enum pan_indirect_draw_flags {
        /* Index size selector (2-bit field, bits 0-1) */
        PAN_INDIRECT_DRAW_NO_INDEX = 0 << 0,
        PAN_INDIRECT_DRAW_1B_INDEX = 1 << 0,
        PAN_INDIRECT_DRAW_2B_INDEX = 2 << 0,
        PAN_INDIRECT_DRAW_4B_INDEX = 3 << 0,
        PAN_INDIRECT_DRAW_INDEX_SIZE_MASK = 3 << 0,
        /* Feature flags (one bit each) */
        PAN_INDIRECT_DRAW_HAS_PSIZ = 1 << 2,
        PAN_INDIRECT_DRAW_PRIMITIVE_RESTART = 1 << 3,
        PAN_INDIRECT_DRAW_UPDATE_PRIM_SIZE = 1 << 4,
        PAN_INDIRECT_DRAW_LAST_FLAG = PAN_INDIRECT_DRAW_UPDATE_PRIM_SIZE,
        PAN_INDIRECT_DRAW_FLAGS_MASK = (PAN_INDIRECT_DRAW_LAST_FLAG << 1) - 1,
        /* Min/max index-search shader IDs, numbered just past the flag
         * space so they never collide with a flag combination. */
        PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_1B_INDEX = PAN_INDIRECT_DRAW_LAST_FLAG << 1,
        PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_2B_INDEX,
        PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_4B_INDEX,
        PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_1B_INDEX_PRIM_RESTART,
        PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_2B_INDEX_PRIM_RESTART,
        /* Misnomer: index sizes in this enum are 1/2/4 bytes, there is no
         * 3-byte index type, so this is the 4-byte-index + primitive-restart
         * variant. Kept for source compatibility; prefer the 4B alias. */
        PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_3B_INDEX_PRIM_RESTART,
        PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_4B_INDEX_PRIM_RESTART =
                PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_3B_INDEX_PRIM_RESTART,
        PAN_INDIRECT_DRAW_NUM_SHADERS,
};
1017ec681f3Smrg
/* One compiled indirect-draw compute shader variant. */
struct pan_indirect_draw_shader {
        /* Push-uniform layout for this shader (see pan_ir.h) */
        struct panfrost_ubo_push push;
        /* GPU address of the renderer state descriptor for this shader */
        mali_ptr rsd;
};
1067ec681f3Smrg
/* Device-wide context for the compute shaders that process indirect draws.
 * Shared by all contexts; guarded by the lock below. */
struct pan_indirect_draw_shaders {
        /* One entry per shader ID in enum pan_indirect_draw_flags */
        struct pan_indirect_draw_shader shaders[PAN_INDIRECT_DRAW_NUM_SHADERS];

        /* Take the lock when initializing the draw shaders context or when
         * allocating from the binary pool.
         */
        pthread_mutex_t lock;

        /* A memory pool for shader binaries. We currently don't allocate a
         * single BO for all shaders up-front because estimating shader size
         * is not trivial, and changes to the compiler might influence this
         * estimation.
         */
        struct pan_pool *bin_pool;

        /* BO containing all renderer states attached to the compute shaders.
         * Those are built at shader compilation time and re-used every time
         * panfrost_emit_indirect_draw() is called.
         */
        struct panfrost_bo *states;

        /* Varying memory is allocated dynamically by compute jobs from this
         * heap.
         */
        struct panfrost_bo *varying_heap;
};
1337ec681f3Smrg
/* Device-wide state for the compute shader that processes indirect
 * dispatches. */
struct pan_indirect_dispatch {
        /* Push-uniform layout for the shader (see pan_ir.h) */
        struct panfrost_ubo_push push;
        /* BO holding the shader binary */
        struct panfrost_bo *bin;
        /* BO holding the shader's descriptors — presumably the renderer
         * state; confirm against the dispatch emission code */
        struct panfrost_bo *descs;
};
1397ec681f3Smrg
/** Implementation-defined tiler features, fixed per GPU (stored on
 * panfrost_device). */
struct panfrost_tiler_features {
        /** Number of bytes per tiler bin */
        unsigned bin_size;

        /** Maximum number of levels that may be simultaneously enabled.
         * Invariant: bitcount(hierarchy_mask) <= max_levels */
        unsigned max_levels;
};
1497ec681f3Smrg
/* Driver-wide view of a single GPU: the DRM fd, queried hardware
 * properties, shared shader caches, the BO cache, and device-global BOs.
 * One instance is shared by every context created on the device. */
struct panfrost_device {
        /* For ralloc */
        void *memctx;

        /* DRM device file descriptor */
        int fd;

        /* Properties of the GPU in use */
        unsigned arch;          /* major architecture version; 6-7 is
                                 * Bifrost (see pan_is_bifrost()) */
        unsigned gpu_id;
        unsigned core_count;
        unsigned thread_tls_alloc;
        struct panfrost_tiler_features tiler_features;
        unsigned quirks;
        bool has_afbc;          /* AFBC framebuffer compression supported */

        /* Table of formats, indexed by a PIPE format */
        const struct panfrost_format *formats;

        /* Bitmask of supported compressed texture formats */
        uint32_t compressed_formats;

        /* debug flags, see pan_util.h how to interpret */
        unsigned debug;

        /* Kernel driver version as reported by DRM */
        drmVersionPtr kernel_version;

        /* Render-only (display handled by another device) support, or NULL */
        struct renderonly *ro;

        /* Guards bo_map below */
        pthread_mutex_t bo_map_lock;
        /* Sparse array mapping GEM handles to panfrost_bo objects
         * (see pan_lookup_bo()) */
        struct util_sparse_array bo_map;

        struct {
                pthread_mutex_t lock;

                /* List containing all cached BOs sorted in LRU (Least
                 * Recently Used) order. This allows us to quickly evict BOs
                 * that are more than 1 second old.
                 */
                struct list_head lru;

                /* The BO cache is a set of buckets with power-of-two sizes
                 * ranging from 2^MIN_BO_CACHE_BUCKET (4096, the page size)
                 * up to 2^MAX_BO_CACHE_BUCKET (4 MB).
                 * Each bucket is a linked list of free panfrost_bo objects. */

                struct list_head buckets[NR_BO_CACHE_BUCKETS];
        } bo_cache;

        struct pan_blitter blitter;
        struct pan_blend_shaders blend_shaders;
        struct pan_indirect_draw_shaders indirect_draw_shaders;
        struct pan_indirect_dispatch indirect_dispatch;

        /* Tiler heap shared across all tiler jobs, allocated against the
         * device since there's only a single tiler. Since this is invisible to
         * the CPU, it's okay for multiple contexts to reference it
         * simultaneously; by keeping on the device struct, we eliminate a
         * costly per-context allocation. */

        struct panfrost_bo *tiler_heap;

        /* The tiler heap is shared by all contexts, and is written by tiler
         * jobs and read by fragment job. We need to ensure that a
         * vertex/tiler job chain from one context is not inserted between
         * the vertex/tiler and fragment job of another context, otherwise
         * we end up with tiler heap corruption.
         */
        pthread_mutex_t submit_lock;

        /* Sample positions are preloaded into a write-once constant buffer,
         * such that they can be referenced for free later. Needed
         * unconditionally on Bifrost, and useful for sharing with Midgard */

        struct panfrost_bo *sample_positions;
};
2257ec681f3Smrg
/* Initializes *dev for the DRM device `fd`, querying GPU properties and
 * setting up the device-wide state declared above. `memctx` is the ralloc
 * parent context. NOTE(review): presumably takes ownership of `fd` — confirm
 * against the implementation. */
void
panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev);

/* Tears down the state created by panfrost_open_device(). */
void
panfrost_close_device(struct panfrost_device *dev);

/* Returns whether the compressed PIPE format `fmt` is usable on this GPU
 * (consults dev->compressed_formats). */
bool
panfrost_supports_compressed_format(struct panfrost_device *dev, unsigned fmt);

/* Uploads the sample-position tables into dev->sample_positions. */
void
panfrost_upload_sample_positions(struct panfrost_device *dev);

/* Returns the GPU address of the preloaded sample-position table for
 * `pattern` (requires panfrost_upload_sample_positions() to have run). */
mali_ptr
panfrost_sample_positions(const struct panfrost_device *dev,
                enum mali_sample_pattern pattern);
/* Writes the position of sample `sample_idx` within `pattern` to *out —
 * presumably two floats (x, y); confirm against callers. */
void
panfrost_query_sample_position(
                enum mali_sample_pattern pattern,
                unsigned sample_idx,
                float *out);
2467ec681f3Smrg
2477ec681f3Smrgstatic inline struct panfrost_bo *
2487ec681f3Smrgpan_lookup_bo(struct panfrost_device *dev, uint32_t gem_handle)
2497ec681f3Smrg{
2507ec681f3Smrg        return (struct panfrost_bo *)util_sparse_array_get(&dev->bo_map, gem_handle);
2517ec681f3Smrg}
2527ec681f3Smrg
2537ec681f3Smrgstatic inline bool
2547ec681f3Smrgpan_is_bifrost(const struct panfrost_device *dev)
2557ec681f3Smrg{
2567ec681f3Smrg        return dev->arch >= 6 && dev->arch <= 7;
2577ec681f3Smrg}
2587ec681f3Smrg
2597ec681f3Smrg#if defined(__cplusplus)
2607ec681f3Smrg} // extern "C"
2617ec681f3Smrg#endif
2627ec681f3Smrg
2637ec681f3Smrg#endif
264