/**************************************************************************
 *
 * Copyright 2018-2019 Alyssa Rosenzweig
 * Copyright 2018-2019 Collabora, Ltd.
 * Copyright © 2015 Intel Corporation
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
277ec681f3Smrg * 287ec681f3Smrg **************************************************************************/ 297ec681f3Smrg 307ec681f3Smrg#ifndef PAN_DEVICE_H 317ec681f3Smrg#define PAN_DEVICE_H 327ec681f3Smrg 337ec681f3Smrg#include <xf86drm.h> 347ec681f3Smrg#include "renderonly/renderonly.h" 357ec681f3Smrg#include "util/u_dynarray.h" 367ec681f3Smrg#include "util/bitset.h" 377ec681f3Smrg#include "util/list.h" 387ec681f3Smrg#include "util/sparse_array.h" 397ec681f3Smrg 407ec681f3Smrg#include "panfrost/util/pan_ir.h" 417ec681f3Smrg#include "pan_pool.h" 427ec681f3Smrg#include "pan_util.h" 437ec681f3Smrg 447ec681f3Smrg#include <genxml/gen_macros.h> 457ec681f3Smrg 467ec681f3Smrg#if defined(__cplusplus) 477ec681f3Smrgextern "C" { 487ec681f3Smrg#endif 497ec681f3Smrg 507ec681f3Smrg/* Driver limits */ 517ec681f3Smrg#define PAN_MAX_CONST_BUFFERS 16 527ec681f3Smrg 537ec681f3Smrg/* How many power-of-two levels in the BO cache do we want? 2^12 547ec681f3Smrg * minimum chosen as it is the page size that all allocations are 557ec681f3Smrg * rounded to */ 567ec681f3Smrg 577ec681f3Smrg#define MIN_BO_CACHE_BUCKET (12) /* 2^12 = 4KB */ 587ec681f3Smrg#define MAX_BO_CACHE_BUCKET (22) /* 2^22 = 4MB */ 597ec681f3Smrg 607ec681f3Smrg/* Fencepost problem, hence the off-by-one */ 617ec681f3Smrg#define NR_BO_CACHE_BUCKETS (MAX_BO_CACHE_BUCKET - MIN_BO_CACHE_BUCKET + 1) 627ec681f3Smrg 637ec681f3Smrgstruct pan_blitter { 647ec681f3Smrg struct { 657ec681f3Smrg struct pan_pool *pool; 667ec681f3Smrg struct hash_table *blit; 677ec681f3Smrg struct hash_table *blend; 687ec681f3Smrg pthread_mutex_t lock; 697ec681f3Smrg } shaders; 707ec681f3Smrg struct { 717ec681f3Smrg struct pan_pool *pool; 727ec681f3Smrg struct hash_table *rsds; 737ec681f3Smrg pthread_mutex_t lock; 747ec681f3Smrg } rsds; 757ec681f3Smrg}; 767ec681f3Smrg 777ec681f3Smrgstruct pan_blend_shaders { 787ec681f3Smrg struct hash_table *shaders; 797ec681f3Smrg pthread_mutex_t lock; 807ec681f3Smrg}; 817ec681f3Smrg 827ec681f3Smrgenum 
pan_indirect_draw_flags { 837ec681f3Smrg PAN_INDIRECT_DRAW_NO_INDEX = 0 << 0, 847ec681f3Smrg PAN_INDIRECT_DRAW_1B_INDEX = 1 << 0, 857ec681f3Smrg PAN_INDIRECT_DRAW_2B_INDEX = 2 << 0, 867ec681f3Smrg PAN_INDIRECT_DRAW_4B_INDEX = 3 << 0, 877ec681f3Smrg PAN_INDIRECT_DRAW_INDEX_SIZE_MASK = 3 << 0, 887ec681f3Smrg PAN_INDIRECT_DRAW_HAS_PSIZ = 1 << 2, 897ec681f3Smrg PAN_INDIRECT_DRAW_PRIMITIVE_RESTART = 1 << 3, 907ec681f3Smrg PAN_INDIRECT_DRAW_UPDATE_PRIM_SIZE = 1 << 4, 917ec681f3Smrg PAN_INDIRECT_DRAW_LAST_FLAG = PAN_INDIRECT_DRAW_UPDATE_PRIM_SIZE, 927ec681f3Smrg PAN_INDIRECT_DRAW_FLAGS_MASK = (PAN_INDIRECT_DRAW_LAST_FLAG << 1) - 1, 937ec681f3Smrg PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_1B_INDEX = PAN_INDIRECT_DRAW_LAST_FLAG << 1, 947ec681f3Smrg PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_2B_INDEX, 957ec681f3Smrg PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_4B_INDEX, 967ec681f3Smrg PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_1B_INDEX_PRIM_RESTART, 977ec681f3Smrg PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_2B_INDEX_PRIM_RESTART, 987ec681f3Smrg PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_3B_INDEX_PRIM_RESTART, 997ec681f3Smrg PAN_INDIRECT_DRAW_NUM_SHADERS, 1007ec681f3Smrg}; 1017ec681f3Smrg 1027ec681f3Smrgstruct pan_indirect_draw_shader { 1037ec681f3Smrg struct panfrost_ubo_push push; 1047ec681f3Smrg mali_ptr rsd; 1057ec681f3Smrg}; 1067ec681f3Smrg 1077ec681f3Smrgstruct pan_indirect_draw_shaders { 1087ec681f3Smrg struct pan_indirect_draw_shader shaders[PAN_INDIRECT_DRAW_NUM_SHADERS]; 1097ec681f3Smrg 1107ec681f3Smrg /* Take the lock when initializing the draw shaders context or when 1117ec681f3Smrg * allocating from the binary pool. 1127ec681f3Smrg */ 1137ec681f3Smrg pthread_mutex_t lock; 1147ec681f3Smrg 1157ec681f3Smrg /* A memory pool for shader binaries. We currently don't allocate a 1167ec681f3Smrg * single BO for all shaders up-front because estimating shader size 1177ec681f3Smrg * is not trivial, and changes to the compiler might influence this 1187ec681f3Smrg * estimation. 
1197ec681f3Smrg */ 1207ec681f3Smrg struct pan_pool *bin_pool; 1217ec681f3Smrg 1227ec681f3Smrg /* BO containing all renderer states attached to the compute shaders. 1237ec681f3Smrg * Those are built at shader compilation time and re-used every time 1247ec681f3Smrg * panfrost_emit_indirect_draw() is called. 1257ec681f3Smrg */ 1267ec681f3Smrg struct panfrost_bo *states; 1277ec681f3Smrg 1287ec681f3Smrg /* Varying memory is allocated dynamically by compute jobs from this 1297ec681f3Smrg * heap. 1307ec681f3Smrg */ 1317ec681f3Smrg struct panfrost_bo *varying_heap; 1327ec681f3Smrg}; 1337ec681f3Smrg 1347ec681f3Smrgstruct pan_indirect_dispatch { 1357ec681f3Smrg struct panfrost_ubo_push push; 1367ec681f3Smrg struct panfrost_bo *bin; 1377ec681f3Smrg struct panfrost_bo *descs; 1387ec681f3Smrg}; 1397ec681f3Smrg 1407ec681f3Smrg/** Implementation-defined tiler features */ 1417ec681f3Smrgstruct panfrost_tiler_features { 1427ec681f3Smrg /** Number of bytes per tiler bin */ 1437ec681f3Smrg unsigned bin_size; 1447ec681f3Smrg 1457ec681f3Smrg /** Maximum number of levels that may be simultaneously enabled. 
1467ec681f3Smrg * Invariant: bitcount(hierarchy_mask) <= max_levels */ 1477ec681f3Smrg unsigned max_levels; 1487ec681f3Smrg}; 1497ec681f3Smrg 1507ec681f3Smrgstruct panfrost_device { 1517ec681f3Smrg /* For ralloc */ 1527ec681f3Smrg void *memctx; 1537ec681f3Smrg 1547ec681f3Smrg int fd; 1557ec681f3Smrg 1567ec681f3Smrg /* Properties of the GPU in use */ 1577ec681f3Smrg unsigned arch; 1587ec681f3Smrg unsigned gpu_id; 1597ec681f3Smrg unsigned core_count; 1607ec681f3Smrg unsigned thread_tls_alloc; 1617ec681f3Smrg struct panfrost_tiler_features tiler_features; 1627ec681f3Smrg unsigned quirks; 1637ec681f3Smrg bool has_afbc; 1647ec681f3Smrg 1657ec681f3Smrg /* Table of formats, indexed by a PIPE format */ 1667ec681f3Smrg const struct panfrost_format *formats; 1677ec681f3Smrg 1687ec681f3Smrg /* Bitmask of supported compressed texture formats */ 1697ec681f3Smrg uint32_t compressed_formats; 1707ec681f3Smrg 1717ec681f3Smrg /* debug flags, see pan_util.h how to interpret */ 1727ec681f3Smrg unsigned debug; 1737ec681f3Smrg 1747ec681f3Smrg drmVersionPtr kernel_version; 1757ec681f3Smrg 1767ec681f3Smrg struct renderonly *ro; 1777ec681f3Smrg 1787ec681f3Smrg pthread_mutex_t bo_map_lock; 1797ec681f3Smrg struct util_sparse_array bo_map; 1807ec681f3Smrg 1817ec681f3Smrg struct { 1827ec681f3Smrg pthread_mutex_t lock; 1837ec681f3Smrg 1847ec681f3Smrg /* List containing all cached BOs sorted in LRU (Least 1857ec681f3Smrg * Recently Used) order. This allows us to quickly evict BOs 1867ec681f3Smrg * that are more than 1 second old. 1877ec681f3Smrg */ 1887ec681f3Smrg struct list_head lru; 1897ec681f3Smrg 1907ec681f3Smrg /* The BO cache is a set of buckets with power-of-two sizes 1917ec681f3Smrg * ranging from 2^12 (4096, the page size) to 1927ec681f3Smrg * 2^(12 + MAX_BO_CACHE_BUCKETS). 1937ec681f3Smrg * Each bucket is a linked list of free panfrost_bo objects. 
*/ 1947ec681f3Smrg 1957ec681f3Smrg struct list_head buckets[NR_BO_CACHE_BUCKETS]; 1967ec681f3Smrg } bo_cache; 1977ec681f3Smrg 1987ec681f3Smrg struct pan_blitter blitter; 1997ec681f3Smrg struct pan_blend_shaders blend_shaders; 2007ec681f3Smrg struct pan_indirect_draw_shaders indirect_draw_shaders; 2017ec681f3Smrg struct pan_indirect_dispatch indirect_dispatch; 2027ec681f3Smrg 2037ec681f3Smrg /* Tiler heap shared across all tiler jobs, allocated against the 2047ec681f3Smrg * device since there's only a single tiler. Since this is invisible to 2057ec681f3Smrg * the CPU, it's okay for multiple contexts to reference it 2067ec681f3Smrg * simultaneously; by keeping on the device struct, we eliminate a 2077ec681f3Smrg * costly per-context allocation. */ 2087ec681f3Smrg 2097ec681f3Smrg struct panfrost_bo *tiler_heap; 2107ec681f3Smrg 2117ec681f3Smrg /* The tiler heap is shared by all contexts, and is written by tiler 2127ec681f3Smrg * jobs and read by fragment job. We need to ensure that a 2137ec681f3Smrg * vertex/tiler job chain from one context is not inserted between 2147ec681f3Smrg * the vertex/tiler and fragment job of another context, otherwise 2157ec681f3Smrg * we end up with tiler heap corruption. 2167ec681f3Smrg */ 2177ec681f3Smrg pthread_mutex_t submit_lock; 2187ec681f3Smrg 2197ec681f3Smrg /* Sample positions are preloaded into a write-once constant buffer, 2207ec681f3Smrg * such that they can be referenced fore free later. 
Needed 2217ec681f3Smrg * unconditionally on Bifrost, and useful for sharing with Midgard */ 2227ec681f3Smrg 2237ec681f3Smrg struct panfrost_bo *sample_positions; 2247ec681f3Smrg}; 2257ec681f3Smrg 2267ec681f3Smrgvoid 2277ec681f3Smrgpanfrost_open_device(void *memctx, int fd, struct panfrost_device *dev); 2287ec681f3Smrg 2297ec681f3Smrgvoid 2307ec681f3Smrgpanfrost_close_device(struct panfrost_device *dev); 2317ec681f3Smrg 2327ec681f3Smrgbool 2337ec681f3Smrgpanfrost_supports_compressed_format(struct panfrost_device *dev, unsigned fmt); 2347ec681f3Smrg 2357ec681f3Smrgvoid 2367ec681f3Smrgpanfrost_upload_sample_positions(struct panfrost_device *dev); 2377ec681f3Smrg 2387ec681f3Smrgmali_ptr 2397ec681f3Smrgpanfrost_sample_positions(const struct panfrost_device *dev, 2407ec681f3Smrg enum mali_sample_pattern pattern); 2417ec681f3Smrgvoid 2427ec681f3Smrgpanfrost_query_sample_position( 2437ec681f3Smrg enum mali_sample_pattern pattern, 2447ec681f3Smrg unsigned sample_idx, 2457ec681f3Smrg float *out); 2467ec681f3Smrg 2477ec681f3Smrgstatic inline struct panfrost_bo * 2487ec681f3Smrgpan_lookup_bo(struct panfrost_device *dev, uint32_t gem_handle) 2497ec681f3Smrg{ 2507ec681f3Smrg return (struct panfrost_bo *)util_sparse_array_get(&dev->bo_map, gem_handle); 2517ec681f3Smrg} 2527ec681f3Smrg 2537ec681f3Smrgstatic inline bool 2547ec681f3Smrgpan_is_bifrost(const struct panfrost_device *dev) 2557ec681f3Smrg{ 2567ec681f3Smrg return dev->arch >= 6 && dev->arch <= 7; 2577ec681f3Smrg} 2587ec681f3Smrg 2597ec681f3Smrg#if defined(__cplusplus) 2607ec681f3Smrg} // extern "C" 2617ec681f3Smrg#endif 2627ec681f3Smrg 2637ec681f3Smrg#endif 264