17ec681f3Smrg/*
27ec681f3Smrg * Copyright (C) 2019 Collabora, Ltd.
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
217ec681f3Smrg * SOFTWARE.
227ec681f3Smrg *
237ec681f3Smrg * Authors:
247ec681f3Smrg *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
257ec681f3Smrg */
267ec681f3Smrg
277ec681f3Smrg#include <xf86drm.h>
287ec681f3Smrg
297ec681f3Smrg#include "util/u_math.h"
307ec681f3Smrg#include "util/macros.h"
317ec681f3Smrg#include "util/hash_table.h"
327ec681f3Smrg#include "util/u_thread.h"
337ec681f3Smrg#include "drm-uapi/panfrost_drm.h"
347ec681f3Smrg#include "pan_encoder.h"
357ec681f3Smrg#include "pan_device.h"
367ec681f3Smrg#include "panfrost-quirks.h"
377ec681f3Smrg#include "pan_bo.h"
387ec681f3Smrg#include "pan_texture.h"
397ec681f3Smrg#include "wrap.h"
407ec681f3Smrg#include "pan_util.h"
417ec681f3Smrg
427ec681f3Smrg/* Abstraction over the raw drm_panfrost_get_param ioctl for fetching
437ec681f3Smrg * information about devices */
447ec681f3Smrg
457ec681f3Smrgstatic __u64
467ec681f3Smrgpanfrost_query_raw(
477ec681f3Smrg                int fd,
487ec681f3Smrg                enum drm_panfrost_param param,
497ec681f3Smrg                bool required,
507ec681f3Smrg                unsigned default_value)
517ec681f3Smrg{
527ec681f3Smrg        struct drm_panfrost_get_param get_param = {0,};
537ec681f3Smrg        ASSERTED int ret;
547ec681f3Smrg
557ec681f3Smrg        get_param.param = param;
567ec681f3Smrg        ret = drmIoctl(fd, DRM_IOCTL_PANFROST_GET_PARAM, &get_param);
577ec681f3Smrg
587ec681f3Smrg        if (ret) {
597ec681f3Smrg                assert(!required);
607ec681f3Smrg                return default_value;
617ec681f3Smrg        }
627ec681f3Smrg
637ec681f3Smrg        return get_param.value;
647ec681f3Smrg}
657ec681f3Smrg
667ec681f3Smrgstatic unsigned
677ec681f3Smrgpanfrost_query_gpu_version(int fd)
687ec681f3Smrg{
697ec681f3Smrg#ifndef NDEBUG
707ec681f3Smrg        /* In debug builds, allow overriding the GPU ID, for example to run
717ec681f3Smrg         * Bifrost shader-db on a Midgard machine. This is a bit less heavy
727ec681f3Smrg         * handed than setting up the entirety of drm-shim */
737ec681f3Smrg        char *override_version = getenv("PAN_GPU_ID");
747ec681f3Smrg
757ec681f3Smrg        if (override_version)
767ec681f3Smrg                return strtol(override_version, NULL, 16);
777ec681f3Smrg#endif
787ec681f3Smrg
797ec681f3Smrg        return panfrost_query_raw(fd, DRM_PANFROST_PARAM_GPU_PROD_ID, true, 0);
807ec681f3Smrg}
817ec681f3Smrg
827ec681f3Smrgstatic unsigned
837ec681f3Smrgpanfrost_query_gpu_revision(int fd)
847ec681f3Smrg{
857ec681f3Smrg        return panfrost_query_raw(fd, DRM_PANFROST_PARAM_GPU_REVISION, true, 0);
867ec681f3Smrg}
877ec681f3Smrg
887ec681f3Smrgstatic struct panfrost_tiler_features
897ec681f3Smrgpanfrost_query_tiler_features(int fd)
907ec681f3Smrg{
917ec681f3Smrg        /* Default value (2^9 bytes and 8 levels) to match old behaviour */
927ec681f3Smrg        uint32_t raw = panfrost_query_raw(fd, DRM_PANFROST_PARAM_TILER_FEATURES,
937ec681f3Smrg                        false, 0x809);
947ec681f3Smrg
957ec681f3Smrg        /* Bin size is log2 in the first byte, max levels in the second byte */
967ec681f3Smrg        return (struct panfrost_tiler_features) {
977ec681f3Smrg                .bin_size = (1 << (raw & BITFIELD_MASK(5))),
987ec681f3Smrg                .max_levels = (raw >> 8) & BITFIELD_MASK(4)
997ec681f3Smrg        };
1007ec681f3Smrg}
1017ec681f3Smrg
1027ec681f3Smrgstatic unsigned
1037ec681f3Smrgpanfrost_query_core_count(int fd)
1047ec681f3Smrg{
1057ec681f3Smrg        /* On older kernels, worst-case to 16 cores */
1067ec681f3Smrg
1077ec681f3Smrg        unsigned mask = panfrost_query_raw(fd,
1087ec681f3Smrg                        DRM_PANFROST_PARAM_SHADER_PRESENT, false, 0xffff);
1097ec681f3Smrg
1107ec681f3Smrg        /* Some cores might be absent. For TLS computation purposes, we care
1117ec681f3Smrg         * about the greatest ID + 1, which equals the core count if all cores
1127ec681f3Smrg         * are present, but allocates space for absent cores if needed.
1137ec681f3Smrg         * util_last_bit is defined to return the greatest bit set + 1, which
1147ec681f3Smrg         * is exactly what we need. */
1157ec681f3Smrg
1167ec681f3Smrg        return util_last_bit(mask);
1177ec681f3Smrg}
1187ec681f3Smrg
/* Architectural maximum thread count for a given architecture major version.
 * Used as a fallback when the kernel does not report a thread count, since
 * the corresponding register may not be implemented by a given chip. G31 is
 * actually 512 instead of 768, but it doesn't really matter. */

static unsigned
panfrost_max_thread_count(unsigned arch)
{
        /* Midgard */
        if (arch == 4 || arch == 5)
                return 256;

        /* Bifrost, first generation */
        if (arch == 6)
                return 384;

        /* Bifrost, second generation (G31 is 512 but it doesn't matter) */
        if (arch == 7)
                return 768;

        /* Anything else, which covers Valhall (for completeness) */
        return 1024;
}
1457ec681f3Smrg
1467ec681f3Smrgstatic unsigned
1477ec681f3Smrgpanfrost_query_thread_tls_alloc(int fd, unsigned major)
1487ec681f3Smrg{
1497ec681f3Smrg        unsigned tls = panfrost_query_raw(fd,
1507ec681f3Smrg                        DRM_PANFROST_PARAM_THREAD_TLS_ALLOC, false, 0);
1517ec681f3Smrg
1527ec681f3Smrg        return (tls > 0) ? tls : panfrost_max_thread_count(major);
1537ec681f3Smrg}
1547ec681f3Smrg
1557ec681f3Smrgstatic uint32_t
1567ec681f3Smrgpanfrost_query_compressed_formats(int fd)
1577ec681f3Smrg{
1587ec681f3Smrg        /* If unspecified, assume ASTC/ETC only. Factory default for Juno, and
1597ec681f3Smrg         * should exist on any Mali configuration. All hardware should report
1607ec681f3Smrg         * these texture formats but the kernel might not be new enough. */
1617ec681f3Smrg
1627ec681f3Smrg        uint32_t default_set =
1637ec681f3Smrg                (1 << MALI_ETC2_RGB8) |
1647ec681f3Smrg                (1 << MALI_ETC2_R11_UNORM) |
1657ec681f3Smrg                (1 << MALI_ETC2_RGBA8) |
1667ec681f3Smrg                (1 << MALI_ETC2_RG11_UNORM) |
1677ec681f3Smrg                (1 << MALI_ETC2_R11_SNORM) |
1687ec681f3Smrg                (1 << MALI_ETC2_RG11_SNORM) |
1697ec681f3Smrg                (1 << MALI_ETC2_RGB8A1) |
1707ec681f3Smrg                (1 << MALI_ASTC_3D_LDR) |
1717ec681f3Smrg                (1 << MALI_ASTC_3D_HDR) |
1727ec681f3Smrg                (1 << MALI_ASTC_2D_LDR) |
1737ec681f3Smrg                (1 << MALI_ASTC_2D_HDR);
1747ec681f3Smrg
1757ec681f3Smrg        return panfrost_query_raw(fd, DRM_PANFROST_PARAM_TEXTURE_FEATURES0,
1767ec681f3Smrg                        false, default_set);
1777ec681f3Smrg}
1787ec681f3Smrg
1797ec681f3Smrg/* DRM_PANFROST_PARAM_TEXTURE_FEATURES0 will return a bitmask of supported
1807ec681f3Smrg * compressed formats, so we offer a helper to test if a format is supported */
1817ec681f3Smrg
1827ec681f3Smrgbool
1837ec681f3Smrgpanfrost_supports_compressed_format(struct panfrost_device *dev, unsigned fmt)
1847ec681f3Smrg{
1857ec681f3Smrg        if (MALI_EXTRACT_TYPE(fmt) != MALI_FORMAT_COMPRESSED)
1867ec681f3Smrg                return true;
1877ec681f3Smrg
1887ec681f3Smrg        unsigned idx = fmt & ~MALI_FORMAT_COMPRESSED;
1897ec681f3Smrg        assert(idx < 32);
1907ec681f3Smrg
1917ec681f3Smrg        return dev->compressed_formats & (1 << idx);
1927ec681f3Smrg}
1937ec681f3Smrg
/* Given a GPU ID like 0x860, return a prettified model name. The returned
 * string is a literal and must not be freed. IDs missing from the table are
 * treated as unreachable. */

const char *
panfrost_model_name(unsigned gpu_id)
{
        /* NOTE(review): 0x750 is listed under the T760 name. This mirrors
         * the existing mapping; confirm against the hardware ID list before
         * changing either side. */
        static const struct {
                unsigned id;
                const char *name;
        } known_models[] = {
                { 0x600,  "Mali-T600 (Panfrost)" },
                { 0x620,  "Mali-T620 (Panfrost)" },
                { 0x720,  "Mali-T720 (Panfrost)" },
                { 0x820,  "Mali-T820 (Panfrost)" },
                { 0x830,  "Mali-T830 (Panfrost)" },
                { 0x750,  "Mali-T760 (Panfrost)" },
                { 0x860,  "Mali-T860 (Panfrost)" },
                { 0x880,  "Mali-T880 (Panfrost)" },
                { 0x6221, "Mali-G72 (Panfrost)" },
                { 0x7093, "Mali-G31 (Panfrost)" },
                { 0x7212, "Mali-G52 (Panfrost)" },
                { 0x7402, "Mali-G52 r1 (Panfrost)" },
        };

        for (unsigned i = 0; i < ARRAY_SIZE(known_models); ++i) {
                if (known_models[i].id == gpu_id)
                        return known_models[i].name;
        }

        unreachable("Invalid GPU ID");
}
2167ec681f3Smrg
2177ec681f3Smrg/* Check for AFBC hardware support. AFBC is introduced in v5. Implementations
2187ec681f3Smrg * may omit it, signaled as a nonzero value in the AFBC_FEATURES property. */
2197ec681f3Smrg
2207ec681f3Smrgstatic bool
2217ec681f3Smrgpanfrost_query_afbc(int fd, unsigned arch)
2227ec681f3Smrg{
2237ec681f3Smrg        unsigned reg = panfrost_query_raw(fd,
2247ec681f3Smrg                                          DRM_PANFROST_PARAM_AFBC_FEATURES,
2257ec681f3Smrg                                          false, 0);
2267ec681f3Smrg
2277ec681f3Smrg        return (arch >= 5) && (reg == 0);
2287ec681f3Smrg}
2297ec681f3Smrg
2307ec681f3Smrgvoid
2317ec681f3Smrgpanfrost_open_device(void *memctx, int fd, struct panfrost_device *dev)
2327ec681f3Smrg{
2337ec681f3Smrg        dev->fd = fd;
2347ec681f3Smrg        dev->memctx = memctx;
2357ec681f3Smrg        dev->gpu_id = panfrost_query_gpu_version(fd);
2367ec681f3Smrg        dev->arch = pan_arch(dev->gpu_id);
2377ec681f3Smrg        dev->core_count = panfrost_query_core_count(fd);
2387ec681f3Smrg        dev->thread_tls_alloc = panfrost_query_thread_tls_alloc(fd, dev->arch);
2397ec681f3Smrg        dev->kernel_version = drmGetVersion(fd);
2407ec681f3Smrg        unsigned revision = panfrost_query_gpu_revision(fd);
2417ec681f3Smrg        dev->quirks = panfrost_get_quirks(dev->gpu_id, revision);
2427ec681f3Smrg        dev->compressed_formats = panfrost_query_compressed_formats(fd);
2437ec681f3Smrg        dev->tiler_features = panfrost_query_tiler_features(fd);
2447ec681f3Smrg        dev->has_afbc = panfrost_query_afbc(fd, dev->arch);
2457ec681f3Smrg
2467ec681f3Smrg        if (dev->quirks & HAS_SWIZZLES)
2477ec681f3Smrg                dev->formats = panfrost_pipe_format_v6;
2487ec681f3Smrg        else
2497ec681f3Smrg                dev->formats = panfrost_pipe_format_v7;
2507ec681f3Smrg
2517ec681f3Smrg        util_sparse_array_init(&dev->bo_map, sizeof(struct panfrost_bo), 512);
2527ec681f3Smrg
2537ec681f3Smrg        pthread_mutex_init(&dev->bo_cache.lock, NULL);
2547ec681f3Smrg        list_inithead(&dev->bo_cache.lru);
2557ec681f3Smrg
2567ec681f3Smrg        for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i)
2577ec681f3Smrg                list_inithead(&dev->bo_cache.buckets[i]);
2587ec681f3Smrg
2597ec681f3Smrg        /* Initialize pandecode before we start allocating */
2607ec681f3Smrg        if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
2617ec681f3Smrg                pandecode_initialize(!(dev->debug & PAN_DBG_TRACE));
2627ec681f3Smrg
2637ec681f3Smrg        /* Tiler heap is internally required by the tiler, which can only be
2647ec681f3Smrg         * active for a single job chain at once, so a single heap can be
2657ec681f3Smrg         * shared across batches/contextes */
2667ec681f3Smrg
2677ec681f3Smrg        dev->tiler_heap = panfrost_bo_create(dev, 64 * 1024 * 1024,
2687ec681f3Smrg                        PAN_BO_INVISIBLE | PAN_BO_GROWABLE, "Tiler heap");
2697ec681f3Smrg
2707ec681f3Smrg        pthread_mutex_init(&dev->submit_lock, NULL);
2717ec681f3Smrg
2727ec681f3Smrg        /* Done once on init */
2737ec681f3Smrg        panfrost_upload_sample_positions(dev);
2747ec681f3Smrg}
2757ec681f3Smrg
2767ec681f3Smrgvoid
2777ec681f3Smrgpanfrost_close_device(struct panfrost_device *dev)
2787ec681f3Smrg{
2797ec681f3Smrg        pthread_mutex_destroy(&dev->submit_lock);
2807ec681f3Smrg        panfrost_bo_unreference(dev->tiler_heap);
2817ec681f3Smrg        panfrost_bo_cache_evict_all(dev);
2827ec681f3Smrg        pthread_mutex_destroy(&dev->bo_cache.lock);
2837ec681f3Smrg        drmFreeVersion(dev->kernel_version);
2847ec681f3Smrg        util_sparse_array_finish(&dev->bo_map);
2857ec681f3Smrg        close(dev->fd);
2867ec681f3Smrg}
287