/*
 * © Copyright 2018 Alyssa Rosenzweig
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <panfrost-misc.h>
#include <panfrost-job.h>
#include "pan_context.h"

/* TODO: What does this actually have to be?
*/ 34b8e80941Smrg#define ALIGNMENT 128 35b8e80941Smrg 36b8e80941Smrg/* Allocate a mapped chunk directly from a heap */ 37b8e80941Smrg 38b8e80941Smrgstruct panfrost_transfer 39b8e80941Smrgpanfrost_allocate_chunk(struct panfrost_context *ctx, size_t size, unsigned heap_id) 40b8e80941Smrg{ 41b8e80941Smrg size = ALIGN(size, ALIGNMENT); 42b8e80941Smrg 43b8e80941Smrg struct pipe_context *gallium = (struct pipe_context *) ctx; 44b8e80941Smrg struct panfrost_screen *screen = pan_screen(gallium->screen); 45b8e80941Smrg 46b8e80941Smrg struct pb_slab_entry *entry = pb_slab_alloc(&screen->slabs, size, heap_id); 47b8e80941Smrg struct panfrost_memory_entry *p_entry = (struct panfrost_memory_entry *) entry; 48b8e80941Smrg struct panfrost_memory *backing = (struct panfrost_memory *) entry->slab; 49b8e80941Smrg 50b8e80941Smrg struct panfrost_transfer transfer = { 51b8e80941Smrg .cpu = backing->cpu + p_entry->offset, 52b8e80941Smrg .gpu = backing->gpu + p_entry->offset 53b8e80941Smrg }; 54b8e80941Smrg 55b8e80941Smrg return transfer; 56b8e80941Smrg} 57b8e80941Smrg 58b8e80941Smrg/* Transient command stream pooling: command stream uploads try to simply copy 59b8e80941Smrg * into whereever we left off. 
If there isn't space, we allocate a new entry 60b8e80941Smrg * into the pool and copy there */ 61b8e80941Smrg 62b8e80941Smrgstruct panfrost_transfer 63b8e80941Smrgpanfrost_allocate_transient(struct panfrost_context *ctx, size_t sz) 64b8e80941Smrg{ 65b8e80941Smrg /* Pad the size */ 66b8e80941Smrg sz = ALIGN(sz, ALIGNMENT); 67b8e80941Smrg 68b8e80941Smrg /* Check if there is room in the current entry */ 69b8e80941Smrg struct panfrost_transient_pool *pool = &ctx->transient_pools[ctx->cmdstream_i]; 70b8e80941Smrg 71b8e80941Smrg if ((pool->entry_offset + sz) > pool->entry_size) { 72b8e80941Smrg /* Don't overflow this entry -- advance to the next */ 73b8e80941Smrg 74b8e80941Smrg pool->entry_offset = 0; 75b8e80941Smrg 76b8e80941Smrg pool->entry_index++; 77b8e80941Smrg assert(pool->entry_index < PANFROST_MAX_TRANSIENT_ENTRIES); 78b8e80941Smrg 79b8e80941Smrg /* Check if this entry exists */ 80b8e80941Smrg 81b8e80941Smrg if (pool->entry_index >= pool->entry_count) { 82b8e80941Smrg /* Don't overflow the pool -- allocate a new one */ 83b8e80941Smrg struct pipe_context *gallium = (struct pipe_context *) ctx; 84b8e80941Smrg struct panfrost_screen *screen = pan_screen(gallium->screen); 85b8e80941Smrg struct pb_slab_entry *entry = pb_slab_alloc(&screen->slabs, pool->entry_size, HEAP_TRANSIENT); 86b8e80941Smrg 87b8e80941Smrg pool->entry_count++; 88b8e80941Smrg pool->entries[pool->entry_index] = (struct panfrost_memory_entry *) entry; 89b8e80941Smrg } 90b8e80941Smrg 91b8e80941Smrg /* Make sure we -still- won't overflow */ 92b8e80941Smrg assert(sz < pool->entry_size); 93b8e80941Smrg } 94b8e80941Smrg 95b8e80941Smrg /* We have an entry we can write to, so do the upload! 
*/ 96b8e80941Smrg struct panfrost_memory_entry *p_entry = pool->entries[pool->entry_index]; 97b8e80941Smrg struct panfrost_memory *backing = (struct panfrost_memory *) p_entry->base.slab; 98b8e80941Smrg 99b8e80941Smrg struct panfrost_transfer ret = { 100b8e80941Smrg .cpu = backing->cpu + p_entry->offset + pool->entry_offset, 101b8e80941Smrg .gpu = backing->gpu + p_entry->offset + pool->entry_offset 102b8e80941Smrg }; 103b8e80941Smrg 104b8e80941Smrg /* Advance the pointer */ 105b8e80941Smrg pool->entry_offset += sz; 106b8e80941Smrg 107b8e80941Smrg return ret; 108b8e80941Smrg 109b8e80941Smrg} 110b8e80941Smrg 111b8e80941Smrgmali_ptr 112b8e80941Smrgpanfrost_upload_transient(struct panfrost_context *ctx, const void *data, size_t sz) 113b8e80941Smrg{ 114b8e80941Smrg struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sz); 115b8e80941Smrg memcpy(transfer.cpu, data, sz); 116b8e80941Smrg return transfer.gpu; 117b8e80941Smrg} 118b8e80941Smrg 119b8e80941Smrg// TODO: An actual allocator, perhaps 120b8e80941Smrg// TODO: Multiple stacks for multiple bases? 121b8e80941Smrg 122b8e80941Smrgint hack_stack_bottom = 4096; /* Don't interfere with constant offsets */ 123b8e80941Smrgint last_offset = 0; 124b8e80941Smrg 125b8e80941Smrgstatic inline int 126b8e80941Smrgpandev_allocate_offset(int *stack, size_t sz) 127b8e80941Smrg{ 128b8e80941Smrg /* First, align the stack bottom to something nice; it's not critical 129b8e80941Smrg * at this point if we waste a little space to do so. 
*/ 130b8e80941Smrg 131b8e80941Smrg int excess = *stack & (ALIGNMENT - 1); 132b8e80941Smrg 133b8e80941Smrg /* Add the secret of my */ 134b8e80941Smrg if (excess) 135b8e80941Smrg *stack += ALIGNMENT - excess; 136b8e80941Smrg 137b8e80941Smrg /* Finally, use the new bottom for the allocation and move down the 138b8e80941Smrg * stack */ 139b8e80941Smrg 140b8e80941Smrg int ret = *stack; 141b8e80941Smrg *stack += sz; 142b8e80941Smrg return ret; 143b8e80941Smrg} 144b8e80941Smrg 145b8e80941Smrginline mali_ptr 146b8e80941Smrgpandev_upload(int cheating_offset, int *stack_bottom, mali_ptr base, void *base_map, const void *data, size_t sz, bool no_pad) 147b8e80941Smrg{ 148b8e80941Smrg int offset; 149b8e80941Smrg 150b8e80941Smrg /* We're not positive about the sizes of all objects, but we don't want 151b8e80941Smrg * them to crash against each other either. Let the caller disable 152b8e80941Smrg * padding if they so choose, though. */ 153b8e80941Smrg 154b8e80941Smrg size_t padded_size = no_pad ? sz : sz * 2; 155b8e80941Smrg 156b8e80941Smrg /* If no specific bottom is specified, use a global one... 
don't do 157b8e80941Smrg * this in production, kids */ 158b8e80941Smrg 159b8e80941Smrg if (!stack_bottom) 160b8e80941Smrg stack_bottom = &hack_stack_bottom; 161b8e80941Smrg 162b8e80941Smrg /* Allocate space for the new GPU object, if required */ 163b8e80941Smrg 164b8e80941Smrg if (cheating_offset == -1) { 165b8e80941Smrg offset = pandev_allocate_offset(stack_bottom, padded_size); 166b8e80941Smrg } else { 167b8e80941Smrg offset = cheating_offset; 168b8e80941Smrg *stack_bottom = offset + sz; 169b8e80941Smrg } 170b8e80941Smrg 171b8e80941Smrg /* Save last offset for sequential uploads (job descriptors) */ 172b8e80941Smrg last_offset = offset + padded_size; 173b8e80941Smrg 174b8e80941Smrg /* Upload it */ 175b8e80941Smrg memcpy((uint8_t *) base_map + offset, data, sz); 176b8e80941Smrg 177b8e80941Smrg /* Return the GPU address */ 178b8e80941Smrg return base + offset; 179b8e80941Smrg} 180b8e80941Smrg 181b8e80941Smrg/* Upload immediately after the last allocation */ 182b8e80941Smrg 183b8e80941Smrgmali_ptr 184b8e80941Smrgpandev_upload_sequential(mali_ptr base, void *base_map, const void *data, size_t sz) 185b8e80941Smrg{ 186b8e80941Smrg return pandev_upload(last_offset, NULL, base, base_map, data, sz, /* false */ true); 187b8e80941Smrg} 188b8e80941Smrg 189b8e80941Smrg/* Simplified APIs for the real driver, rather than replays */ 190b8e80941Smrg 191b8e80941Smrgmali_ptr 192b8e80941Smrgpanfrost_upload(struct panfrost_memory *mem, const void *data, size_t sz, bool no_pad) 193b8e80941Smrg{ 194b8e80941Smrg /* Bounds check */ 195b8e80941Smrg if ((mem->stack_bottom + sz) >= mem->size) { 196b8e80941Smrg printf("Out of memory, tried to upload %zd but only %zd available\n", sz, mem->size - mem->stack_bottom); 197b8e80941Smrg assert(0); 198b8e80941Smrg } 199b8e80941Smrg 200b8e80941Smrg return pandev_upload(-1, &mem->stack_bottom, mem->gpu, mem->cpu, data, sz, no_pad); 201b8e80941Smrg} 202b8e80941Smrg 203b8e80941Smrgmali_ptr 204b8e80941Smrgpanfrost_upload_sequential(struct 
panfrost_memory *mem, const void *data, size_t sz) 205b8e80941Smrg{ 206b8e80941Smrg return pandev_upload(last_offset, &mem->stack_bottom, mem->gpu, mem->cpu, data, sz, true); 207b8e80941Smrg} 208b8e80941Smrg 209b8e80941Smrg/* Simplified interface to allocate a chunk without any upload, to allow 210b8e80941Smrg * zero-copy uploads. This is particularly useful when the copy would happen 211b8e80941Smrg * anyway, for instance with texture swizzling. */ 212b8e80941Smrg 213b8e80941Smrgvoid * 214b8e80941Smrgpanfrost_allocate_transfer(struct panfrost_memory *mem, size_t sz, mali_ptr *gpu) 215b8e80941Smrg{ 216b8e80941Smrg int offset = pandev_allocate_offset(&mem->stack_bottom, sz); 217b8e80941Smrg 218b8e80941Smrg *gpu = mem->gpu + offset; 219b8e80941Smrg return mem->cpu + offset; 220b8e80941Smrg} 221