/*
 * © Copyright 2018 Alyssa Rosenzweig
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <panfrost-misc.h>
#include <panfrost-job.h>
#include "pan_context.h"

/* TODO: What does this actually have to be?
*/ 34b8e80941Smrg#define ALIGNMENT 128 35b8e80941Smrg 36b8e80941Smrg/* Allocate a mapped chunk directly from a heap */ 37b8e80941Smrg 38b8e80941Smrgstruct panfrost_transfer 39b8e80941Smrgpanfrost_allocate_chunk(struct panfrost_context *ctx, size_t size, unsigned heap_id) 40b8e80941Smrg{ 41b8e80941Smrg size = ALIGN(size, ALIGNMENT); 42b8e80941Smrg 43b8e80941Smrg struct pipe_context *gallium = (struct pipe_context *) ctx; 44b8e80941Smrg struct panfrost_screen *screen = pan_screen(gallium->screen); 45b8e80941Smrg 46b8e80941Smrg struct pb_slab_entry *entry = pb_slab_alloc(&screen->slabs, size, heap_id); 47b8e80941Smrg struct panfrost_memory_entry *p_entry = (struct panfrost_memory_entry *) entry; 48b8e80941Smrg struct panfrost_memory *backing = (struct panfrost_memory *) entry->slab; 49b8e80941Smrg 50b8e80941Smrg struct panfrost_transfer transfer = { 51b8e80941Smrg .cpu = backing->cpu + p_entry->offset, 52b8e80941Smrg .gpu = backing->gpu + p_entry->offset 53b8e80941Smrg }; 54b8e80941Smrg 55b8e80941Smrg return transfer; 56b8e80941Smrg} 57b8e80941Smrg 58b8e80941Smrg/* Transient command stream pooling: command stream uploads try to simply copy 59b8e80941Smrg * into whereever we left off. 
If there isn't space, we allocate a new entry 60b8e80941Smrg * into the pool and copy there */ 61b8e80941Smrg 62b8e80941Smrgstruct panfrost_transfer 63b8e80941Smrgpanfrost_allocate_transient(struct panfrost_context *ctx, size_t sz) 64b8e80941Smrg{ 65b8e80941Smrg /* Pad the size */ 66b8e80941Smrg sz = ALIGN(sz, ALIGNMENT); 67b8e80941Smrg 68b8e80941Smrg /* Check if there is room in the current entry */ 69b8e80941Smrg struct panfrost_transient_pool *pool = &ctx->transient_pools[ctx->cmdstream_i]; 70b8e80941Smrg 71b8e80941Smrg if ((pool->entry_offset + sz) > pool->entry_size) { 72b8e80941Smrg /* Don't overflow this entry -- advance to the next */ 73b8e80941Smrg 74b8e80941Smrg pool->entry_offset = 0; 75b8e80941Smrg 76b8e80941Smrg pool->entry_index++; 77b8e80941Smrg assert(pool->entry_index < PANFROST_MAX_TRANSIENT_ENTRIES); 78b8e80941Smrg 79b8e80941Smrg /* Check if this entry exists */ 80b8e80941Smrg 81b8e80941Smrg if (pool->entry_index >= pool->entry_count) { 82b8e80941Smrg /* Don't overflow the pool -- allocate a new one */ 83b8e80941Smrg struct pipe_context *gallium = (struct pipe_context *) ctx; 84b8e80941Smrg struct panfrost_screen *screen = pan_screen(gallium->screen); 85b8e80941Smrg struct pb_slab_entry *entry = pb_slab_alloc(&screen->slabs, pool->entry_size, HEAP_TRANSIENT); 86b8e80941Smrg 87b8e80941Smrg pool->entry_count++; 88b8e80941Smrg pool->entries[pool->entry_index] = (struct panfrost_memory_entry *) entry; 89b8e80941Smrg } 90b8e80941Smrg 91b8e80941Smrg /* Make sure we -still- won't overflow */ 92b8e80941Smrg assert(sz < pool->entry_size); 93b8e80941Smrg } 94b8e80941Smrg 95b8e80941Smrg /* We have an entry we can write to, so do the upload! 
*/ 96b8e80941Smrg struct panfrost_memory_entry *p_entry = pool->entries[pool->entry_index]; 97b8e80941Smrg struct panfrost_memory *backing = (struct panfrost_memory *) p_entry->base.slab; 98b8e80941Smrg 99b8e80941Smrg struct panfrost_transfer ret = { 100b8e80941Smrg .cpu = backing->cpu + p_entry->offset + pool->entry_offset, 101b8e80941Smrg .gpu = backing->gpu + p_entry->offset + pool->entry_offset 102b8e80941Smrg }; 103b8e80941Smrg 104b8e80941Smrg /* Advance the pointer */ 105b8e80941Smrg pool->entry_offset += sz; 106b8e80941Smrg 107b8e80941Smrg return ret; 108b8e80941Smrg 109b8e80941Smrg} 110b8e80941Smrg 111b8e80941Smrgmali_ptr 112b8e80941Smrgpanfrost_upload_transient(struct panfrost_context *ctx, const void *data, size_t sz) 113b8e80941Smrg{ 114b8e80941Smrg struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sz); 115b8e80941Smrg memcpy(transfer.cpu, data, sz); 116b8e80941Smrg return transfer.gpu; 117b8e80941Smrg} 118b8e80941Smrg 119b8e80941Smrg// TODO: An actual allocator, perhaps 120b8e80941Smrg// TODO: Multiple stacks for multiple bases? 121b8e80941Smrg 122b8e80941Smrgint hack_stack_bottom = 4096; /* Don't interfere with constant offsets */ 123b8e80941Smrgint last_offset = 0; 124b8e80941Smrg 125b8e80941Smrgstatic inline int 126b8e80941Smrgpandev_allocate_offset(int *stack, size_t sz) 127b8e80941Smrg{ 128b8e80941Smrg /* First, align the stack bottom to something nice; it's not critical 129b8e80941Smrg * at this point if we waste a little space to do so. 
*/ 130b8e80941Smrg 131b8e80941Smrg int excess = *stack & (ALIGNMENT - 1); 132b8e80941Smrg 133b8e80941Smrg /* Add the secret of my */ 134b8e80941Smrg if (excess) 135b8e80941Smrg *stack += ALIGNMENT - excess; 136b8e80941Smrg 137b8e80941Smrg /* Finally, use the new bottom for the allocation and move down the 138b8e80941Smrg * stack */ 139b8e80941Smrg 140b8e80941Smrg int ret = *stack; 141b8e80941Smrg *stack += sz; 142b8e80941Smrg return ret; 143b8e80941Smrg} 144b8e80941Smrg 145b8e80941Smrginline mali_ptr 146b8e80941Smrgpandev_upload(int cheating_offset, int *stack_bottom, mali_ptr base, void *base_map, const void *data, size_t sz, bool no_pad) 147b8e80941Smrg{ 148b8e80941Smrg int offset; 149b8e80941Smrg 150b8e80941Smrg /* We're not positive about the sizes of all objects, but we don't want 151b8e80941Smrg * them to crash against each other either. Let the caller disable 152b8e80941Smrg * padding if they so choose, though. */ 153b8e80941Smrg 154b8e80941Smrg size_t padded_size = no_pad ? sz : sz * 2; 155b8e80941Smrg 156b8e80941Smrg /* If no specific bottom is specified, use a global one... 
don't do 157b8e80941Smrg * this in production, kids */ 158b8e80941Smrg 159b8e80941Smrg if (!stack_bottom) 160b8e80941Smrg stack_bottom = &hack_stack_bottom; 161b8e80941Smrg 162b8e80941Smrg /* Allocate space for the new GPU object, if required */ 163b8e80941Smrg 164b8e80941Smrg if (cheating_offset == -1) { 165b8e80941Smrg offset = pandev_allocate_offset(stack_bottom, padded_size); 166b8e80941Smrg } else { 167b8e80941Smrg offset = cheating_offset; 168b8e80941Smrg *stack_bottom = offset + sz; 169b8e80941Smrg } 170b8e80941Smrg 171b8e80941Smrg /* Save last offset for sequential uploads (job descriptors) */ 172b8e80941Smrg last_offset = offset + padded_size; 173b8e80941Smrg 174b8e80941Smrg /* Upload it */ 175b8e80941Smrg memcpy((uint8_t *) base_map + offset, data, sz); 176b8e80941Smrg 177b8e80941Smrg /* Return the GPU address */ 178b8e80941Smrg return base + offset; 179b8e80941Smrg} 180b8e80941Smrg 181b8e80941Smrg/* Upload immediately after the last allocation */ 182b8e80941Smrg 183b8e80941Smrgmali_ptr 184b8e80941Smrgpandev_upload_sequential(mali_ptr base, void *base_map, const void *data, size_t sz) 185b8e80941Smrg{ 186b8e80941Smrg return pandev_upload(last_offset, NULL, base, base_map, data, sz, /* false */ true); 187b8e80941Smrg} 188b8e80941Smrg 189b8e80941Smrg/* Simplified APIs for the real driver, rather than replays */ 190b8e80941Smrg 191b8e80941Smrgmali_ptr 192b8e80941Smrgpanfrost_upload(struct panfrost_memory *mem, const void *data, size_t sz, bool no_pad) 193b8e80941Smrg{ 194b8e80941Smrg /* Bounds check */ 195b8e80941Smrg if ((mem->stack_bottom + sz) >= mem->size) { 196b8e80941Smrg printf("Out of memory, tried to upload %zd but only %zd available\n", sz, mem->size - mem->stack_bottom); 197b8e80941Smrg assert(0); 198b8e80941Smrg } 199b8e80941Smrg 200b8e80941Smrg return pandev_upload(-1, &mem->stack_bottom, mem->gpu, mem->cpu, data, sz, no_pad); 201b8e80941Smrg} 202b8e80941Smrg 203b8e80941Smrgmali_ptr 204b8e80941Smrgpanfrost_upload_sequential(struct 
panfrost_memory *mem, const void *data, size_t sz) 205b8e80941Smrg{ 206b8e80941Smrg return pandev_upload(last_offset, &mem->stack_bottom, mem->gpu, mem->cpu, data, sz, true); 207b8e80941Smrg} 208b8e80941Smrg 209b8e80941Smrg/* Simplified interface to allocate a chunk without any upload, to allow 210b8e80941Smrg * zero-copy uploads. This is particularly useful when the copy would happen 211b8e80941Smrg * anyway, for instance with texture swizzling. */ 212b8e80941Smrg 213b8e80941Smrgvoid * 214b8e80941Smrgpanfrost_allocate_transfer(struct panfrost_memory *mem, size_t sz, mali_ptr *gpu) 215b8e80941Smrg{ 216b8e80941Smrg int offset = pandev_allocate_offset(&mem->stack_bottom, sz); 217b8e80941Smrg 218b8e80941Smrg *gpu = mem->gpu + offset; 219b8e80941Smrg return mem->cpu + offset; 220b8e80941Smrg} 221