1b8e80941Smrg/*
2b8e80941Smrg * © Copyright 2018 Alyssa Rosenzweig
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20b8e80941Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21b8e80941Smrg * SOFTWARE.
22b8e80941Smrg *
23b8e80941Smrg */
24b8e80941Smrg
25b8e80941Smrg#include <stdio.h>
26b8e80941Smrg#include <stdlib.h>
27b8e80941Smrg#include <string.h>
28b8e80941Smrg#include <assert.h>
29b8e80941Smrg#include <panfrost-misc.h>
30b8e80941Smrg#include <panfrost-job.h>
31b8e80941Smrg#include "pan_context.h"
32b8e80941Smrg
33b8e80941Smrg/* TODO: What does this actually have to be? */
34b8e80941Smrg#define ALIGNMENT 128
35b8e80941Smrg
36b8e80941Smrg/* Allocate a mapped chunk directly from a heap */
37b8e80941Smrg
38b8e80941Smrgstruct panfrost_transfer
39b8e80941Smrgpanfrost_allocate_chunk(struct panfrost_context *ctx, size_t size, unsigned heap_id)
40b8e80941Smrg{
41b8e80941Smrg        size = ALIGN(size, ALIGNMENT);
42b8e80941Smrg
43b8e80941Smrg        struct pipe_context *gallium = (struct pipe_context *) ctx;
44b8e80941Smrg        struct panfrost_screen *screen = pan_screen(gallium->screen);
45b8e80941Smrg
46b8e80941Smrg        struct pb_slab_entry *entry = pb_slab_alloc(&screen->slabs, size, heap_id);
47b8e80941Smrg        struct panfrost_memory_entry *p_entry = (struct panfrost_memory_entry *) entry;
48b8e80941Smrg        struct panfrost_memory *backing = (struct panfrost_memory *) entry->slab;
49b8e80941Smrg
50b8e80941Smrg        struct panfrost_transfer transfer = {
51b8e80941Smrg                .cpu = backing->cpu + p_entry->offset,
52b8e80941Smrg                .gpu = backing->gpu + p_entry->offset
53b8e80941Smrg        };
54b8e80941Smrg
55b8e80941Smrg        return transfer;
56b8e80941Smrg}
57b8e80941Smrg
/* Transient command stream pooling: command stream uploads try to simply copy
 * into wherever we left off. If there isn't space, we allocate a new entry
 * into the pool and copy there */
61b8e80941Smrg
/* Bump-allocate `sz` bytes out of the context's current transient pool,
 * returning CPU and GPU pointers to the space.  The pool is a list of
 * slab-backed entries; we append within the current entry and advance to
 * (allocating on demand) the next entry when the current one is full. */
struct panfrost_transfer
panfrost_allocate_transient(struct panfrost_context *ctx, size_t sz)
{
        /* Pad the size */
        sz = ALIGN(sz, ALIGNMENT);

        /* Check if there is room in the current entry */
        struct panfrost_transient_pool *pool = &ctx->transient_pools[ctx->cmdstream_i];

        if ((pool->entry_offset + sz) > pool->entry_size) {
                /* Don't overflow this entry -- advance to the next */

                pool->entry_offset = 0;

                pool->entry_index++;
                assert(pool->entry_index < PANFROST_MAX_TRANSIENT_ENTRIES);

                /* Check if this entry exists */

                if (pool->entry_index >= pool->entry_count) {
                        /* Don't overflow the pool -- allocate a new one */
                        struct pipe_context *gallium = (struct pipe_context *) ctx;
                        struct panfrost_screen *screen = pan_screen(gallium->screen);
                        /* NOTE(review): pb_slab_alloc result is not checked
                         * for NULL before being stored -- confirm the slab
                         * allocator cannot fail here */
                        struct pb_slab_entry *entry = pb_slab_alloc(&screen->slabs, pool->entry_size, HEAP_TRANSIENT);

                        pool->entry_count++;
                        pool->entries[pool->entry_index] = (struct panfrost_memory_entry *) entry;
                }

                /* Make sure we -still- won't overflow */
                /* NOTE(review): this fires only after the entry was already
                 * advanced/allocated; a stricter check would reject oversized
                 * sz up front */
                assert(sz < pool->entry_size);
        }

        /* We have an entry we can write to, so do the upload! */
        struct panfrost_memory_entry *p_entry = pool->entries[pool->entry_index];
        struct panfrost_memory *backing = (struct panfrost_memory *) p_entry->base.slab;

        /* Address the space at the entry's slab offset plus our running
         * offset within the entry */
        struct panfrost_transfer ret = {
                .cpu = backing->cpu + p_entry->offset + pool->entry_offset,
                .gpu = backing->gpu + p_entry->offset + pool->entry_offset
        };

        /* Advance the pointer */
        pool->entry_offset += sz;

        return ret;

}
110b8e80941Smrg
111b8e80941Smrgmali_ptr
112b8e80941Smrgpanfrost_upload_transient(struct panfrost_context *ctx, const void *data, size_t sz)
113b8e80941Smrg{
114b8e80941Smrg        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sz);
115b8e80941Smrg        memcpy(transfer.cpu, data, sz);
116b8e80941Smrg        return transfer.gpu;
117b8e80941Smrg}
118b8e80941Smrg
// TODO: An actual allocator, perhaps
// TODO: Multiple stacks for multiple bases?

int hack_stack_bottom = 4096; /* Don't interfere with constant offsets */
int last_offset = 0; /* End of the previous upload; lets *_sequential place uploads back-to-back */
124b8e80941Smrg
/* Bump-allocate `sz` bytes from a growing stack: align the current bottom up
 * to ALIGNMENT, return that offset, and advance the bottom past the
 * allocation. */
static inline int
pandev_allocate_offset(int *stack, size_t sz)
{
        /* First, align the stack bottom to something nice; it's not critical
         * at this point if we waste a little space to do so. */

        int excess = *stack & (ALIGNMENT - 1);

        /* Round up to the next ALIGNMENT boundary if we were misaligned */
        if (excess)
                *stack += ALIGNMENT - excess;

        /* Finally, use the new bottom for the allocation and move down the
         * stack */

        int ret = *stack;
        *stack += sz;
        return ret;
}
144b8e80941Smrg
/* Upload `sz` bytes of `data` into the CPU mapping `base_map`, returning the
 * GPU address (base + offset) of the copy.  If `cheating_offset` is -1, the
 * offset is bump-allocated from *stack_bottom; otherwise the caller-supplied
 * offset is used verbatim and the stack bottom is reset to just past it.
 * Unless `no_pad`, the reservation is doubled to guard against uncertain
 * object sizes. */
inline mali_ptr
pandev_upload(int cheating_offset, int *stack_bottom, mali_ptr base, void *base_map, const void *data, size_t sz, bool no_pad)
{
        int offset;

        /* We're not positive about the sizes of all objects, but we don't want
         * them to crash against each other either. Let the caller disable
         * padding if they so choose, though. */

        size_t padded_size = no_pad ? sz : sz * 2;

        /* If no specific bottom is specified, use a global one... don't do
         * this in production, kids */

        if (!stack_bottom)
                stack_bottom = &hack_stack_bottom;

        /* Allocate space for the new GPU object, if required */

        if (cheating_offset == -1) {
                offset = pandev_allocate_offset(stack_bottom, padded_size);
        } else {
                offset = cheating_offset;
                /* NOTE(review): this advances by sz, not padded_size, unlike
                 * last_offset below -- a padded upload at a cheating offset
                 * can overlap the next stack allocation; confirm intentional */
                *stack_bottom = offset + sz;
        }

        /* Save last offset for sequential uploads (job descriptors) */
        last_offset = offset + padded_size;

        /* Upload it */
        memcpy((uint8_t *) base_map + offset, data, sz);

        /* Return the GPU address */
        return base + offset;
}
180b8e80941Smrg
/* Upload immediately after the last allocation */

mali_ptr
pandev_upload_sequential(mali_ptr base, void *base_map, const void *data, size_t sz)
{
        /* Reuse last_offset as an explicit (cheating) offset; the NULL
         * stack_bottom falls back to the global hack_stack_bottom inside
         * pandev_upload.  NOTE(review): padding is disabled here (the
         * "false" was flipped to true at some point) -- confirm sequential
         * uploads never need the safety padding */
        return pandev_upload(last_offset, NULL, base, base_map, data, sz, /* false */ true);
}
188b8e80941Smrg
189b8e80941Smrg/* Simplified APIs for the real driver, rather than replays */
190b8e80941Smrg
191b8e80941Smrgmali_ptr
192b8e80941Smrgpanfrost_upload(struct panfrost_memory *mem, const void *data, size_t sz, bool no_pad)
193b8e80941Smrg{
194b8e80941Smrg        /* Bounds check */
195b8e80941Smrg        if ((mem->stack_bottom + sz) >= mem->size) {
196b8e80941Smrg                printf("Out of memory, tried to upload %zd but only %zd available\n", sz, mem->size - mem->stack_bottom);
197b8e80941Smrg                assert(0);
198b8e80941Smrg        }
199b8e80941Smrg
200b8e80941Smrg        return pandev_upload(-1, &mem->stack_bottom, mem->gpu, mem->cpu, data, sz, no_pad);
201b8e80941Smrg}
202b8e80941Smrg
203b8e80941Smrgmali_ptr
204b8e80941Smrgpanfrost_upload_sequential(struct panfrost_memory *mem, const void *data, size_t sz)
205b8e80941Smrg{
206b8e80941Smrg        return pandev_upload(last_offset, &mem->stack_bottom, mem->gpu, mem->cpu, data, sz, true);
207b8e80941Smrg}
208b8e80941Smrg
209b8e80941Smrg/* Simplified interface to allocate a chunk without any upload, to allow
210b8e80941Smrg * zero-copy uploads. This is particularly useful when the copy would happen
211b8e80941Smrg * anyway, for instance with texture swizzling. */
212b8e80941Smrg
213b8e80941Smrgvoid *
214b8e80941Smrgpanfrost_allocate_transfer(struct panfrost_memory *mem, size_t sz, mali_ptr *gpu)
215b8e80941Smrg{
216b8e80941Smrg        int offset = pandev_allocate_offset(&mem->stack_bottom, sz);
217b8e80941Smrg
218b8e80941Smrg        *gpu = mem->gpu + offset;
219b8e80941Smrg        return mem->cpu + offset;
220b8e80941Smrg}
221