/*
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Adam Rak <adam.rak@streamnovation.com>
 */

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "util/u_blitter.h"
#include "util/list.h"
#include "util/u_transfer.h"
#include "util/u_surface.h"
#include "util/u_pack_color.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_framebuffer.h"
#include "r600_shader.h"
#include "r600_pipe.h"
#include "r600_formats.h"
#include "compute_memory_pool.h"
#include "evergreen_compute.h"
#include "evergreen_compute_internal.h"
#include <inttypes.h>

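/* Allocation granularity of the pool, in dwords: item sizes are rounded
 * up to a multiple of this (1024 dwords = 4 KiB). */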
#define ITEM_ALIGNMENT 1024

/* A few forward declarations of static functions */
static void compute_memory_shadow(struct compute_memory_pool* pool,
	struct pipe_context *pipe, int device_to_host);

static void compute_memory_defrag(struct compute_memory_pool *pool,
	struct pipe_resource *src, struct pipe_resource *dst,
	struct pipe_context *pipe);

static int compute_memory_promote_item(struct compute_memory_pool *pool,
	struct compute_memory_item *item, struct pipe_context *pipe,
	int64_t start_in_dw);

static void compute_memory_move_item(struct compute_memory_pool *pool,
	struct pipe_resource *src, struct pipe_resource *dst,
	struct compute_memory_item *item, uint64_t new_start_in_dw,
	struct pipe_context *pipe);

static void compute_memory_transfer(struct compute_memory_pool* pool,
	struct pipe_context * pipe, int device_to_host,
	struct compute_memory_item* chunk, void* data,
	int offset_in_chunk, int size);

/**
 * Creates a new pool.
 */
struct compute_memory_pool* compute_memory_pool_new(
	struct r600_screen * rscreen)
{
	struct compute_memory_pool* pool = (struct compute_memory_pool*)
				CALLOC(sizeof(struct compute_memory_pool), 1);
	if (!pool)
		return NULL;

	COMPUTE_DBG(rscreen, "* compute_memory_pool_new()\n");

	pool->screen = rscreen;
	pool->item_list = (struct list_head *)
				CALLOC(sizeof(struct list_head), 1);
	pool->unallocated_list = (struct list_head *)
				CALLOC(sizeof(struct list_head), 1);
	/* Bail out cleanly if either list head allocation failed */
	if (!pool->item_list || !pool->unallocated_list) {
		free(pool->item_list);
		free(pool->unallocated_list);
		free(pool);
		return NULL;
	}
	list_inithead(pool->item_list);
	list_inithead(pool->unallocated_list);
	return pool;
}

/**
 * Initializes the pool with a size of \a initial_size_in_dw.
 * \param pool			The pool to be initialized.
 * \param initial_size_in_dw	The initial size of the pool, in dwords.
 * \see compute_memory_grow_defrag_pool
 */
static void compute_memory_pool_init(struct compute_memory_pool * pool,
	unsigned initial_size_in_dw)
{
	COMPUTE_DBG(pool->screen, "* compute_memory_pool_init() initial_size_in_dw = %u\n",
		initial_size_in_dw);

	pool->size_in_dw = initial_size_in_dw;
	pool->bo = r600_compute_buffer_alloc_vram(pool->screen,
						  pool->size_in_dw * 4);
}

/**
 * Frees all the items in the pool, the list heads, and the pool struct itself.
 */
void compute_memory_pool_delete(struct compute_memory_pool* pool)
{
	COMPUTE_DBG(pool->screen, "* compute_memory_pool_delete()\n");
	free(pool->shadow);
	r600_resource_reference(&pool->bo, NULL);
	/* In theory, all of the items were freed in compute_memory_free.
	 * Just delete the list heads
	 */
	free(pool->item_list);
	free(pool->unallocated_list);
	/* And then the pool itself */
	free(pool);
}

/**
 * Reallocates and defragments the pool, preserving its contents.
 * \return -1 if it fails, 0 otherwise
 * \see compute_memory_finalize_pending
 */
static int compute_memory_grow_defrag_pool(struct compute_memory_pool *pool,
	struct pipe_context *pipe, int new_size_in_dw)
{
	new_size_in_dw = align(new_size_in_dw, ITEM_ALIGNMENT);

	COMPUTE_DBG(pool->screen, "* compute_memory_grow_defrag_pool() "
		"new_size_in_dw = %d (%d bytes)\n",
		new_size_in_dw, new_size_in_dw * 4);

	assert(new_size_in_dw >= pool->size_in_dw);

	if (!pool->bo) {
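		/* First allocation: create the pool with at least
		 * 16384 dwords (64 KiB) */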
		compute_memory_pool_init(pool, MAX2(new_size_in_dw, 1024 * 16));
	} else {
		struct r600_resource *temp = NULL;

		temp = r600_compute_buffer_alloc_vram(pool->screen, new_size_in_dw * 4);

		if (temp != NULL) {
			struct pipe_resource *src = (struct pipe_resource *)pool->bo;
			struct pipe_resource *dst = (struct pipe_resource *)temp;

			COMPUTE_DBG(pool->screen, "  Growing and defragmenting the pool "
					"using a temporary resource\n");

			compute_memory_defrag(pool, src, dst, pipe);

			/* Release the old buffer */
			r600_resource_reference(&pool->bo, NULL);
			pool->bo = temp;
			pool->size_in_dw = new_size_in_dw;
		}
		else {
			uint32_t *new_shadow;

			COMPUTE_DBG(pool->screen, "  The creation of the temporary resource failed\n"
				"  Falling back to using 'shadow'\n");

			compute_memory_shadow(pool, pipe, 1);
			/* Use a temporary pointer so the old shadow is not
			 * leaked if realloc fails */
			new_shadow = realloc(pool->shadow, new_size_in_dw * 4);
			if (new_shadow == NULL)
				return -1;
			pool->shadow = new_shadow;

			pool->size_in_dw = new_size_in_dw;
			/* Release the old buffer */
			r600_resource_reference(&pool->bo, NULL);
			pool->bo = r600_compute_buffer_alloc_vram(pool->screen, pool->size_in_dw * 4);
			compute_memory_shadow(pool, pipe, 0);

			if (pool->status & POOL_FRAGMENTED) {
				struct pipe_resource *src = (struct pipe_resource *)pool->bo;
				compute_memory_defrag(pool, src, src, pipe);
			}
		}
	}

	return 0;
}

/**
 * Copy pool from device to host, or host to device.
 * \param device_to_host 1 for device->host, 0 for host->device
 * \see compute_memory_grow_defrag_pool
 */
static void compute_memory_shadow(struct compute_memory_pool* pool,
	struct pipe_context * pipe, int device_to_host)
{
	struct compute_memory_item chunk;

	COMPUTE_DBG(pool->screen, "* compute_memory_shadow() device_to_host = %d\n",
		device_to_host);

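	/* Describe the whole pool as a single chunk */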
	chunk.id = 0;
	chunk.start_in_dw = 0;
	chunk.size_in_dw = pool->size_in_dw;
	compute_memory_transfer(pool, pipe, device_to_host, &chunk,
				pool->shadow, 0, pool->size_in_dw*4);
}

/**
 * Moves all the items marked for promotion from the \a unallocated_list
 * to the \a item_list.
 * \return -1 if it fails, 0 otherwise
 * \see evergreen_set_global_binding
 */
int compute_memory_finalize_pending(struct compute_memory_pool* pool,
	struct pipe_context * pipe)
{
	struct compute_memory_item *item, *next;

	int64_t allocated = 0;
	int64_t unallocated = 0;
	int64_t last_pos;

	int err = 0;

	COMPUTE_DBG(pool->screen, "* compute_memory_finalize_pending()\n");

	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
		COMPUTE_DBG(pool->screen, "  + list: offset = %"PRIi64" id = %"PRIi64" size = %"PRIi64" "
			"(%"PRIi64" bytes)\n", item->start_in_dw, item->id,
			item->size_in_dw, item->size_in_dw * 4);
	}

	/* Calculate the total allocated size */
	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
		allocated += align(item->size_in_dw, ITEM_ALIGNMENT);
	}

	/* Calculate the total unallocated size of the items that
	 * will be promoted to the pool */
	LIST_FOR_EACH_ENTRY(item, pool->unallocated_list, link) {
		if (item->status & ITEM_FOR_PROMOTING)
			unallocated += align(item->size_in_dw, ITEM_ALIGNMENT);
	}

	if (unallocated == 0) {
		return 0;
	}

	if (pool->size_in_dw < allocated + unallocated) {
		err = compute_memory_grow_defrag_pool(pool, pipe, allocated + unallocated);
		if (err == -1)
			return -1;
	}
	else if (pool->status & POOL_FRAGMENTED) {
		struct pipe_resource *src = (struct pipe_resource *)pool->bo;
		compute_memory_defrag(pool, src, src, pipe);
	}

	/* After defragmenting the pool, allocated is equal to the first available
	 * position for new items in the pool */
	last_pos = allocated;

	/* Loop through all the unallocated items, check if they are marked
	 * for promoting, allocate space for them and add them to the item_list. */
	LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->unallocated_list, link) {
		if (item->status & ITEM_FOR_PROMOTING) {
			err = compute_memory_promote_item(pool, item, pipe, last_pos);
			item->status &= ~ITEM_FOR_PROMOTING;

			last_pos += align(item->size_in_dw, ITEM_ALIGNMENT);

			if (err == -1)
				return -1;
		}
	}

	return 0;
}

/**
 * Defragments the pool, so that there's no gap between items.
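 *
 * Items are packed back-to-back, in \a item_list order, e.g.:
 *   before: |A|..gap..|B|.gap.|C|.....|
 *   after:  |A|B|C|.........free.....|
 * This relies on \a item_list being sorted by \a start_in_dw.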
 * \param pool	The pool to be defragmented
 * \param src	The origin resource
 * \param dst	The destination resource
 * \see compute_memory_grow_defrag_pool and compute_memory_finalize_pending
 */
static void compute_memory_defrag(struct compute_memory_pool *pool,
	struct pipe_resource *src, struct pipe_resource *dst,
	struct pipe_context *pipe)
{
	struct compute_memory_item *item;
	int64_t last_pos;

	COMPUTE_DBG(pool->screen, "* compute_memory_defrag()\n");

	last_pos = 0;
	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
		if (src != dst || item->start_in_dw != last_pos) {
			assert(last_pos <= item->start_in_dw);

			compute_memory_move_item(pool, src, dst,
					item, last_pos, pipe);
		}

		last_pos += align(item->size_in_dw, ITEM_ALIGNMENT);
	}

	pool->status &= ~POOL_FRAGMENTED;
}

/**
 * Moves an item from the \a unallocated_list to the \a item_list.
 * \param item	The item that will be promoted.
 * \return -1 if it fails, 0 otherwise
 * \see compute_memory_finalize_pending
 */
static int compute_memory_promote_item(struct compute_memory_pool *pool,
		struct compute_memory_item *item, struct pipe_context *pipe,
		int64_t start_in_dw)
{
	struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
	struct r600_context *rctx = (struct r600_context *)pipe;
	struct pipe_resource *src = (struct pipe_resource *)item->real_buffer;
	struct pipe_resource *dst = (struct pipe_resource *)pool->bo;
	struct pipe_box box;

	COMPUTE_DBG(pool->screen, "* compute_memory_promote_item()\n"
			"  + Promoting Item: %"PRIi64" , starting at: %"PRIi64" (%"PRIi64" bytes) "
			"size: %"PRIi64" (%"PRIi64" bytes)\n\t\t\tnew start: %"PRIi64" (%"PRIi64" bytes)\n",
			item->id, item->start_in_dw, item->start_in_dw * 4,
			item->size_in_dw, item->size_in_dw * 4,
			start_in_dw, start_in_dw * 4);

	/* Remove the item from the unallocated list */
	list_del(&item->link);

	/* Add it back to the item_list */
	list_addtail(&item->link, pool->item_list);
	item->start_in_dw = start_in_dw;

	if (src) {
		u_box_1d(0, item->size_in_dw * 4, &box);

		rctx->b.b.resource_copy_region(pipe,
				dst, 0, item->start_in_dw * 4, 0, 0,
				src, 0, &box);

		/* If the item is mapped for reading, we must keep the
		 * temporary buffer alive: a map can stay active for reading
		 * while a kernel that reads from the buffer executes */
		if (!(item->status & ITEM_MAPPED_FOR_READING)) {
			pool->screen->b.b.resource_destroy(screen, src);
			item->real_buffer = NULL;
		}
	}

	return 0;
}

/**
 * Moves an item from the \a item_list to the \a unallocated_list.
 * \param item	The item that will be demoted
 * \see r600_compute_global_transfer_map
 */
void compute_memory_demote_item(struct compute_memory_pool *pool,
	struct compute_memory_item *item, struct pipe_context *pipe)
{
	struct r600_context *rctx = (struct r600_context *)pipe;
	struct pipe_resource *src = (struct pipe_resource *)pool->bo;
	struct pipe_resource *dst;
	struct pipe_box box;

	COMPUTE_DBG(pool->screen, "* compute_memory_demote_item()\n"
			"  + Demoting Item: %"PRIi64", starting at: %"PRIi64" (%"PRIi64" bytes) "
			"size: %"PRIi64" (%"PRIi64" bytes)\n", item->id, item->start_in_dw,
			item->start_in_dw * 4, item->size_in_dw, item->size_in_dw * 4);

	/* First, we remove the item from the item_list */
	list_del(&item->link);

	/* Now we add it to the unallocated list */
	list_addtail(&item->link, pool->unallocated_list);

	/* Recreate the intermediate buffer if it no longer exists */
	if (item->real_buffer == NULL) {
		item->real_buffer = r600_compute_buffer_alloc_vram(
				pool->screen, item->size_in_dw * 4);
	}

	dst = (struct pipe_resource *)item->real_buffer;

	/* We transfer the memory from the item in the pool to the
	 * temporary buffer */
	u_box_1d(item->start_in_dw * 4, item->size_in_dw * 4, &box);

	rctx->b.b.resource_copy_region(pipe,
		dst, 0, 0, 0, 0,
		src, 0, &box);

	/* Mark the item as pending by setting its start_in_dw to -1 */
	item->start_in_dw = -1;

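	/* If the item was not the last one in the pool, its removal
	 * leaves a gap behind, so the pool will need defragmenting */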
	if (item->link.next != pool->item_list) {
		pool->status |= POOL_FRAGMENTED;
	}
}

/**
 * Moves the item \a item forward from the resource \a src to the
 * resource \a dst at \a new_start_in_dw
 *
 * This function assumes two things:
 * 1) The item is \b only moved forward, unless src is different from dst
 * 2) The item \b won't change its position inside the \a item_list
 *
 * \param item			The item that will be moved
 * \param new_start_in_dw	The new position of the item in the pool, in dwords
 * \see compute_memory_defrag
 */
static void compute_memory_move_item(struct compute_memory_pool *pool,
	struct pipe_resource *src, struct pipe_resource *dst,
	struct compute_memory_item *item, uint64_t new_start_in_dw,
	struct pipe_context *pipe)
{
	struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
	struct r600_context *rctx = (struct r600_context *)pipe;
	struct pipe_box box;

	COMPUTE_DBG(pool->screen, "* compute_memory_move_item()\n"
			"  + Moving item %"PRIi64" from %"PRIi64" (%"PRIi64" bytes) to %"PRIu64" (%"PRIu64" bytes)\n",
			item->id, item->start_in_dw, item->start_in_dw * 4,
			new_start_in_dw, new_start_in_dw * 4);

	if (pool->item_list != item->link.prev) {
		ASSERTED struct compute_memory_item *prev;
		prev = container_of(item->link.prev, struct compute_memory_item, link);
		assert(prev->start_in_dw + prev->size_in_dw <= new_start_in_dw);
	}

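	/* The box covers the item's current location in src */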
	u_box_1d(item->start_in_dw * 4, item->size_in_dw * 4, &box);

	/* If the ranges don't overlap, or we are copying from one resource
	 * to another, we can just copy the item directly */
	if (src != dst || new_start_in_dw + item->size_in_dw <= item->start_in_dw) {

		rctx->b.b.resource_copy_region(pipe,
			dst, 0, new_start_in_dw * 4, 0, 0,
			src, 0, &box);
	} else {
		/* The ranges overlap, so first try to use an intermediate
		 * resource to move the item */
		struct pipe_resource *tmp = (struct pipe_resource *)
			r600_compute_buffer_alloc_vram(pool->screen, item->size_in_dw * 4);

		if (tmp != NULL) {
			rctx->b.b.resource_copy_region(pipe,
				tmp, 0, 0, 0, 0,
				src, 0, &box);

			box.x = 0;

			rctx->b.b.resource_copy_region(pipe,
				dst, 0, new_start_in_dw * 4, 0, 0,
				tmp, 0, &box);

			pool->screen->b.b.resource_destroy(screen, tmp);

		} else {
			/* The allocation of the temporary resource failed,
			 * fall back to using a mapping */
			uint32_t *map;
			int64_t offset;
			struct pipe_transfer *trans;

			offset = item->start_in_dw - new_start_in_dw;

			u_box_1d(new_start_in_dw * 4, (offset + item->size_in_dw) * 4, &box);

			map = pipe->buffer_map(pipe, src, 0, PIPE_MAP_READ_WRITE,
				&box, &trans);

			assert(map);
			assert(trans);

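			/* map points at new_start_in_dw; the item's old data
			 * begins offset dwords further in, so slide it down
			 * to the start of the mapping */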
			memmove(map, map + offset, item->size_in_dw * 4);

			pipe->buffer_unmap(pipe, trans);
		}
	}

	item->start_in_dw = new_start_in_dw;
}

/**
 * Frees the memory associated with the item with id \a id from the pool.
 * \param id	The id of the item to be freed.
 */
void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
{
	struct compute_memory_item *item, *next;
	struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
	struct pipe_resource *res;

	COMPUTE_DBG(pool->screen, "* compute_memory_free() id = %"PRIi64"\n", id);

	LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->item_list, link) {

		if (item->id == id) {

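			/* Freeing any item but the last one leaves a gap,
			 * so the pool will need defragmenting */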
			if (item->link.next != pool->item_list) {
				pool->status |= POOL_FRAGMENTED;
			}

			list_del(&item->link);

			if (item->real_buffer) {
				res = (struct pipe_resource *)item->real_buffer;
				pool->screen->b.b.resource_destroy(
						screen, res);
			}

			free(item);

			return;
		}
	}

	LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->unallocated_list, link) {

		if (item->id == id) {
			list_del(&item->link);

			if (item->real_buffer) {
				res = (struct pipe_resource *)item->real_buffer;
				pool->screen->b.b.resource_destroy(
						screen, res);
			}

			free(item);

			return;
		}
	}

	fprintf(stderr, "Internal error, invalid id %"PRIi64" "
		"for compute_memory_free\n", id);

	assert(0 && "error");
}

/**
 * Creates a pending allocation for a new item; the item is
 * placed in the \a unallocated_list.
 * \param size_in_dw	The size, in double words, of the new item.
 * \return The new item
 * \see r600_compute_global_buffer_create
 */
struct compute_memory_item* compute_memory_alloc(
	struct compute_memory_pool* pool,
	int64_t size_in_dw)
{
	struct compute_memory_item *new_item = NULL;

	COMPUTE_DBG(pool->screen, "* compute_memory_alloc() size_in_dw = %"PRIi64" (%"PRIi64" bytes)\n",
			size_in_dw, 4 * size_in_dw);

	new_item = (struct compute_memory_item *)
				CALLOC(sizeof(struct compute_memory_item), 1);
	if (!new_item)
		return NULL;

	new_item->size_in_dw = size_in_dw;
	new_item->start_in_dw = -1; /* mark pending */
	new_item->id = pool->next_id++;
	new_item->pool = pool;
	new_item->real_buffer = NULL;

	list_addtail(&new_item->link, pool->unallocated_list);

	COMPUTE_DBG(pool->screen, "  + Adding item %p id = %"PRIi64" size = %"PRIi64" (%"PRIi64" bytes)\n",
			new_item, new_item->id, new_item->size_in_dw,
			new_item->size_in_dw * 4);
	return new_item;
}
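
/* A minimal sketch of the intended allocation flow (an illustration only;
 * the real call sites live in evergreen_compute.c):
 *
 *	struct compute_memory_item *item =
 *		compute_memory_alloc(pool, size_in_bytes / 4);
 *	...
 *	item->status |= ITEM_FOR_PROMOTING;
 *	compute_memory_finalize_pending(pool, pipe);
 *	// item->start_in_dw is now the item's offset inside pool->bo
 */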

/**
 * Transfers data host<->device; \a offset_in_chunk and \a size are in bytes.
 * \param device_to_host 1 for device->host, 0 for host->device.
 * \see compute_memory_shadow
 */
static void compute_memory_transfer(
	struct compute_memory_pool* pool,
	struct pipe_context * pipe,
	int device_to_host,
	struct compute_memory_item* chunk,
	void* data,
	int offset_in_chunk,
	int size)
{
	int64_t aligned_size = pool->size_in_dw;
	struct pipe_resource* gart = (struct pipe_resource*)pool->bo;
	/* Offset of the chunk's data inside the pool buffer, in bytes */
	int64_t internal_offset = chunk->start_in_dw * 4 + offset_in_chunk;

	struct pipe_transfer *xfer;
	uint32_t *map;

	assert(gart);

	COMPUTE_DBG(pool->screen, "* compute_memory_transfer() device_to_host = %d, "
		"offset_in_chunk = %d, size = %d\n", device_to_host,
		offset_in_chunk, size);

	if (device_to_host) {
		map = pipe->buffer_map(pipe, gart, 0, PIPE_MAP_READ,
			&(struct pipe_box) { .width = aligned_size * 4,
			.height = 1, .depth = 1 }, &xfer);
		assert(xfer);
		assert(map);
		memcpy(data, (char *)map + internal_offset, size);
		pipe->buffer_unmap(pipe, xfer);
	} else {
		map = pipe->buffer_map(pipe, gart, 0, PIPE_MAP_WRITE,
			&(struct pipe_box) { .width = aligned_size * 4,
			.height = 1, .depth = 1 }, &xfer);
		assert(xfer);
		assert(map);
		memcpy((char *)map + internal_offset, data, size);
		pipe->buffer_unmap(pipe, xfer);
	}
}
