/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based on amdgpu winsys.
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdio.h>

#include "radv_amdgpu_bo.h"

#include <amdgpu.h>
#include <amdgpu_drm.h>
#include <inttypes.h>
#include <pthread.h>
#include <unistd.h>

#include "util/u_atomic.h"

static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo);

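/* Perform a GPU VA map/unmap operation through amdgpu_bo_va_op_raw().
 * The VM page flags are derived from the winsys BO flags: pages are always
 * readable/executable, writable unless RADEON_FLAG_READ_ONLY is set, and
 * uncached (MTYPE_UC) on GFX9+ when RADEON_FLAG_VA_UNCACHED is requested.
 */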
static int
radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws,
		     amdgpu_bo_handle bo,
		     uint64_t offset,
		     uint64_t size,
		     uint64_t addr,
		     uint32_t bo_flags,
		     uint32_t ops)
{
	uint64_t flags = AMDGPU_VM_PAGE_READABLE |
			 AMDGPU_VM_PAGE_EXECUTABLE;

	if ((bo_flags & RADEON_FLAG_VA_UNCACHED) && ws->info.chip_class >= GFX9)
		flags |= AMDGPU_VM_MTYPE_UC;

	if (!(bo_flags & RADEON_FLAG_READ_ONLY))
		flags |= AMDGPU_VM_PAGE_WRITEABLE;

	size = ALIGN(size, getpagesize());

	return amdgpu_bo_va_op_raw(ws->dev, bo, offset, size, addr,
				   flags, ops);
}

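/* Map one range of a virtual BO into the parent's VA space. Ranges without a
 * backing BO are skipped (PRT mappings are not implemented yet). A reference
 * is taken on the backing BO so it stays alive while it is mapped.
 */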
static void
radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
                               const struct radv_amdgpu_map_range *range)
{
	assert(range->size);

	if (!range->bo)
		return; /* TODO: PRT mapping */

	p_atomic_inc(&range->bo->ref_count);
	int r = radv_amdgpu_bo_va_op(bo->ws, range->bo->bo, range->bo_offset,
				     range->size, range->offset + bo->base.va,
				     0, AMDGPU_VA_OP_MAP);
	if (r)
		abort();
}

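/* Unmap one range of a virtual BO and drop the reference that
 * radv_amdgpu_winsys_virtual_map() took on the backing BO.
 */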
static void
radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys_bo *bo,
                                 const struct radv_amdgpu_map_range *range)
{
	assert(range->size);

	if (!range->bo)
		return; /* TODO: PRT mapping */

	int r = radv_amdgpu_bo_va_op(bo->ws, range->bo->bo, range->bo_offset,
				     range->size, range->offset + bo->base.va,
				     0, AMDGPU_VA_OP_UNMAP);
	if (r)
		abort();
	radv_amdgpu_winsys_bo_destroy((struct radeon_winsys_bo *)range->bo);
}

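/* Order BOs by pointer value so that qsort() places duplicates next to each
 * other and they can be removed in a single pass.
 */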
static int bo_comparator(const void *ap, const void *bp)
{
	struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
	struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
	return (a > b) ? 1 : (a < b) ? -1 : 0;
}

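/* Rebuild the deduplicated list of backing BOs (bo->bos) from the current
 * ranges of a virtual BO: grow the array as needed, collect every backing BO,
 * sort them and collapse the result to unique entries.
 */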
static void
radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
{
	if (bo->bo_capacity < bo->range_count) {
		uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
		bo->bos = realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
		bo->bo_capacity = new_count;
	}

	uint32_t temp_bo_count = 0;
	for (uint32_t i = 0; i < bo->range_count; ++i)
		if (bo->ranges[i].bo)
			bo->bos[temp_bo_count++] = bo->ranges[i].bo;

	qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);

	/* Avoid reporting a stale entry when no range has a backing BO. */
	uint32_t final_bo_count = temp_bo_count ? 1 : 0;
	for (uint32_t i = 1; i < temp_bo_count; ++i)
		if (bo->bos[i] != bo->bos[i - 1])
			bo->bos[final_bo_count++] = bo->bos[i];

	bo->bo_count = final_bo_count;
}

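/* Bind (or unbind, when _bo is NULL) the range [offset, offset + size) of the
 * virtual BO _parent to the backing BO _bo starting at bo_offset. Existing
 * ranges that the new range overlaps are unmapped, the first/last ranges are
 * split or merged as needed, the new range is mapped, and the deduplicated
 * backing-BO list is rebuilt.
 *
 * For example, binding B into the middle of a parent that is fully backed by
 * A turns the single range [A] into [A | B | A], i.e. one range is split into
 * three.
 */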
static void
radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent,
                                   uint64_t offset, uint64_t size,
                                   struct radeon_winsys_bo *_bo, uint64_t bo_offset)
{
	struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
	struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo*)_bo;
	int range_count_delta, new_idx;
	int first = 0, last;
	struct radv_amdgpu_map_range new_first, new_last;

	assert(parent->is_virtual);
	assert(!bo || !bo->is_virtual);

	if (!size)
		return;

	/* We have at most 2 new ranges (1 by the bind, and another one by splitting a range that contains the newly bound range). */
	if (parent->range_capacity - parent->range_count < 2) {
		parent->range_capacity += 2;
		parent->ranges = realloc(parent->ranges,
		                         parent->range_capacity * sizeof(struct radv_amdgpu_map_range));
	}

	/*
	 * [first, last] is exactly the range of ranges that either overlap the
	 * newly bound range, or are adjacent to it. This corresponds to the
	 * bind ranges that may change.
	 */
	while (first + 1 < parent->range_count && parent->ranges[first].offset + parent->ranges[first].size < offset)
		++first;

	last = first;
	while (last + 1 < parent->range_count && parent->ranges[last].offset <= offset + size)
		++last;

	/* Whether the first or last range is going to be removed entirely or just
	 * resized/left alone. Note that in the case of first == last, we will split
	 * this range into a part before and a part after the new range. The remove
	 * flag then says whether the corresponding split part should be skipped. */
	bool remove_first = parent->ranges[first].offset == offset;
	bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
	bool unmapped_first = false;

	assert(parent->ranges[first].offset <= offset);
	assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);

	/* Try to merge the new range with the first range. */
	if (parent->ranges[first].bo == bo && (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
		size += offset - parent->ranges[first].offset;
		offset = parent->ranges[first].offset;
		bo_offset = parent->ranges[first].bo_offset;
		remove_first = true;
	}

	/* Try to merge the new range with the last range. */
	if (parent->ranges[last].bo == bo && (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
		size = parent->ranges[last].offset + parent->ranges[last].size - offset;
		remove_last = true;
	}

	range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
	new_idx = first + !remove_first;

	/* Any range between first and last is going to be entirely covered by the new range so just unmap them. */
	for (int i = first + 1; i < last; ++i)
		radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + i);

	/* If the first/last range is not left alone, we unmap it and optionally map
	 * it again after modifications. Note that this implicitly handles the
	 * splitting when first == last. */
	new_first = parent->ranges[first];
	new_last = parent->ranges[last];

	if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
		radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + first);
		unmapped_first = true;

		if (!remove_first) {
			new_first.size = offset - new_first.offset;
			radv_amdgpu_winsys_virtual_map(parent, &new_first);
		}
	}

	if (parent->ranges[last].offset < offset + size || remove_last) {
		if (first != last || !unmapped_first)
			radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + last);

		if (!remove_last) {
			new_last.size -= offset + size - new_last.offset;
			new_last.offset = offset + size;
			radv_amdgpu_winsys_virtual_map(parent, &new_last);
		}
	}

	/* Moves the range list after last to account for the changed number of ranges. */
	memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
	        sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));

	if (!remove_first)
		parent->ranges[first] = new_first;

	if (!remove_last)
		parent->ranges[new_idx + 1] = new_last;

	/* Actually set up the new range. */
	parent->ranges[new_idx].offset = offset;
	parent->ranges[new_idx].size = size;
	parent->ranges[new_idx].bo = bo;
	parent->ranges[new_idx].bo_offset = bo_offset;

	radv_amdgpu_winsys_virtual_map(parent, parent->ranges + new_idx);

	parent->range_count += range_count_delta;

	radv_amdgpu_winsys_rebuild_bo_list(parent);
}

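/* Drop a reference on a winsys BO and free it once the last reference goes
 * away: virtual BOs unmap all of their ranges and free the range/BO arrays,
 * regular BOs unmap their VA and free the amdgpu BO. In both cases the VA
 * range is released and the memory accounting is adjusted.
 */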
static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	struct radv_amdgpu_winsys *ws = bo->ws;

	if (p_atomic_dec_return(&bo->ref_count))
		return;
	if (bo->is_virtual) {
		for (uint32_t i = 0; i < bo->range_count; ++i) {
			radv_amdgpu_winsys_virtual_unmap(bo, bo->ranges + i);
		}
		free(bo->bos);
		free(bo->ranges);
	} else {
		if (bo->ws->debug_all_bos) {
			pthread_mutex_lock(&bo->ws->global_bo_list_lock);
			LIST_DEL(&bo->global_list_item);
			bo->ws->num_buffers--;
			pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
		}
		radv_amdgpu_bo_va_op(bo->ws, bo->bo, 0, bo->size, bo->base.va,
				     0, AMDGPU_VA_OP_UNMAP);
		amdgpu_bo_free(bo->bo);
	}

	if (bo->initial_domain & RADEON_DOMAIN_VRAM)
		p_atomic_add(&ws->allocated_vram,
			     -align64(bo->size, ws->info.gart_page_size));
	if (bo->base.vram_cpu_access)
		p_atomic_add(&ws->allocated_vram_vis,
			     -align64(bo->size, ws->info.gart_page_size));
	if (bo->initial_domain & RADEON_DOMAIN_GTT)
		p_atomic_add(&ws->allocated_gtt,
			     -align64(bo->size, ws->info.gart_page_size));

	amdgpu_va_range_free(bo->va_handle);
	FREE(bo);
}

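/* When debug_all_bos is set, keep the BO on the winsys-wide list (under the
 * global lock) so the rest of the winsys can iterate over every allocation
 * for debugging.
 */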
static void radv_amdgpu_add_buffer_to_global_list(struct radv_amdgpu_winsys_bo *bo)
{
	struct radv_amdgpu_winsys *ws = bo->ws;

	if (bo->ws->debug_all_bos) {
		pthread_mutex_lock(&ws->global_bo_list_lock);
		LIST_ADDTAIL(&bo->global_list_item, &ws->global_bo_list);
		ws->num_buffers++;
		pthread_mutex_unlock(&ws->global_bo_list_lock);
	}
}

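/* Allocate a winsys BO: reserve a GPU VA range (aligned up to the PTE
 * fragment size for large buffers), then either set up a virtual BO with a
 * single unbacked range, or allocate a real amdgpu BO in the requested
 * domains, map it at the reserved address and update the memory accounting.
 */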
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
			     uint64_t size,
			     unsigned alignment,
			     enum radeon_bo_domain initial_domain,
			     unsigned flags,
			     unsigned priority)
{
	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
	struct radv_amdgpu_winsys_bo *bo;
	struct amdgpu_bo_alloc_request request = {0};
	amdgpu_bo_handle buf_handle;
	uint64_t va = 0;
	amdgpu_va_handle va_handle;
	int r;
	bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
	if (!bo) {
		return NULL;
	}

	unsigned virt_alignment = alignment;
	if (size >= ws->info.pte_fragment_size)
		virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size);

	r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
				  size, virt_alignment, 0, &va, &va_handle,
				  (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
				   AMDGPU_VA_RANGE_HIGH);
	if (r)
		goto error_va_alloc;

	bo->base.va = va;
	bo->va_handle = va_handle;
	bo->size = size;
	bo->ws = ws;
	bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
	bo->ref_count = 1;

	if (flags & RADEON_FLAG_VIRTUAL) {
		bo->ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
		bo->range_count = 1;
		bo->range_capacity = 1;

		bo->ranges[0].offset = 0;
		bo->ranges[0].size = size;
		bo->ranges[0].bo = NULL;
		bo->ranges[0].bo_offset = 0;

		radv_amdgpu_winsys_virtual_map(bo, bo->ranges);
		return (struct radeon_winsys_bo *)bo;
	}

	request.alloc_size = size;
	request.phys_alignment = alignment;

	if (initial_domain & RADEON_DOMAIN_VRAM)
		request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
	if (initial_domain & RADEON_DOMAIN_GTT)
		request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;

	if (flags & RADEON_FLAG_CPU_ACCESS) {
		bo->base.vram_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;
		request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	}
	if (flags & RADEON_FLAG_NO_CPU_ACCESS)
		request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
	if (flags & RADEON_FLAG_GTT_WC)
		request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	if (!(flags & RADEON_FLAG_IMPLICIT_SYNC) && ws->info.drm_minor >= 22)
		request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
	if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
	    ws->info.has_local_buffers &&
	    (ws->use_local_bos || (flags & RADEON_FLAG_PREFER_LOCAL_BO))) {
		bo->base.is_local = true;
		request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
	}

	/* This won't do anything on pre-4.9 kernels. */
	if (ws->zero_all_vram_allocs && (initial_domain & RADEON_DOMAIN_VRAM))
		request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
	r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
	if (r) {
		fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
		fprintf(stderr, "amdgpu:    size      : %"PRIu64" bytes\n", size);
		fprintf(stderr, "amdgpu:    alignment : %u bytes\n", alignment);
		fprintf(stderr, "amdgpu:    domains   : %u\n", initial_domain);
		goto error_bo_alloc;
	}

	r = radv_amdgpu_bo_va_op(ws, buf_handle, 0, size, va, flags,
				 AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	bo->bo = buf_handle;
	bo->initial_domain = initial_domain;
	bo->is_shared = false;
	bo->priority = priority;

	r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
	assert(!r);

	if (initial_domain & RADEON_DOMAIN_VRAM)
		p_atomic_add(&ws->allocated_vram,
			     align64(bo->size, ws->info.gart_page_size));
	if (bo->base.vram_cpu_access)
		p_atomic_add(&ws->allocated_vram_vis,
			     align64(bo->size, ws->info.gart_page_size));
	if (initial_domain & RADEON_DOMAIN_GTT)
		p_atomic_add(&ws->allocated_gtt,
			     align64(bo->size, ws->info.gart_page_size));

	radv_amdgpu_add_buffer_to_global_list(bo);
	return (struct radeon_winsys_bo *)bo;
error_va_map:
	amdgpu_bo_free(buf_handle);

error_bo_alloc:
	amdgpu_va_range_free(va_handle);

error_va_alloc:
	FREE(bo);
	return NULL;
}

static void *
radv_amdgpu_winsys_bo_map(struct radeon_winsys_bo *_bo)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	int ret;
	void *data;
	ret = amdgpu_bo_cpu_map(bo->bo, &data);
	if (ret)
		return NULL;
	return data;
}

static void
radv_amdgpu_winsys_bo_unmap(struct radeon_winsys_bo *_bo)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	amdgpu_bo_cpu_unmap(bo->bo);
}

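/* Pick a VM alignment that speeds up GPU address translation: at least the
 * PTE fragment size for sufficiently large buffers and, on GFX9+, the highest
 * power of two not larger than the size.
 */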
static uint64_t
radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws,
				     uint64_t size, unsigned alignment)
{
	uint64_t vm_alignment = alignment;

	/* Increase the VM alignment for faster address translation. */
	if (size >= ws->info.pte_fragment_size)
		vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size);

	/* Gfx9: Increase the VM alignment to the most significant bit set
	 * in the size for faster address translation.
	 */
	if (ws->info.chip_class >= GFX9) {
		unsigned msb = util_last_bit64(size); /* 0 = no bit is set */
		uint64_t msb_alignment = msb ? 1ull << (msb - 1) : 0;

		vm_alignment = MAX2(vm_alignment, msb_alignment);
	}
	return vm_alignment;
}

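/* Wrap an existing CPU allocation (userptr) into a winsys BO: create an
 * amdgpu BO from the user memory, reserve a VA range with the optimal
 * alignment and map it. The resulting BO lives in the GTT domain.
 */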
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws,
                               void *pointer,
                               uint64_t size,
			       unsigned priority)
{
	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
	amdgpu_bo_handle buf_handle;
	struct radv_amdgpu_winsys_bo *bo;
	uint64_t va;
	amdgpu_va_handle va_handle;
	uint64_t vm_alignment;

	bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
	if (!bo)
		return NULL;

	if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle))
		goto error;

	/* Using the optimal VM alignment also fixes GPU hangs for buffers that
	 * are imported.
	 */
	vm_alignment = radv_amdgpu_get_optimal_vm_alignment(ws, size,
							    ws->info.gart_page_size);

	if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
	                          size, vm_alignment, 0, &va, &va_handle,
				  AMDGPU_VA_RANGE_HIGH))
		goto error_va_alloc;

	if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP))
		goto error_va_map;

	/* Initialize it */
	bo->base.va = va;
	bo->va_handle = va_handle;
	bo->size = size;
	bo->ref_count = 1;
	bo->ws = ws;
	bo->bo = buf_handle;
	bo->initial_domain = RADEON_DOMAIN_GTT;
	bo->priority = priority;

	MAYBE_UNUSED int r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
	assert(!r);

	p_atomic_add(&ws->allocated_gtt,
		     align64(bo->size, ws->info.gart_page_size));

	radv_amdgpu_add_buffer_to_global_list(bo);
	return (struct radeon_winsys_bo *)bo;

error_va_map:
	amdgpu_va_range_free(va_handle);

error_va_alloc:
	amdgpu_bo_free(buf_handle);

error:
	FREE(bo);
	return NULL;
}

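/* Import a dma-buf file descriptor as a winsys BO: import the handle, query
 * its size and preferred heap, reserve a VA range (1 MiB aligned), map it and
 * mark the BO as shared.
 */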
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
			      int fd, unsigned priority,
			      unsigned *stride,
			      unsigned *offset)
{
	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
	struct radv_amdgpu_winsys_bo *bo;
	uint64_t va;
	amdgpu_va_handle va_handle;
	enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
	struct amdgpu_bo_import_result result = {0};
	struct amdgpu_bo_info info = {0};
	enum radeon_bo_domain initial = 0;
	int r;
	bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
	if (!bo)
		return NULL;

	r = amdgpu_bo_import(ws->dev, type, fd, &result);
	if (r)
		goto error;

	r = amdgpu_bo_query_info(result.buf_handle, &info);
	if (r)
		goto error_query;

	r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
				  result.alloc_size, 1 << 20, 0, &va, &va_handle,
				  AMDGPU_VA_RANGE_HIGH);
	if (r)
		goto error_query;

	r = radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size,
				 va, 0, AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
		initial |= RADEON_DOMAIN_VRAM;
	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
		initial |= RADEON_DOMAIN_GTT;

	bo->bo = result.buf_handle;
	bo->base.va = va;
	bo->va_handle = va_handle;
	bo->initial_domain = initial;
	bo->size = result.alloc_size;
	bo->is_shared = true;
	bo->ws = ws;
	bo->priority = priority;
	bo->ref_count = 1;

	r = amdgpu_bo_export(result.buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
	assert(!r);

	if (bo->initial_domain & RADEON_DOMAIN_VRAM)
		p_atomic_add(&ws->allocated_vram,
			     align64(bo->size, ws->info.gart_page_size));
	if (bo->initial_domain & RADEON_DOMAIN_GTT)
		p_atomic_add(&ws->allocated_gtt,
			     align64(bo->size, ws->info.gart_page_size));

	radv_amdgpu_add_buffer_to_global_list(bo);
	return (struct radeon_winsys_bo *)bo;
error_va_map:
	amdgpu_va_range_free(va_handle);

error_query:
	amdgpu_bo_free(result.buf_handle);

error:
	FREE(bo);
	return NULL;
}

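/* Export a winsys BO as a dma-buf file descriptor and mark the BO as shared. */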
static bool
radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws,
			  struct radeon_winsys_bo *_bo,
			  int *fd)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
	int r;
	unsigned handle;
	r = amdgpu_bo_export(bo->bo, type, &handle);
	if (r)
		return false;

	*fd = (int)handle;
	bo->is_shared = true;
	return true;
}

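/* Convert the TILE_SPLIT field of the tiling flags to a tile-split size in
 * bytes (eg_tile_split) and back (radv_eg_tile_split_rev); unknown values
 * fall back to 1024 bytes / encoding 4.
 */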
static unsigned eg_tile_split(unsigned tile_split)
{
	switch (tile_split) {
	case 0:     tile_split = 64;    break;
	case 1:     tile_split = 128;   break;
	case 2:     tile_split = 256;   break;
	case 3:     tile_split = 512;   break;
	default:
	case 4:     tile_split = 1024;  break;
	case 5:     tile_split = 2048;  break;
	case 6:     tile_split = 4096;  break;
	}
	return tile_split;
}

static unsigned radv_eg_tile_split_rev(unsigned eg_tile_split)
{
	switch (eg_tile_split) {
	case 64:    return 0;
	case 128:   return 1;
	case 256:   return 2;
	case 512:   return 3;
	default:
	case 1024:  return 4;
	case 2048:  return 5;
	case 4096:  return 6;
	}
}

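/* Translate the generic radeon_bo_metadata into AMDGPU tiling flags (the
 * GFX9+ swizzle mode, or the legacy array mode/bank/tile-split fields) and
 * store them on the BO together with the opaque UMD metadata blob.
 */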
static void
radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys_bo *_bo,
				   struct radeon_bo_metadata *md)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	struct amdgpu_bo_metadata metadata = {0};
	uint32_t tiling_flags = 0;

	if (bo->ws->info.chip_class >= GFX9) {
		tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
	} else {
		if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
			tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
		else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
			tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
		else
			tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

		tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
		tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
		tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
		if (md->u.legacy.tile_split)
			tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
		tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
		tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks)-1);

		if (md->u.legacy.scanout)
			tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
		else
			tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
	}

	metadata.tiling_info = tiling_flags;
	metadata.size_metadata = md->size_metadata;
	memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

	amdgpu_bo_set_metadata(bo->bo, &metadata);
}

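/* Query the tiling flags and UMD metadata stored on the BO and decode them
 * back into the generic radeon_bo_metadata layout; the inverse of
 * radv_amdgpu_winsys_bo_set_metadata().
 */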
static void
radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	struct amdgpu_bo_info info = {0};

	int r = amdgpu_bo_query_info(bo->bo, &info);
	if (r)
		return;

	uint64_t tiling_flags = info.metadata.tiling_info;

	if (bo->ws->info.chip_class >= GFX9) {
		md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
	} else {
		md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
		md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;

		if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4)  /* 2D_TILED_THIN1 */
			md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
		else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
			md->u.legacy.microtile = RADEON_LAYOUT_TILED;

		md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
		md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
		md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
		md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
		md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
		md->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
		md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
	}

	md->size_metadata = info.metadata.size_metadata;
	memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}

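/* Hook the amdgpu BO implementations into the radeon_winsys function table. */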
void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
{
	ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
	ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy;
	ws->base.buffer_map = radv_amdgpu_winsys_bo_map;
	ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap;
	ws->base.buffer_from_ptr = radv_amdgpu_winsys_bo_from_ptr;
	ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
	ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
	ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
	ws->base.buffer_get_metadata = radv_amdgpu_winsys_bo_get_metadata;
	ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
}