/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
230ed5401bSmrg
#include <unistd.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>

#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"
340ed5401bSmrg
350ed5401bSmrg#define IB_SIZE 4096
360ed5401bSmrg#define MAX_RESOURCES 8
370ed5401bSmrg
380ed5401bSmrg#define DMA_SIZE 4097
390ed5401bSmrg#define DMA_DATA_BYTE 0xea
400ed5401bSmrg
410ed5401bSmrgstatic bool do_p2p;
420ed5401bSmrg
430ed5401bSmrgstatic amdgpu_device_handle executing_device_handle;
440ed5401bSmrgstatic uint32_t executing_device_major_version;
450ed5401bSmrgstatic uint32_t executing_device_minor_version;
460ed5401bSmrg
470ed5401bSmrgstatic amdgpu_device_handle peer_exporting_device_handle;
480ed5401bSmrgstatic uint32_t peer_exporting_device_major_version;
490ed5401bSmrgstatic uint32_t peer_exporting_device_minor_version;
500ed5401bSmrg
510ed5401bSmrgstatic amdgpu_context_handle context_handle;
520ed5401bSmrgstatic amdgpu_bo_handle ib_handle;
530ed5401bSmrgstatic uint32_t *ib_cpu;
540ed5401bSmrgstatic uint64_t ib_mc_address;
550ed5401bSmrgstatic amdgpu_va_handle ib_va_handle;
560ed5401bSmrgstatic uint32_t num_dword;
570ed5401bSmrg
580ed5401bSmrgstatic amdgpu_bo_handle resources[MAX_RESOURCES];
590ed5401bSmrgstatic unsigned num_resources;
600ed5401bSmrg
610ed5401bSmrgstatic uint8_t* reference_data;
620ed5401bSmrg
630ed5401bSmrgstatic void amdgpu_cp_dma_host_to_vram(void);
640ed5401bSmrgstatic void amdgpu_cp_dma_vram_to_host(void);
650ed5401bSmrgstatic void amdgpu_cp_dma_p2p_vram_to_vram(void);
660ed5401bSmrgstatic void amdgpu_cp_dma_p2p_host_to_vram(void);
670ed5401bSmrgstatic void amdgpu_cp_dma_p2p_vram_to_host(void);
680ed5401bSmrg
690ed5401bSmrg/**
700ed5401bSmrg * Tests in cp dma test suite
710ed5401bSmrg */
720ed5401bSmrgCU_TestInfo cp_dma_tests[] = {
730ed5401bSmrg	{ "CP DMA write Host to VRAM",  amdgpu_cp_dma_host_to_vram },
740ed5401bSmrg	{ "CP DMA write VRAM to Host",  amdgpu_cp_dma_vram_to_host },
750ed5401bSmrg
760ed5401bSmrg	{ "Peer to Peer CP DMA write VRAM to VRAM",  amdgpu_cp_dma_p2p_vram_to_vram },
770ed5401bSmrg	{ "Peer to Peer CP DMA write Host to VRAM",  amdgpu_cp_dma_p2p_host_to_vram },
780ed5401bSmrg	{ "Peer to Peer CP DMA write VRAM to Host",  amdgpu_cp_dma_p2p_vram_to_host },
790ed5401bSmrg	CU_TEST_INFO_NULL,
800ed5401bSmrg};
810ed5401bSmrg
820ed5401bSmrgstruct amdgpu_cp_dma_bo{
830ed5401bSmrg	amdgpu_bo_handle buf_handle;
840ed5401bSmrg	amdgpu_va_handle va_handle;
850ed5401bSmrg	uint64_t gpu_va;
860ed5401bSmrg	uint64_t size;
870ed5401bSmrg};
880ed5401bSmrg
890ed5401bSmrgstatic int allocate_bo_and_va(amdgpu_device_handle dev,
900ed5401bSmrg		uint64_t size, uint64_t alignment,
910ed5401bSmrg		uint32_t heap, uint64_t alloc_flags,
920ed5401bSmrg		struct amdgpu_cp_dma_bo *bo) {
930ed5401bSmrg	struct amdgpu_bo_alloc_request request = {};
940ed5401bSmrg	amdgpu_bo_handle buf_handle;
950ed5401bSmrg	amdgpu_va_handle va_handle;
960ed5401bSmrg	uint64_t vmc_addr;
970ed5401bSmrg	int r;
980ed5401bSmrg
990ed5401bSmrg	request.alloc_size = size;
1000ed5401bSmrg	request.phys_alignment = alignment;
1010ed5401bSmrg	request.preferred_heap = heap;
1020ed5401bSmrg	request.flags = alloc_flags;
1030ed5401bSmrg
1040ed5401bSmrg	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
1050ed5401bSmrg	if (r)
1060ed5401bSmrg		goto error_bo_alloc;
1070ed5401bSmrg
1080ed5401bSmrg	r = amdgpu_va_range_alloc(dev, amdgpu_gpu_va_range_general,
1090ed5401bSmrg			size, alignment, 0,
1100ed5401bSmrg			&vmc_addr, &va_handle, 0);
1110ed5401bSmrg	if (r)
1120ed5401bSmrg		goto error_va_alloc;
1130ed5401bSmrg
1140ed5401bSmrg	r = amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr,
1150ed5401bSmrg						AMDGPU_VM_PAGE_READABLE |
1160ed5401bSmrg							AMDGPU_VM_PAGE_WRITEABLE |
1170ed5401bSmrg							AMDGPU_VM_PAGE_EXECUTABLE,
1180ed5401bSmrg						AMDGPU_VA_OP_MAP);
1190ed5401bSmrg	if (r)
1200ed5401bSmrg		goto error_va_map;
1210ed5401bSmrg
1220ed5401bSmrg	bo->buf_handle = buf_handle;
1230ed5401bSmrg	bo->va_handle = va_handle;
1240ed5401bSmrg	bo->gpu_va = vmc_addr;
1250ed5401bSmrg	bo->size = size;
1260ed5401bSmrg
1270ed5401bSmrg	return 0;
1280ed5401bSmrg
1290ed5401bSmrgerror_va_map:
1300ed5401bSmrg	amdgpu_bo_va_op(buf_handle, 0,
1310ed5401bSmrg			size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
1320ed5401bSmrg
1330ed5401bSmrgerror_va_alloc:
1340ed5401bSmrg	amdgpu_va_range_free(va_handle);
1350ed5401bSmrg
1360ed5401bSmrgerror_bo_alloc:
1370ed5401bSmrg	amdgpu_bo_free(buf_handle);
1380ed5401bSmrg
1390ed5401bSmrg	return r;
1400ed5401bSmrg}
1410ed5401bSmrg
1420ed5401bSmrgstatic int import_dma_buf_to_bo(amdgpu_device_handle dev,
1430ed5401bSmrg		int dmabuf_fd, struct amdgpu_cp_dma_bo *bo) {
1440ed5401bSmrg	amdgpu_va_handle va_handle;
1450ed5401bSmrg	uint64_t vmc_addr;
1460ed5401bSmrg	int r;
1470ed5401bSmrg	struct amdgpu_bo_import_result bo_import_result = {};
1480ed5401bSmrg
1490ed5401bSmrg	r = amdgpu_bo_import(dev, amdgpu_bo_handle_type_dma_buf_fd,
1500ed5401bSmrg			dmabuf_fd, &bo_import_result);
1510ed5401bSmrg	if (r)
1520ed5401bSmrg		goto error_bo_import;
1530ed5401bSmrg
1540ed5401bSmrg	r = amdgpu_va_range_alloc(dev, amdgpu_gpu_va_range_general,
1550ed5401bSmrg				bo_import_result.alloc_size, 0, 0,
1560ed5401bSmrg				&vmc_addr, &va_handle, 0);
1570ed5401bSmrg	if (r)
1580ed5401bSmrg		goto error_va_alloc;
1590ed5401bSmrg
1600ed5401bSmrg	r = amdgpu_bo_va_op(bo_import_result.buf_handle, 0,
1610ed5401bSmrg			bo_import_result.alloc_size, vmc_addr,
1620ed5401bSmrg			AMDGPU_VM_PAGE_READABLE |
1630ed5401bSmrg				AMDGPU_VM_PAGE_WRITEABLE |
1640ed5401bSmrg				AMDGPU_VM_PAGE_EXECUTABLE,
1650ed5401bSmrg			AMDGPU_VA_OP_MAP);
1660ed5401bSmrg	if (r)
1670ed5401bSmrg		goto error_va_map;
1680ed5401bSmrg
1690ed5401bSmrg	bo->buf_handle = bo_import_result.buf_handle;
1700ed5401bSmrg	bo->va_handle = va_handle;
1710ed5401bSmrg	bo->gpu_va = vmc_addr;
1720ed5401bSmrg	bo->size = bo_import_result.alloc_size;
1730ed5401bSmrg
1740ed5401bSmrg	return 0;
1750ed5401bSmrg
1760ed5401bSmrgerror_va_map:
1770ed5401bSmrg	amdgpu_bo_va_op(bo_import_result.buf_handle, 0,
1780ed5401bSmrg			bo_import_result.alloc_size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
1790ed5401bSmrg
1800ed5401bSmrgerror_va_alloc:
1810ed5401bSmrg	amdgpu_va_range_free(va_handle);
1820ed5401bSmrg
1830ed5401bSmrgerror_bo_import:
1840ed5401bSmrg	amdgpu_bo_free(bo_import_result.buf_handle);
1850ed5401bSmrg
1860ed5401bSmrg	return r;
1870ed5401bSmrg}
1880ed5401bSmrg
1890ed5401bSmrgstatic int free_bo(struct amdgpu_cp_dma_bo bo) {
1900ed5401bSmrg	int r;
1910ed5401bSmrg	r = amdgpu_bo_va_op(bo.buf_handle, 0,
1920ed5401bSmrg			bo.size, bo.gpu_va, 0, AMDGPU_VA_OP_UNMAP);
1930ed5401bSmrg	if(r)
1940ed5401bSmrg		return r;
1950ed5401bSmrg
1960ed5401bSmrg	r = amdgpu_va_range_free(bo.va_handle);
1970ed5401bSmrg	if(r)
1980ed5401bSmrg		return r;
1990ed5401bSmrg
2000ed5401bSmrg	r = amdgpu_bo_free(bo.buf_handle);
2010ed5401bSmrg	if(r)
2020ed5401bSmrg		return r;
2030ed5401bSmrg
2040ed5401bSmrg	return 0;
2050ed5401bSmrg}
2060ed5401bSmrg
2070ed5401bSmrgstatic int submit_and_sync() {
2080ed5401bSmrg	struct amdgpu_cs_request ibs_request = {0};
2090ed5401bSmrg	struct amdgpu_cs_ib_info ib_info = {0};
2100ed5401bSmrg	struct amdgpu_cs_fence fence_status = {0};
2110ed5401bSmrg	uint32_t expired;
2120ed5401bSmrg	uint32_t family_id, chip_id, chip_rev;
2130ed5401bSmrg	unsigned gc_ip_type;
2140ed5401bSmrg	int r;
2150ed5401bSmrg
2160ed5401bSmrg	r = amdgpu_bo_list_create(executing_device_handle,
2170ed5401bSmrg			num_resources, resources,
2180ed5401bSmrg			NULL, &ibs_request.resources);
2190ed5401bSmrg	if (r)
2200ed5401bSmrg		return r;
2210ed5401bSmrg
2220ed5401bSmrg	family_id = executing_device_handle->info.family_id;
2230ed5401bSmrg	chip_id = executing_device_handle->info.chip_external_rev;
2240ed5401bSmrg	chip_rev = executing_device_handle->info.chip_rev;
2250ed5401bSmrg
2260ed5401bSmrg	gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ?
2270ed5401bSmrg		AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX;
2280ed5401bSmrg
2290ed5401bSmrg	ib_info.ib_mc_address = ib_mc_address;
2300ed5401bSmrg	ib_info.size = num_dword;
2310ed5401bSmrg
2320ed5401bSmrg	ibs_request.ip_type = gc_ip_type;
2330ed5401bSmrg	ibs_request.number_of_ibs = 1;
2340ed5401bSmrg	ibs_request.ibs = &ib_info;
2350ed5401bSmrg	ibs_request.fence_info.handle = NULL;
2360ed5401bSmrg
2370ed5401bSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2380ed5401bSmrg	if (r)
2390ed5401bSmrg		return r;
2400ed5401bSmrg
2410ed5401bSmrg	r = amdgpu_bo_list_destroy(ibs_request.resources);
2420ed5401bSmrg	if (r)
2430ed5401bSmrg		return r;
2440ed5401bSmrg
2450ed5401bSmrg	fence_status.context = context_handle;
2460ed5401bSmrg	fence_status.ip_type = gc_ip_type;
2470ed5401bSmrg	fence_status.fence = ibs_request.seq_no;
2480ed5401bSmrg
2490ed5401bSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
2500ed5401bSmrg			AMDGPU_TIMEOUT_INFINITE,
2510ed5401bSmrg			0, &expired);
2520ed5401bSmrg	if (r)
2530ed5401bSmrg		return r;
2540ed5401bSmrg
2550ed5401bSmrg	return 0;
2560ed5401bSmrg}
2570ed5401bSmrg
2580ed5401bSmrgstatic void cp_dma_cmd(struct amdgpu_cp_dma_bo src_bo,
2590ed5401bSmrg		struct amdgpu_cp_dma_bo dst_bo) {
2600ed5401bSmrg	_Static_assert(DMA_SIZE < (1 << 26), "DMA size exceeds CP DMA maximium!");
2610ed5401bSmrg
2620ed5401bSmrg	ib_cpu[0] = 0xc0055000;
2630ed5401bSmrg	ib_cpu[1] = 0x80000000;
2640ed5401bSmrg	ib_cpu[2] = src_bo.gpu_va & 0x00000000ffffffff;
2650ed5401bSmrg	ib_cpu[3] = (src_bo.gpu_va & 0xffffffff00000000) >> 32;
2660ed5401bSmrg	ib_cpu[4] = dst_bo.gpu_va & 0x00000000ffffffff;
2670ed5401bSmrg	ib_cpu[5] = (dst_bo.gpu_va & 0xffffffff00000000) >> 32;
2680ed5401bSmrg	// size is read from the lower 26bits.
2690ed5401bSmrg	ib_cpu[6] = ((1 << 26) - 1) & DMA_SIZE;
2700ed5401bSmrg	ib_cpu[7] = 0xffff1000;
2710ed5401bSmrg
2720ed5401bSmrg	num_dword = 8;
2730ed5401bSmrg
2740ed5401bSmrg	resources[0] = src_bo.buf_handle;
2750ed5401bSmrg	resources[1] = dst_bo.buf_handle;
2760ed5401bSmrg	resources[2] = ib_handle;
2770ed5401bSmrg	num_resources = 3;
2780ed5401bSmrg}
2790ed5401bSmrg
2800ed5401bSmrgstatic void amdgpu_cp_dma(uint32_t src_heap, uint32_t dst_heap) {
2810ed5401bSmrg	int r;
2820ed5401bSmrg	struct amdgpu_cp_dma_bo src_bo = {0};
2830ed5401bSmrg	struct amdgpu_cp_dma_bo dst_bo = {0};
2840ed5401bSmrg	void *src_bo_cpu;
2850ed5401bSmrg	void *dst_bo_cpu;
2860ed5401bSmrg
2870ed5401bSmrg	/* allocate the src bo, set its data to DMA_DATA_BYTE */
2880ed5401bSmrg	r = allocate_bo_and_va(executing_device_handle, DMA_SIZE, 4096,
2890ed5401bSmrg			src_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &src_bo);
2900ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
2910ed5401bSmrg
2920ed5401bSmrg	r = amdgpu_bo_cpu_map(src_bo.buf_handle, (void **)&src_bo_cpu);
2930ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
2940ed5401bSmrg	memset(src_bo_cpu, DMA_DATA_BYTE, DMA_SIZE);
2950ed5401bSmrg
2960ed5401bSmrg	r = amdgpu_bo_cpu_unmap(src_bo.buf_handle);
2970ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
2980ed5401bSmrg
2990ed5401bSmrg	/* allocate the dst bo and clear its content to all 0 */
3000ed5401bSmrg	r = allocate_bo_and_va(executing_device_handle, DMA_SIZE, 4096,
3010ed5401bSmrg			dst_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &dst_bo);
3020ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
3030ed5401bSmrg
3040ed5401bSmrg	r = amdgpu_bo_cpu_map(dst_bo.buf_handle, (void **)&dst_bo_cpu);
3050ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
3060ed5401bSmrg
3070ed5401bSmrg	_Static_assert(DMA_DATA_BYTE != 0, "Initialization data should be different from DMA data!");
3080ed5401bSmrg	memset(dst_bo_cpu, 0, DMA_SIZE);
3090ed5401bSmrg
3100ed5401bSmrg	/* record CP DMA command and dispatch the command */
3110ed5401bSmrg	cp_dma_cmd(src_bo, dst_bo);
3120ed5401bSmrg
3130ed5401bSmrg	r = submit_and_sync();
3140ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
3150ed5401bSmrg
3160ed5401bSmrg	/* verify the dst bo is filled with DMA_DATA_BYTE */
3170ed5401bSmrg	CU_ASSERT_EQUAL(memcmp(dst_bo_cpu, reference_data, DMA_SIZE) == 0, true);
3180ed5401bSmrg
3190ed5401bSmrg	r = amdgpu_bo_cpu_unmap(dst_bo.buf_handle);
3200ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
3210ed5401bSmrg
3220ed5401bSmrg	r = free_bo(src_bo);
3230ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
3240ed5401bSmrg
3250ed5401bSmrg	r = free_bo(dst_bo);
3260ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
3270ed5401bSmrg}
3280ed5401bSmrg
3290ed5401bSmrgstatic void amdgpu_cp_dma_p2p(uint32_t src_heap, uint32_t dst_heap) {
3300ed5401bSmrg	int r;
3310ed5401bSmrg	struct amdgpu_cp_dma_bo exported_bo = {0};
3320ed5401bSmrg	int dma_buf_fd;
3330ed5401bSmrg	int dma_buf_fd_dup;
3340ed5401bSmrg	struct amdgpu_cp_dma_bo src_bo = {0};
3350ed5401bSmrg	struct amdgpu_cp_dma_bo imported_dst_bo = {0};
3360ed5401bSmrg	void *exported_bo_cpu;
3370ed5401bSmrg	void *src_bo_cpu;
3380ed5401bSmrg
3390ed5401bSmrg	/* allocate a bo on the peer device and export it to dma-buf */
3400ed5401bSmrg	r = allocate_bo_and_va(peer_exporting_device_handle, DMA_SIZE, 4096,
3410ed5401bSmrg			src_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &exported_bo);
3420ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
3430ed5401bSmrg
3440ed5401bSmrg	/* map the exported bo and clear its content to 0 */
3450ed5401bSmrg	_Static_assert(DMA_DATA_BYTE != 0, "Initialization data should be different from DMA data!");
3460ed5401bSmrg	r = amdgpu_bo_cpu_map(exported_bo.buf_handle, (void **)&exported_bo_cpu);
3470ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
3480ed5401bSmrg	memset(exported_bo_cpu, 0, DMA_SIZE);
3490ed5401bSmrg
3500ed5401bSmrg	r = amdgpu_bo_export(exported_bo.buf_handle,
3510ed5401bSmrg			amdgpu_bo_handle_type_dma_buf_fd, (uint32_t*)&dma_buf_fd);
3520ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
3530ed5401bSmrg
3540ed5401bSmrg    // According to amdgpu_drm:
3550ed5401bSmrg	// "Buffer must be "imported" only using new "fd"
3560ed5401bSmrg	// (different from one used by "exporter")"
3570ed5401bSmrg	dma_buf_fd_dup = dup(dma_buf_fd);
3580ed5401bSmrg	r = close(dma_buf_fd);
3590ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
3600ed5401bSmrg
3610ed5401bSmrg	/* import the dma-buf to the executing device, imported bo is the DMA destination */
3620ed5401bSmrg	r = import_dma_buf_to_bo(
3630ed5401bSmrg			executing_device_handle, dma_buf_fd_dup, &imported_dst_bo);
3640ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
3650ed5401bSmrg
3660ed5401bSmrg	r = close(dma_buf_fd_dup);
3670ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
3680ed5401bSmrg
3690ed5401bSmrg	/* allocate the src bo and set its content to DMA_DATA_BYTE */
3700ed5401bSmrg	r = allocate_bo_and_va(executing_device_handle, DMA_SIZE, 4096,
3710ed5401bSmrg			dst_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &src_bo);
3720ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
3730ed5401bSmrg
3740ed5401bSmrg	r = amdgpu_bo_cpu_map(src_bo.buf_handle, (void **)&src_bo_cpu);
3750ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
3760ed5401bSmrg
3770ed5401bSmrg	memset(src_bo_cpu, DMA_DATA_BYTE, DMA_SIZE);
3780ed5401bSmrg
3790ed5401bSmrg	r = amdgpu_bo_cpu_unmap(src_bo.buf_handle);
3800ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
3810ed5401bSmrg
3820ed5401bSmrg	/* record CP DMA command and dispatch the command */
3830ed5401bSmrg	cp_dma_cmd(src_bo, imported_dst_bo);
3840ed5401bSmrg
3850ed5401bSmrg	r = submit_and_sync();
3860ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
3870ed5401bSmrg
3880ed5401bSmrg	/* verify the bo from the peer device is filled with DMA_DATA_BYTE */
3890ed5401bSmrg	CU_ASSERT_EQUAL(memcmp(exported_bo_cpu, reference_data, DMA_SIZE) == 0, true);
3900ed5401bSmrg
3910ed5401bSmrg	r = amdgpu_bo_cpu_unmap(exported_bo.buf_handle);
3920ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
3930ed5401bSmrg
3940ed5401bSmrg	r = free_bo(exported_bo);
3950ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
3960ed5401bSmrg
3970ed5401bSmrg	r = free_bo(imported_dst_bo);
3980ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
3990ed5401bSmrg
4000ed5401bSmrg	r = free_bo(src_bo);
4010ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
4020ed5401bSmrg}
4030ed5401bSmrg
4040ed5401bSmrgstatic void amdgpu_cp_dma_host_to_vram(void) {
4050ed5401bSmrg	amdgpu_cp_dma(AMDGPU_GEM_DOMAIN_GTT, AMDGPU_GEM_DOMAIN_VRAM);
4060ed5401bSmrg}
4070ed5401bSmrg
4080ed5401bSmrgstatic void amdgpu_cp_dma_vram_to_host(void) {
4090ed5401bSmrg	amdgpu_cp_dma(AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_GTT);
4100ed5401bSmrg}
4110ed5401bSmrg
4120ed5401bSmrgstatic void amdgpu_cp_dma_p2p_vram_to_vram(void) {
4130ed5401bSmrg	amdgpu_cp_dma_p2p(AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_VRAM);
4140ed5401bSmrg}
4150ed5401bSmrg
4160ed5401bSmrgstatic void amdgpu_cp_dma_p2p_host_to_vram(void) {
4170ed5401bSmrg	amdgpu_cp_dma_p2p(AMDGPU_GEM_DOMAIN_GTT, AMDGPU_GEM_DOMAIN_VRAM);
4180ed5401bSmrg}
4190ed5401bSmrg
4200ed5401bSmrgstatic void amdgpu_cp_dma_p2p_vram_to_host(void) {
4210ed5401bSmrg	amdgpu_cp_dma_p2p(AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_GTT);
4220ed5401bSmrg}
4230ed5401bSmrg
4240ed5401bSmrgint suite_cp_dma_tests_init() {
4250ed5401bSmrg	int r;
4260ed5401bSmrg
4270ed5401bSmrg	r = amdgpu_device_initialize(drm_amdgpu[0],
4280ed5401bSmrg			&executing_device_major_version,
4290ed5401bSmrg			&executing_device_minor_version,
4300ed5401bSmrg			&executing_device_handle);
4310ed5401bSmrg	if (r)
4320ed5401bSmrg		return CUE_SINIT_FAILED;
4330ed5401bSmrg
4340ed5401bSmrg	r = amdgpu_cs_ctx_create(executing_device_handle, &context_handle);
4350ed5401bSmrg	if (r)
4360ed5401bSmrg		return CUE_SINIT_FAILED;
4370ed5401bSmrg
4380ed5401bSmrg	r = amdgpu_bo_alloc_and_map(executing_device_handle, IB_SIZE, 4096,
4390ed5401bSmrg					AMDGPU_GEM_DOMAIN_GTT, 0,
4400ed5401bSmrg					&ib_handle, (void**)&ib_cpu,
4410ed5401bSmrg					&ib_mc_address, &ib_va_handle);
4420ed5401bSmrg	if (r)
4430ed5401bSmrg		return CUE_SINIT_FAILED;
4440ed5401bSmrg
4450ed5401bSmrg	if (do_p2p) {
4460ed5401bSmrg		r = amdgpu_device_initialize(drm_amdgpu[1],
4470ed5401bSmrg				&peer_exporting_device_major_version,
4480ed5401bSmrg				&peer_exporting_device_minor_version,
4490ed5401bSmrg				&peer_exporting_device_handle);
4500ed5401bSmrg
4510ed5401bSmrg		if (r)
4520ed5401bSmrg			return CUE_SINIT_FAILED;
4530ed5401bSmrg	}
4540ed5401bSmrg
4550ed5401bSmrg	reference_data = (uint8_t*)malloc(DMA_SIZE);
4560ed5401bSmrg	if (!reference_data)
4570ed5401bSmrg		return CUE_SINIT_FAILED;
4580ed5401bSmrg	memset(reference_data, DMA_DATA_BYTE, DMA_SIZE);
4590ed5401bSmrg
4600ed5401bSmrg	return CUE_SUCCESS;
4610ed5401bSmrg}
4620ed5401bSmrg
4630ed5401bSmrgint suite_cp_dma_tests_clean() {
4640ed5401bSmrg	int r;
4650ed5401bSmrg
4660ed5401bSmrg	free(reference_data);
4670ed5401bSmrg
4680ed5401bSmrg	r = amdgpu_bo_unmap_and_free(ib_handle, ib_va_handle,
4690ed5401bSmrg				 ib_mc_address, IB_SIZE);
4700ed5401bSmrg	if (r)
4710ed5401bSmrg		return CUE_SCLEAN_FAILED;
4720ed5401bSmrg
4730ed5401bSmrg	r = amdgpu_cs_ctx_free(context_handle);
4740ed5401bSmrg	if (r)
4750ed5401bSmrg		return CUE_SCLEAN_FAILED;
4760ed5401bSmrg
4770ed5401bSmrg	r = amdgpu_device_deinitialize(executing_device_handle);
4780ed5401bSmrg	if (r)
4790ed5401bSmrg		return CUE_SCLEAN_FAILED;
4800ed5401bSmrg
4810ed5401bSmrg	if (do_p2p) {
4820ed5401bSmrg		r = amdgpu_device_deinitialize(peer_exporting_device_handle);
4830ed5401bSmrg		if (r)
4840ed5401bSmrg			return CUE_SCLEAN_FAILED;
4850ed5401bSmrg	}
4860ed5401bSmrg
4870ed5401bSmrg	return CUE_SUCCESS;
4880ed5401bSmrg}
4890ed5401bSmrg
4900ed5401bSmrgCU_BOOL suite_cp_dma_tests_enable(void) {
4910ed5401bSmrg	int r = 0;
4920ed5401bSmrg
4930ed5401bSmrg	if (amdgpu_device_initialize(drm_amdgpu[0],
4940ed5401bSmrg			&executing_device_major_version,
4950ed5401bSmrg			&executing_device_minor_version,
4960ed5401bSmrg			&executing_device_handle))
4970ed5401bSmrg		return CU_FALSE;
4980ed5401bSmrg
4990ed5401bSmrg	if (!(executing_device_handle->info.family_id >= AMDGPU_FAMILY_AI &&
5000ed5401bSmrg			executing_device_handle->info.family_id <= AMDGPU_FAMILY_NV)) {
5010ed5401bSmrg		printf("Testing device has ASIC that is not supported by CP-DMA test suite!\n");
5020ed5401bSmrg		return CU_FALSE;
5030ed5401bSmrg	}
5040ed5401bSmrg
5050ed5401bSmrg	if (amdgpu_device_deinitialize(executing_device_handle))
5060ed5401bSmrg		return CU_FALSE;
5070ed5401bSmrg
5080ed5401bSmrg	if (drm_amdgpu[1] >= 0) {
5090ed5401bSmrg		r = amdgpu_device_initialize(drm_amdgpu[1],
5100ed5401bSmrg				&peer_exporting_device_major_version,
5110ed5401bSmrg				&peer_exporting_device_minor_version,
5120ed5401bSmrg				&peer_exporting_device_handle);
5130ed5401bSmrg
5140ed5401bSmrg		if (r == 0 && (peer_exporting_device_handle->info.family_id >= AMDGPU_FAMILY_AI &&
5150ed5401bSmrg						peer_exporting_device_handle->info.family_id <= AMDGPU_FAMILY_NV)) {
5160ed5401bSmrg			do_p2p = true;
5170ed5401bSmrg		}
5180ed5401bSmrg
5190ed5401bSmrg		if (r == 0 && amdgpu_device_deinitialize(peer_exporting_device_handle) != 0) {
5200ed5401bSmrg			printf("Deinitialize peer_exporting_device_handle failed!\n");
5210ed5401bSmrg			return CU_FALSE;
5220ed5401bSmrg		}
5230ed5401bSmrg	}
5240ed5401bSmrg
5250ed5401bSmrg	if (!do_p2p) {
5260ed5401bSmrg		amdgpu_set_test_active("CP DMA Tests", "Peer to Peer CP DMA write VRAM to VRAM", CU_FALSE);
5270ed5401bSmrg		amdgpu_set_test_active("CP DMA Tests", "Peer to Peer CP DMA write Host to VRAM", CU_FALSE);
5280ed5401bSmrg		amdgpu_set_test_active("CP DMA Tests", "Peer to Peer CP DMA write VRAM to Host", CU_FALSE);
5290ed5401bSmrg		printf("Peer device is not opened or has ASIC not supported by the suite, skip all Peer to Peer tests.\n");
5300ed5401bSmrg	}
5310ed5401bSmrg
5320ed5401bSmrg	return CU_TRUE;
5330ed5401bSmrg}
534