10ed5401bSmrg/* 20ed5401bSmrg * Copyright 2022 Advanced Micro Devices, Inc. 30ed5401bSmrg * 40ed5401bSmrg * Permission is hereby granted, free of charge, to any person obtaining a 50ed5401bSmrg * copy of this software and associated documentation files (the "Software"), 60ed5401bSmrg * to deal in the Software without restriction, including without limitation 70ed5401bSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 80ed5401bSmrg * and/or sell copies of the Software, and to permit persons to whom the 90ed5401bSmrg * Software is furnished to do so, subject to the following conditions: 100ed5401bSmrg * 110ed5401bSmrg * The above copyright notice and this permission notice shall be included in 120ed5401bSmrg * all copies or substantial portions of the Software. 130ed5401bSmrg * 140ed5401bSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 150ed5401bSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 160ed5401bSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 170ed5401bSmrg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 180ed5401bSmrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 190ed5401bSmrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 200ed5401bSmrg * OTHER DEALINGS IN THE SOFTWARE. 210ed5401bSmrg * 220ed5401bSmrg*/ 230ed5401bSmrg 240ed5401bSmrg#include <unistd.h> 250ed5401bSmrg#include <stdio.h> 260ed5401bSmrg#include <stdlib.h> 270ed5401bSmrg#include <inttypes.h> 280ed5401bSmrg 290ed5401bSmrg#include "CUnit/Basic.h" 300ed5401bSmrg 310ed5401bSmrg#include "amdgpu_test.h" 320ed5401bSmrg#include "amdgpu_drm.h" 330ed5401bSmrg#include "amdgpu_internal.h" 340ed5401bSmrg 350ed5401bSmrg#define IB_SIZE 4096 360ed5401bSmrg#define MAX_RESOURCES 8 370ed5401bSmrg 380ed5401bSmrg#define DMA_SIZE 4097 390ed5401bSmrg#define DMA_DATA_BYTE 0xea 400ed5401bSmrg 410ed5401bSmrgstatic bool do_p2p; 420ed5401bSmrg 430ed5401bSmrgstatic amdgpu_device_handle executing_device_handle; 440ed5401bSmrgstatic uint32_t executing_device_major_version; 450ed5401bSmrgstatic uint32_t executing_device_minor_version; 460ed5401bSmrg 470ed5401bSmrgstatic amdgpu_device_handle peer_exporting_device_handle; 480ed5401bSmrgstatic uint32_t peer_exporting_device_major_version; 490ed5401bSmrgstatic uint32_t peer_exporting_device_minor_version; 500ed5401bSmrg 510ed5401bSmrgstatic amdgpu_context_handle context_handle; 520ed5401bSmrgstatic amdgpu_bo_handle ib_handle; 530ed5401bSmrgstatic uint32_t *ib_cpu; 540ed5401bSmrgstatic uint64_t ib_mc_address; 550ed5401bSmrgstatic amdgpu_va_handle ib_va_handle; 560ed5401bSmrgstatic uint32_t num_dword; 570ed5401bSmrg 580ed5401bSmrgstatic amdgpu_bo_handle resources[MAX_RESOURCES]; 590ed5401bSmrgstatic unsigned num_resources; 600ed5401bSmrg 610ed5401bSmrgstatic uint8_t* reference_data; 620ed5401bSmrg 630ed5401bSmrgstatic void amdgpu_cp_dma_host_to_vram(void); 640ed5401bSmrgstatic void amdgpu_cp_dma_vram_to_host(void); 650ed5401bSmrgstatic void amdgpu_cp_dma_p2p_vram_to_vram(void); 660ed5401bSmrgstatic void amdgpu_cp_dma_p2p_host_to_vram(void); 670ed5401bSmrgstatic void amdgpu_cp_dma_p2p_vram_to_host(void); 680ed5401bSmrg 690ed5401bSmrg/** 700ed5401bSmrg * Tests in cp dma test suite 710ed5401bSmrg */ 720ed5401bSmrgCU_TestInfo cp_dma_tests[] = { 730ed5401bSmrg { "CP DMA write Host to VRAM", amdgpu_cp_dma_host_to_vram }, 740ed5401bSmrg { "CP DMA write VRAM to Host", amdgpu_cp_dma_vram_to_host }, 750ed5401bSmrg 760ed5401bSmrg { "Peer to Peer CP DMA write VRAM to VRAM", amdgpu_cp_dma_p2p_vram_to_vram }, 770ed5401bSmrg { "Peer to Peer CP DMA write Host to VRAM", amdgpu_cp_dma_p2p_host_to_vram }, 780ed5401bSmrg { "Peer to Peer CP DMA write VRAM to Host", amdgpu_cp_dma_p2p_vram_to_host }, 790ed5401bSmrg CU_TEST_INFO_NULL, 800ed5401bSmrg}; 810ed5401bSmrg 820ed5401bSmrgstruct amdgpu_cp_dma_bo{ 830ed5401bSmrg amdgpu_bo_handle buf_handle; 840ed5401bSmrg amdgpu_va_handle va_handle; 850ed5401bSmrg uint64_t gpu_va; 860ed5401bSmrg uint64_t size; 870ed5401bSmrg}; 880ed5401bSmrg 890ed5401bSmrgstatic int allocate_bo_and_va(amdgpu_device_handle dev, 900ed5401bSmrg uint64_t size, uint64_t alignment, 910ed5401bSmrg uint32_t heap, uint64_t alloc_flags, 920ed5401bSmrg struct amdgpu_cp_dma_bo *bo) { 930ed5401bSmrg struct amdgpu_bo_alloc_request request = {}; 940ed5401bSmrg amdgpu_bo_handle buf_handle; 950ed5401bSmrg amdgpu_va_handle va_handle; 960ed5401bSmrg uint64_t vmc_addr; 970ed5401bSmrg int r; 980ed5401bSmrg 990ed5401bSmrg request.alloc_size = size; 1000ed5401bSmrg request.phys_alignment = alignment; 1010ed5401bSmrg request.preferred_heap = heap; 1020ed5401bSmrg request.flags = alloc_flags; 1030ed5401bSmrg 1040ed5401bSmrg r = amdgpu_bo_alloc(dev, &request, &buf_handle); 1050ed5401bSmrg if (r) 1060ed5401bSmrg goto error_bo_alloc; 1070ed5401bSmrg 1080ed5401bSmrg r = amdgpu_va_range_alloc(dev, amdgpu_gpu_va_range_general, 1090ed5401bSmrg size, alignment, 0, 1100ed5401bSmrg &vmc_addr, &va_handle, 0); 1110ed5401bSmrg if (r) 1120ed5401bSmrg goto error_va_alloc; 1130ed5401bSmrg 1140ed5401bSmrg r = amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 1150ed5401bSmrg AMDGPU_VM_PAGE_READABLE | 1160ed5401bSmrg AMDGPU_VM_PAGE_WRITEABLE | 1170ed5401bSmrg AMDGPU_VM_PAGE_EXECUTABLE, 1180ed5401bSmrg AMDGPU_VA_OP_MAP); 1190ed5401bSmrg if (r) 1200ed5401bSmrg goto error_va_map; 1210ed5401bSmrg 1220ed5401bSmrg bo->buf_handle = buf_handle; 1230ed5401bSmrg bo->va_handle = va_handle; 1240ed5401bSmrg bo->gpu_va = vmc_addr; 1250ed5401bSmrg bo->size = size; 1260ed5401bSmrg 1270ed5401bSmrg return 0; 1280ed5401bSmrg 1290ed5401bSmrgerror_va_map: 1300ed5401bSmrg amdgpu_bo_va_op(buf_handle, 0, 1310ed5401bSmrg size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP); 1320ed5401bSmrg 1330ed5401bSmrgerror_va_alloc: 1340ed5401bSmrg amdgpu_va_range_free(va_handle); 1350ed5401bSmrg 1360ed5401bSmrgerror_bo_alloc: 1370ed5401bSmrg amdgpu_bo_free(buf_handle); 1380ed5401bSmrg 1390ed5401bSmrg return r; 1400ed5401bSmrg} 1410ed5401bSmrg 1420ed5401bSmrgstatic int import_dma_buf_to_bo(amdgpu_device_handle dev, 1430ed5401bSmrg int dmabuf_fd, struct amdgpu_cp_dma_bo *bo) { 1440ed5401bSmrg amdgpu_va_handle va_handle; 1450ed5401bSmrg uint64_t vmc_addr; 1460ed5401bSmrg int r; 1470ed5401bSmrg struct amdgpu_bo_import_result bo_import_result = {}; 1480ed5401bSmrg 1490ed5401bSmrg r = amdgpu_bo_import(dev, amdgpu_bo_handle_type_dma_buf_fd, 1500ed5401bSmrg dmabuf_fd, &bo_import_result); 1510ed5401bSmrg if (r) 1520ed5401bSmrg goto error_bo_import; 1530ed5401bSmrg 1540ed5401bSmrg r = amdgpu_va_range_alloc(dev, amdgpu_gpu_va_range_general, 1550ed5401bSmrg bo_import_result.alloc_size, 0, 0, 1560ed5401bSmrg &vmc_addr, &va_handle, 0); 1570ed5401bSmrg if (r) 1580ed5401bSmrg goto error_va_alloc; 1590ed5401bSmrg 1600ed5401bSmrg r = amdgpu_bo_va_op(bo_import_result.buf_handle, 0, 1610ed5401bSmrg bo_import_result.alloc_size, vmc_addr, 1620ed5401bSmrg AMDGPU_VM_PAGE_READABLE | 1630ed5401bSmrg AMDGPU_VM_PAGE_WRITEABLE | 1640ed5401bSmrg AMDGPU_VM_PAGE_EXECUTABLE, 1650ed5401bSmrg AMDGPU_VA_OP_MAP); 1660ed5401bSmrg if (r) 1670ed5401bSmrg goto error_va_map; 1680ed5401bSmrg 1690ed5401bSmrg bo->buf_handle = bo_import_result.buf_handle; 1700ed5401bSmrg bo->va_handle = va_handle; 1710ed5401bSmrg bo->gpu_va = vmc_addr; 1720ed5401bSmrg bo->size = bo_import_result.alloc_size; 1730ed5401bSmrg 1740ed5401bSmrg return 0; 1750ed5401bSmrg 1760ed5401bSmrgerror_va_map: 1770ed5401bSmrg amdgpu_bo_va_op(bo_import_result.buf_handle, 0, 1780ed5401bSmrg bo_import_result.alloc_size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP); 1790ed5401bSmrg 1800ed5401bSmrgerror_va_alloc: 1810ed5401bSmrg amdgpu_va_range_free(va_handle); 1820ed5401bSmrg 1830ed5401bSmrgerror_bo_import: 1840ed5401bSmrg amdgpu_bo_free(bo_import_result.buf_handle); 1850ed5401bSmrg 1860ed5401bSmrg return r; 1870ed5401bSmrg} 1880ed5401bSmrg 1890ed5401bSmrgstatic int free_bo(struct amdgpu_cp_dma_bo bo) { 1900ed5401bSmrg int r; 1910ed5401bSmrg r = amdgpu_bo_va_op(bo.buf_handle, 0, 1920ed5401bSmrg bo.size, bo.gpu_va, 0, AMDGPU_VA_OP_UNMAP); 1930ed5401bSmrg if(r) 1940ed5401bSmrg return r; 1950ed5401bSmrg 1960ed5401bSmrg r = amdgpu_va_range_free(bo.va_handle); 1970ed5401bSmrg if(r) 1980ed5401bSmrg return r; 1990ed5401bSmrg 2000ed5401bSmrg r = amdgpu_bo_free(bo.buf_handle); 2010ed5401bSmrg if(r) 2020ed5401bSmrg return r; 2030ed5401bSmrg 2040ed5401bSmrg return 0; 2050ed5401bSmrg} 2060ed5401bSmrg 2070ed5401bSmrgstatic int submit_and_sync() { 2080ed5401bSmrg struct amdgpu_cs_request ibs_request = {0}; 2090ed5401bSmrg struct amdgpu_cs_ib_info ib_info = {0}; 2100ed5401bSmrg struct amdgpu_cs_fence fence_status = {0}; 2110ed5401bSmrg uint32_t expired; 2120ed5401bSmrg uint32_t family_id, chip_id, chip_rev; 2130ed5401bSmrg unsigned gc_ip_type; 2140ed5401bSmrg int r; 2150ed5401bSmrg 2160ed5401bSmrg r = amdgpu_bo_list_create(executing_device_handle, 2170ed5401bSmrg num_resources, resources, 2180ed5401bSmrg NULL, &ibs_request.resources); 2190ed5401bSmrg if (r) 2200ed5401bSmrg return r; 2210ed5401bSmrg 2220ed5401bSmrg family_id = executing_device_handle->info.family_id; 2230ed5401bSmrg chip_id = executing_device_handle->info.chip_external_rev; 2240ed5401bSmrg chip_rev = executing_device_handle->info.chip_rev; 2250ed5401bSmrg 2260ed5401bSmrg gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ? 2270ed5401bSmrg AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX; 2280ed5401bSmrg 2290ed5401bSmrg ib_info.ib_mc_address = ib_mc_address; 2300ed5401bSmrg ib_info.size = num_dword; 2310ed5401bSmrg 2320ed5401bSmrg ibs_request.ip_type = gc_ip_type; 2330ed5401bSmrg ibs_request.number_of_ibs = 1; 2340ed5401bSmrg ibs_request.ibs = &ib_info; 2350ed5401bSmrg ibs_request.fence_info.handle = NULL; 2360ed5401bSmrg 2370ed5401bSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 2380ed5401bSmrg if (r) 2390ed5401bSmrg return r; 2400ed5401bSmrg 2410ed5401bSmrg r = amdgpu_bo_list_destroy(ibs_request.resources); 2420ed5401bSmrg if (r) 2430ed5401bSmrg return r; 2440ed5401bSmrg 2450ed5401bSmrg fence_status.context = context_handle; 2460ed5401bSmrg fence_status.ip_type = gc_ip_type; 2470ed5401bSmrg fence_status.fence = ibs_request.seq_no; 2480ed5401bSmrg 2490ed5401bSmrg r = amdgpu_cs_query_fence_status(&fence_status, 2500ed5401bSmrg AMDGPU_TIMEOUT_INFINITE, 2510ed5401bSmrg 0, &expired); 2520ed5401bSmrg if (r) 2530ed5401bSmrg return r; 2540ed5401bSmrg 2550ed5401bSmrg return 0; 2560ed5401bSmrg} 2570ed5401bSmrg 2580ed5401bSmrgstatic void cp_dma_cmd(struct amdgpu_cp_dma_bo src_bo, 2590ed5401bSmrg struct amdgpu_cp_dma_bo dst_bo) { 2600ed5401bSmrg _Static_assert(DMA_SIZE < (1 << 26), "DMA size exceeds CP DMA maximium!"); 2610ed5401bSmrg 2620ed5401bSmrg ib_cpu[0] = 0xc0055000; 2630ed5401bSmrg ib_cpu[1] = 0x80000000; 2640ed5401bSmrg ib_cpu[2] = src_bo.gpu_va & 0x00000000ffffffff; 2650ed5401bSmrg ib_cpu[3] = (src_bo.gpu_va & 0xffffffff00000000) >> 32; 2660ed5401bSmrg ib_cpu[4] = dst_bo.gpu_va & 0x00000000ffffffff; 2670ed5401bSmrg ib_cpu[5] = (dst_bo.gpu_va & 0xffffffff00000000) >> 32; 2680ed5401bSmrg // size is read from the lower 26bits. 2690ed5401bSmrg ib_cpu[6] = ((1 << 26) - 1) & DMA_SIZE; 2700ed5401bSmrg ib_cpu[7] = 0xffff1000; 2710ed5401bSmrg 2720ed5401bSmrg num_dword = 8; 2730ed5401bSmrg 2740ed5401bSmrg resources[0] = src_bo.buf_handle; 2750ed5401bSmrg resources[1] = dst_bo.buf_handle; 2760ed5401bSmrg resources[2] = ib_handle; 2770ed5401bSmrg num_resources = 3; 2780ed5401bSmrg} 2790ed5401bSmrg 2800ed5401bSmrgstatic void amdgpu_cp_dma(uint32_t src_heap, uint32_t dst_heap) { 2810ed5401bSmrg int r; 2820ed5401bSmrg struct amdgpu_cp_dma_bo src_bo = {0}; 2830ed5401bSmrg struct amdgpu_cp_dma_bo dst_bo = {0}; 2840ed5401bSmrg void *src_bo_cpu; 2850ed5401bSmrg void *dst_bo_cpu; 2860ed5401bSmrg 2870ed5401bSmrg /* allocate the src bo, set its data to DMA_DATA_BYTE */ 2880ed5401bSmrg r = allocate_bo_and_va(executing_device_handle, DMA_SIZE, 4096, 2890ed5401bSmrg src_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &src_bo); 2900ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 2910ed5401bSmrg 2920ed5401bSmrg r = amdgpu_bo_cpu_map(src_bo.buf_handle, (void **)&src_bo_cpu); 2930ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 2940ed5401bSmrg memset(src_bo_cpu, DMA_DATA_BYTE, DMA_SIZE); 2950ed5401bSmrg 2960ed5401bSmrg r = amdgpu_bo_cpu_unmap(src_bo.buf_handle); 2970ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 2980ed5401bSmrg 2990ed5401bSmrg /* allocate the dst bo and clear its content to all 0 */ 3000ed5401bSmrg r = allocate_bo_and_va(executing_device_handle, DMA_SIZE, 4096, 3010ed5401bSmrg dst_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &dst_bo); 3020ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 3030ed5401bSmrg 3040ed5401bSmrg r = amdgpu_bo_cpu_map(dst_bo.buf_handle, (void **)&dst_bo_cpu); 3050ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 3060ed5401bSmrg 3070ed5401bSmrg _Static_assert(DMA_DATA_BYTE != 0, "Initialization data should be different from DMA data!"); 3080ed5401bSmrg memset(dst_bo_cpu, 0, DMA_SIZE); 3090ed5401bSmrg 3100ed5401bSmrg /* record CP DMA command and dispatch the command */ 3110ed5401bSmrg cp_dma_cmd(src_bo, dst_bo); 3120ed5401bSmrg 3130ed5401bSmrg r = submit_and_sync(); 3140ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 3150ed5401bSmrg 3160ed5401bSmrg /* verify the dst bo is filled with DMA_DATA_BYTE */ 3170ed5401bSmrg CU_ASSERT_EQUAL(memcmp(dst_bo_cpu, reference_data, DMA_SIZE) == 0, true); 3180ed5401bSmrg 3190ed5401bSmrg r = amdgpu_bo_cpu_unmap(dst_bo.buf_handle); 3200ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 3210ed5401bSmrg 3220ed5401bSmrg r = free_bo(src_bo); 3230ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 3240ed5401bSmrg 3250ed5401bSmrg r = free_bo(dst_bo); 3260ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 3270ed5401bSmrg} 3280ed5401bSmrg 3290ed5401bSmrgstatic void amdgpu_cp_dma_p2p(uint32_t src_heap, uint32_t dst_heap) { 3300ed5401bSmrg int r; 3310ed5401bSmrg struct amdgpu_cp_dma_bo exported_bo = {0}; 3320ed5401bSmrg int dma_buf_fd; 3330ed5401bSmrg int dma_buf_fd_dup; 3340ed5401bSmrg struct amdgpu_cp_dma_bo src_bo = {0}; 3350ed5401bSmrg struct amdgpu_cp_dma_bo imported_dst_bo = {0}; 3360ed5401bSmrg void *exported_bo_cpu; 3370ed5401bSmrg void *src_bo_cpu; 3380ed5401bSmrg 3390ed5401bSmrg /* allocate a bo on the peer device and export it to dma-buf */ 3400ed5401bSmrg r = allocate_bo_and_va(peer_exporting_device_handle, DMA_SIZE, 4096, 3410ed5401bSmrg src_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &exported_bo); 3420ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 3430ed5401bSmrg 3440ed5401bSmrg /* map the exported bo and clear its content to 0 */ 3450ed5401bSmrg _Static_assert(DMA_DATA_BYTE != 0, "Initialization data should be different from DMA data!"); 3460ed5401bSmrg r = amdgpu_bo_cpu_map(exported_bo.buf_handle, (void **)&exported_bo_cpu); 3470ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 3480ed5401bSmrg memset(exported_bo_cpu, 0, DMA_SIZE); 3490ed5401bSmrg 3500ed5401bSmrg r = amdgpu_bo_export(exported_bo.buf_handle, 3510ed5401bSmrg amdgpu_bo_handle_type_dma_buf_fd, (uint32_t*)&dma_buf_fd); 3520ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 3530ed5401bSmrg 3540ed5401bSmrg // According to amdgpu_drm: 3550ed5401bSmrg // "Buffer must be "imported" only using new "fd" 3560ed5401bSmrg // (different from one used by "exporter")" 3570ed5401bSmrg dma_buf_fd_dup = dup(dma_buf_fd); 3580ed5401bSmrg r = close(dma_buf_fd); 3590ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 3600ed5401bSmrg 3610ed5401bSmrg /* import the dma-buf to the executing device, imported bo is the DMA destination */ 3620ed5401bSmrg r = import_dma_buf_to_bo( 3630ed5401bSmrg executing_device_handle, dma_buf_fd_dup, &imported_dst_bo); 3640ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 3650ed5401bSmrg 3660ed5401bSmrg r = close(dma_buf_fd_dup); 3670ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 3680ed5401bSmrg 3690ed5401bSmrg /* allocate the src bo and set its content to DMA_DATA_BYTE */ 3700ed5401bSmrg r = allocate_bo_and_va(executing_device_handle, DMA_SIZE, 4096, 3710ed5401bSmrg dst_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &src_bo); 3720ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 3730ed5401bSmrg 3740ed5401bSmrg r = amdgpu_bo_cpu_map(src_bo.buf_handle, (void **)&src_bo_cpu); 3750ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 3760ed5401bSmrg 3770ed5401bSmrg memset(src_bo_cpu, DMA_DATA_BYTE, DMA_SIZE); 3780ed5401bSmrg 3790ed5401bSmrg r = amdgpu_bo_cpu_unmap(src_bo.buf_handle); 3800ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 3810ed5401bSmrg 3820ed5401bSmrg /* record CP DMA command and dispatch the command */ 3830ed5401bSmrg cp_dma_cmd(src_bo, imported_dst_bo); 3840ed5401bSmrg 3850ed5401bSmrg r = submit_and_sync(); 3860ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 3870ed5401bSmrg 3880ed5401bSmrg /* verify the bo from the peer device is filled with DMA_DATA_BYTE */ 3890ed5401bSmrg CU_ASSERT_EQUAL(memcmp(exported_bo_cpu, reference_data, DMA_SIZE) == 0, true); 3900ed5401bSmrg 3910ed5401bSmrg r = amdgpu_bo_cpu_unmap(exported_bo.buf_handle); 3920ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 3930ed5401bSmrg 3940ed5401bSmrg r = free_bo(exported_bo); 3950ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 3960ed5401bSmrg 3970ed5401bSmrg r = free_bo(imported_dst_bo); 3980ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 3990ed5401bSmrg 4000ed5401bSmrg r = free_bo(src_bo); 4010ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 4020ed5401bSmrg} 4030ed5401bSmrg 4040ed5401bSmrgstatic void amdgpu_cp_dma_host_to_vram(void) { 4050ed5401bSmrg amdgpu_cp_dma(AMDGPU_GEM_DOMAIN_GTT, AMDGPU_GEM_DOMAIN_VRAM); 4060ed5401bSmrg} 4070ed5401bSmrg 4080ed5401bSmrgstatic void amdgpu_cp_dma_vram_to_host(void) { 4090ed5401bSmrg amdgpu_cp_dma(AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_GTT); 4100ed5401bSmrg} 4110ed5401bSmrg 4120ed5401bSmrgstatic void amdgpu_cp_dma_p2p_vram_to_vram(void) { 4130ed5401bSmrg amdgpu_cp_dma_p2p(AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_VRAM); 4140ed5401bSmrg} 4150ed5401bSmrg 4160ed5401bSmrgstatic void amdgpu_cp_dma_p2p_host_to_vram(void) { 4170ed5401bSmrg amdgpu_cp_dma_p2p(AMDGPU_GEM_DOMAIN_GTT, AMDGPU_GEM_DOMAIN_VRAM); 4180ed5401bSmrg} 4190ed5401bSmrg 4200ed5401bSmrgstatic void amdgpu_cp_dma_p2p_vram_to_host(void) { 4210ed5401bSmrg amdgpu_cp_dma_p2p(AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_GTT); 4220ed5401bSmrg} 4230ed5401bSmrg 4240ed5401bSmrgint suite_cp_dma_tests_init() { 4250ed5401bSmrg int r; 4260ed5401bSmrg 4270ed5401bSmrg r = amdgpu_device_initialize(drm_amdgpu[0], 4280ed5401bSmrg &executing_device_major_version, 4290ed5401bSmrg &executing_device_minor_version, 4300ed5401bSmrg &executing_device_handle); 4310ed5401bSmrg if (r) 4320ed5401bSmrg return CUE_SINIT_FAILED; 4330ed5401bSmrg 4340ed5401bSmrg r = amdgpu_cs_ctx_create(executing_device_handle, &context_handle); 4350ed5401bSmrg if (r) 4360ed5401bSmrg return CUE_SINIT_FAILED; 4370ed5401bSmrg 4380ed5401bSmrg r = amdgpu_bo_alloc_and_map(executing_device_handle, IB_SIZE, 4096, 4390ed5401bSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 4400ed5401bSmrg &ib_handle, (void**)&ib_cpu, 4410ed5401bSmrg &ib_mc_address, &ib_va_handle); 4420ed5401bSmrg if (r) 4430ed5401bSmrg return CUE_SINIT_FAILED; 4440ed5401bSmrg 4450ed5401bSmrg if (do_p2p) { 4460ed5401bSmrg r = amdgpu_device_initialize(drm_amdgpu[1], 4470ed5401bSmrg &peer_exporting_device_major_version, 4480ed5401bSmrg &peer_exporting_device_minor_version, 4490ed5401bSmrg &peer_exporting_device_handle); 4500ed5401bSmrg 4510ed5401bSmrg if (r) 4520ed5401bSmrg return CUE_SINIT_FAILED; 4530ed5401bSmrg } 4540ed5401bSmrg 4550ed5401bSmrg reference_data = (uint8_t*)malloc(DMA_SIZE); 4560ed5401bSmrg if (!reference_data) 4570ed5401bSmrg return CUE_SINIT_FAILED; 4580ed5401bSmrg memset(reference_data, DMA_DATA_BYTE, DMA_SIZE); 4590ed5401bSmrg 4600ed5401bSmrg return CUE_SUCCESS; 4610ed5401bSmrg} 4620ed5401bSmrg 4630ed5401bSmrgint suite_cp_dma_tests_clean() { 4640ed5401bSmrg int r; 4650ed5401bSmrg 4660ed5401bSmrg free(reference_data); 4670ed5401bSmrg 4680ed5401bSmrg r = amdgpu_bo_unmap_and_free(ib_handle, ib_va_handle, 4690ed5401bSmrg ib_mc_address, IB_SIZE); 4700ed5401bSmrg if (r) 4710ed5401bSmrg return CUE_SCLEAN_FAILED; 4720ed5401bSmrg 4730ed5401bSmrg r = amdgpu_cs_ctx_free(context_handle); 4740ed5401bSmrg if (r) 4750ed5401bSmrg return CUE_SCLEAN_FAILED; 4760ed5401bSmrg 4770ed5401bSmrg r = amdgpu_device_deinitialize(executing_device_handle); 4780ed5401bSmrg if (r) 4790ed5401bSmrg return CUE_SCLEAN_FAILED; 4800ed5401bSmrg 4810ed5401bSmrg if (do_p2p) { 4820ed5401bSmrg r = amdgpu_device_deinitialize(peer_exporting_device_handle); 4830ed5401bSmrg if (r) 4840ed5401bSmrg return CUE_SCLEAN_FAILED; 4850ed5401bSmrg } 4860ed5401bSmrg 4870ed5401bSmrg return CUE_SUCCESS; 4880ed5401bSmrg} 4890ed5401bSmrg 4900ed5401bSmrgCU_BOOL suite_cp_dma_tests_enable(void) { 4910ed5401bSmrg int r = 0; 4920ed5401bSmrg 4930ed5401bSmrg if (amdgpu_device_initialize(drm_amdgpu[0], 4940ed5401bSmrg &executing_device_major_version, 4950ed5401bSmrg &executing_device_minor_version, 4960ed5401bSmrg &executing_device_handle)) 4970ed5401bSmrg return CU_FALSE; 4980ed5401bSmrg 4990ed5401bSmrg if (!(executing_device_handle->info.family_id >= AMDGPU_FAMILY_AI && 5000ed5401bSmrg executing_device_handle->info.family_id <= AMDGPU_FAMILY_NV)) { 5010ed5401bSmrg printf("Testing device has ASIC that is not supported by CP-DMA test suite!\n"); 5020ed5401bSmrg return CU_FALSE; 5030ed5401bSmrg } 5040ed5401bSmrg 5050ed5401bSmrg if (amdgpu_device_deinitialize(executing_device_handle)) 5060ed5401bSmrg return CU_FALSE; 5070ed5401bSmrg 5080ed5401bSmrg if (drm_amdgpu[1] >= 0) { 5090ed5401bSmrg r = amdgpu_device_initialize(drm_amdgpu[1], 5100ed5401bSmrg &peer_exporting_device_major_version, 5110ed5401bSmrg &peer_exporting_device_minor_version, 5120ed5401bSmrg &peer_exporting_device_handle); 5130ed5401bSmrg 5140ed5401bSmrg if (r == 0 && (peer_exporting_device_handle->info.family_id >= AMDGPU_FAMILY_AI && 5150ed5401bSmrg peer_exporting_device_handle->info.family_id <= AMDGPU_FAMILY_NV)) { 5160ed5401bSmrg do_p2p = true; 5170ed5401bSmrg } 5180ed5401bSmrg 5190ed5401bSmrg if (r == 0 && amdgpu_device_deinitialize(peer_exporting_device_handle) != 0) { 5200ed5401bSmrg printf("Deinitialize peer_exporting_device_handle failed!\n"); 5210ed5401bSmrg return CU_FALSE; 5220ed5401bSmrg } 5230ed5401bSmrg } 5240ed5401bSmrg 5250ed5401bSmrg if (!do_p2p) { 5260ed5401bSmrg amdgpu_set_test_active("CP DMA Tests", "Peer to Peer CP DMA write VRAM to VRAM", CU_FALSE); 5270ed5401bSmrg amdgpu_set_test_active("CP DMA Tests", "Peer to Peer CP DMA write Host to VRAM", CU_FALSE); 5280ed5401bSmrg amdgpu_set_test_active("CP DMA Tests", "Peer to Peer CP DMA write VRAM to Host", CU_FALSE); 5290ed5401bSmrg printf("Peer device is not opened or has ASIC not supported by the suite, skip all Peer to Peer tests.\n"); 5300ed5401bSmrg } 5310ed5401bSmrg 5320ed5401bSmrg return CU_TRUE; 5330ed5401bSmrg} 534