/* ras_tests.c revision 88f8a8d2 */
15324fb0dSmrg/* 25324fb0dSmrg * Copyright 2017 Advanced Micro Devices, Inc. 35324fb0dSmrg * 45324fb0dSmrg * Permission is hereby granted, free of charge, to any person obtaining a 55324fb0dSmrg * copy of this software and associated documentation files (the "Software"), 65324fb0dSmrg * to deal in the Software without restriction, including without limitation 75324fb0dSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 85324fb0dSmrg * and/or sell copies of the Software, and to permit persons to whom the 95324fb0dSmrg * Software is furnished to do so, subject to the following conditions: 105324fb0dSmrg * 115324fb0dSmrg * The above copyright notice and this permission notice shall be included in 125324fb0dSmrg * all copies or substantial portions of the Software. 135324fb0dSmrg * 145324fb0dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 155324fb0dSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 165324fb0dSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 175324fb0dSmrg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 185324fb0dSmrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 195324fb0dSmrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 205324fb0dSmrg * OTHER DEALINGS IN THE SOFTWARE. 
215324fb0dSmrg * 225324fb0dSmrg*/ 235324fb0dSmrg 245324fb0dSmrg#include "CUnit/Basic.h" 255324fb0dSmrg 265324fb0dSmrg#include "amdgpu_test.h" 275324fb0dSmrg#include "amdgpu_drm.h" 285324fb0dSmrg#include "amdgpu_internal.h" 295324fb0dSmrg#include <unistd.h> 305324fb0dSmrg#include <fcntl.h> 315324fb0dSmrg#include <stdio.h> 325324fb0dSmrg#include "xf86drm.h" 335324fb0dSmrg 3488f8a8d2Smrg#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) 3588f8a8d2Smrg 365324fb0dSmrgconst char *ras_block_string[] = { 375324fb0dSmrg "umc", 385324fb0dSmrg "sdma", 395324fb0dSmrg "gfx", 405324fb0dSmrg "mmhub", 415324fb0dSmrg "athub", 425324fb0dSmrg "pcie_bif", 435324fb0dSmrg "hdp", 445324fb0dSmrg "xgmi_wafl", 455324fb0dSmrg "df", 465324fb0dSmrg "smn", 475324fb0dSmrg "sem", 485324fb0dSmrg "mp0", 495324fb0dSmrg "mp1", 505324fb0dSmrg "fuse", 515324fb0dSmrg}; 525324fb0dSmrg 535324fb0dSmrg#define ras_block_str(i) (ras_block_string[i]) 545324fb0dSmrg 555324fb0dSmrgenum amdgpu_ras_block { 565324fb0dSmrg AMDGPU_RAS_BLOCK__UMC = 0, 575324fb0dSmrg AMDGPU_RAS_BLOCK__SDMA, 585324fb0dSmrg AMDGPU_RAS_BLOCK__GFX, 595324fb0dSmrg AMDGPU_RAS_BLOCK__MMHUB, 605324fb0dSmrg AMDGPU_RAS_BLOCK__ATHUB, 615324fb0dSmrg AMDGPU_RAS_BLOCK__PCIE_BIF, 625324fb0dSmrg AMDGPU_RAS_BLOCK__HDP, 635324fb0dSmrg AMDGPU_RAS_BLOCK__XGMI_WAFL, 645324fb0dSmrg AMDGPU_RAS_BLOCK__DF, 655324fb0dSmrg AMDGPU_RAS_BLOCK__SMN, 665324fb0dSmrg AMDGPU_RAS_BLOCK__SEM, 675324fb0dSmrg AMDGPU_RAS_BLOCK__MP0, 685324fb0dSmrg AMDGPU_RAS_BLOCK__MP1, 695324fb0dSmrg AMDGPU_RAS_BLOCK__FUSE, 705324fb0dSmrg 715324fb0dSmrg AMDGPU_RAS_BLOCK__LAST 725324fb0dSmrg}; 735324fb0dSmrg 745324fb0dSmrg#define AMDGPU_RAS_BLOCK_COUNT AMDGPU_RAS_BLOCK__LAST 755324fb0dSmrg#define AMDGPU_RAS_BLOCK_MASK ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1) 765324fb0dSmrg 7788f8a8d2Smrgenum amdgpu_ras_gfx_subblock { 7888f8a8d2Smrg /* CPC */ 7988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0, 8088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH = 8188f8a8d2Smrg 
AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START, 8288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPC_UCODE, 8388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1, 8488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1, 8588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1, 8688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2, 8788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2, 8888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2, 8988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END = 9088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2, 9188f8a8d2Smrg /* CPF */ 9288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START, 9388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 = 9488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START, 9588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1, 9688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPF_TAG, 9788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG, 9888f8a8d2Smrg /* CPG */ 9988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START, 10088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ = 10188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START, 10288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG, 10388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPG_TAG, 10488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG, 10588f8a8d2Smrg /* GDS */ 10688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START, 10788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START, 10888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, 10988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, 11088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, 11188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 11288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END = 11388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 11488f8a8d2Smrg /* SPI */ 11588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM, 11688f8a8d2Smrg /* SQ */ 11788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START, 11888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START, 
11988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D, 12088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I, 12188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQ_VGPR, 12288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR, 12388f8a8d2Smrg /* SQC (3 ranges) */ 12488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START, 12588f8a8d2Smrg /* SQC range 0 */ 12688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START = 12788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START, 12888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = 12988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START, 13088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 13188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO, 13288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 13388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, 13488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 13588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, 13688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END = 13788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, 13888f8a8d2Smrg /* SQC range 1 */ 13988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START, 14088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = 14188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START, 14288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 14388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO, 14488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM, 14588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM, 14688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO, 14788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO, 14888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 14988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, 15088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END = 15188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, 15288f8a8d2Smrg /* SQC range 2 */ 15388f8a8d2Smrg 
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START, 15488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = 15588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START, 15688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 15788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO, 15888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM, 15988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM, 16088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO, 16188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO, 16288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 16388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, 16488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END = 16588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, 16688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END = 16788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END, 16888f8a8d2Smrg /* TA */ 16988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START, 17088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO = 17188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START, 17288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO, 17388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO, 17488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO, 17588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO, 17688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO, 17788f8a8d2Smrg /* TCA */ 17888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START, 17988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO = 18088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START, 18188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO, 18288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END = 18388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO, 18488f8a8d2Smrg /* TCC (5 sub-ranges) */ 18588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START, 18688f8a8d2Smrg /* TCC range 0 */ 18788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START = 18888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START, 18988f8a8d2Smrg 
AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA = 19088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START, 19188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, 19288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, 19388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, 19488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0, 19588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1, 19688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG, 19788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, 19888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END = 19988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, 20088f8a8d2Smrg /* TCC range 1 */ 20188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START, 20288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC = 20388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START, 20488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, 20588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END = 20688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, 20788f8a8d2Smrg /* TCC range 2 */ 20888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START, 20988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA = 21088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START, 21188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL, 21288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO, 21388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN, 21488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ, 21588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO, 21688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM, 21788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, 21888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END = 21988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, 22088f8a8d2Smrg /* TCC range 3 */ 22188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START, 22288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = 22388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START, 22488f8a8d2Smrg 
AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, 22588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END = 22688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, 22788f8a8d2Smrg /* TCC range 4 */ 22888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START, 22988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = 23088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START, 23188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, 23288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END = 23388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, 23488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END = 23588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END, 23688f8a8d2Smrg /* TCI */ 23788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM, 23888f8a8d2Smrg /* TCP */ 23988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START, 24088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM = 24188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START, 24288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM, 24388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO, 24488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO, 24588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM, 24688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0, 24788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, 24888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END = 24988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, 25088f8a8d2Smrg /* TD */ 25188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START, 25288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO = 25388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START, 25488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI, 25588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO, 25688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO, 25788f8a8d2Smrg /* EA (3 sub-ranges) */ 25888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START, 25988f8a8d2Smrg /* EA range 0 */ 26088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START = 26188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START, 26288f8a8d2Smrg 
AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = 26388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START, 26488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM, 26588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM, 26688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM, 26788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM, 26888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM, 26988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM, 27088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, 27188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END = 27288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, 27388f8a8d2Smrg /* EA range 1 */ 27488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START, 27588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = 27688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START, 27788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM, 27888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM, 27988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM, 28088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM, 28188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM, 28288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, 28388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END = 28488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, 28588f8a8d2Smrg /* EA range 2 */ 28688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START, 28788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM = 28888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START, 28988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM, 29088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM, 29188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM, 29288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END = 29388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM, 29488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END = 29588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END, 29688f8a8d2Smrg /* UTC VM L2 bank */ 29788f8a8d2Smrg AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE, 29888f8a8d2Smrg /* UTC VM walker */ 29988f8a8d2Smrg AMDGPU_RAS_BLOCK__UTC_VML2_WALKER, 30088f8a8d2Smrg /* 
UTC ATC L2 2MB cache */ 30188f8a8d2Smrg AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK, 30288f8a8d2Smrg /* UTC ATC L2 4KB cache */ 30388f8a8d2Smrg AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK, 30488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_MAX 30588f8a8d2Smrg}; 30688f8a8d2Smrg 3075324fb0dSmrgenum amdgpu_ras_error_type { 30888f8a8d2Smrg AMDGPU_RAS_ERROR__NONE = 0, 30988f8a8d2Smrg AMDGPU_RAS_ERROR__PARITY = 1, 31088f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE = 2, 31188f8a8d2Smrg AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE = 4, 31288f8a8d2Smrg AMDGPU_RAS_ERROR__POISON = 8, 31388f8a8d2Smrg}; 31488f8a8d2Smrg 31588f8a8d2Smrgstruct ras_inject_test_config { 31688f8a8d2Smrg char name[64]; 31788f8a8d2Smrg char block[32]; 31888f8a8d2Smrg int sub_block; 31988f8a8d2Smrg enum amdgpu_ras_error_type type; 32088f8a8d2Smrg uint64_t address; 32188f8a8d2Smrg uint64_t value; 3225324fb0dSmrg}; 3235324fb0dSmrg 3245324fb0dSmrgstruct ras_common_if { 3255324fb0dSmrg enum amdgpu_ras_block block; 3265324fb0dSmrg enum amdgpu_ras_error_type type; 3275324fb0dSmrg uint32_t sub_block_index; 3285324fb0dSmrg char name[32]; 3295324fb0dSmrg}; 3305324fb0dSmrg 3315324fb0dSmrgstruct ras_inject_if { 3325324fb0dSmrg struct ras_common_if head; 3335324fb0dSmrg uint64_t address; 3345324fb0dSmrg uint64_t value; 3355324fb0dSmrg}; 3365324fb0dSmrg 3375324fb0dSmrgstruct ras_debug_if { 3385324fb0dSmrg union { 3395324fb0dSmrg struct ras_common_if head; 3405324fb0dSmrg struct ras_inject_if inject; 3415324fb0dSmrg }; 3425324fb0dSmrg int op; 3435324fb0dSmrg}; 3445324fb0dSmrg/* for now, only umc, gfx, sdma has implemented. 
*/ 34588f8a8d2Smrg#define DEFAULT_RAS_BLOCK_MASK_INJECT ((1 << AMDGPU_RAS_BLOCK__UMC) |\ 34688f8a8d2Smrg (1 << AMDGPU_RAS_BLOCK__GFX)) 34788f8a8d2Smrg#define DEFAULT_RAS_BLOCK_MASK_QUERY ((1 << AMDGPU_RAS_BLOCK__UMC) |\ 34888f8a8d2Smrg (1 << AMDGPU_RAS_BLOCK__GFX)) 3495324fb0dSmrg#define DEFAULT_RAS_BLOCK_MASK_BASIC (1 << AMDGPU_RAS_BLOCK__UMC |\ 3505324fb0dSmrg (1 << AMDGPU_RAS_BLOCK__SDMA) |\ 3515324fb0dSmrg (1 << AMDGPU_RAS_BLOCK__GFX)) 3525324fb0dSmrg 3535324fb0dSmrgstatic uint32_t ras_block_mask_inject = DEFAULT_RAS_BLOCK_MASK_INJECT; 3545324fb0dSmrgstatic uint32_t ras_block_mask_query = DEFAULT_RAS_BLOCK_MASK_INJECT; 3555324fb0dSmrgstatic uint32_t ras_block_mask_basic = DEFAULT_RAS_BLOCK_MASK_BASIC; 3565324fb0dSmrg 3575324fb0dSmrgstruct ras_test_mask { 3585324fb0dSmrg uint32_t inject_mask; 3595324fb0dSmrg uint32_t query_mask; 3605324fb0dSmrg uint32_t basic_mask; 3615324fb0dSmrg}; 3625324fb0dSmrg 3635324fb0dSmrgstruct amdgpu_ras_data { 3645324fb0dSmrg amdgpu_device_handle device_handle; 3655324fb0dSmrg uint32_t id; 3665324fb0dSmrg uint32_t capability; 3675324fb0dSmrg struct ras_test_mask test_mask; 3685324fb0dSmrg}; 3695324fb0dSmrg 3705324fb0dSmrg/* all devices who has ras supported */ 3715324fb0dSmrgstatic struct amdgpu_ras_data devices[MAX_CARDS_SUPPORTED]; 3725324fb0dSmrgstatic int devices_count; 3735324fb0dSmrg 3745324fb0dSmrgstruct ras_DID_test_mask{ 3755324fb0dSmrg uint16_t device_id; 3765324fb0dSmrg uint16_t revision_id; 3775324fb0dSmrg struct ras_test_mask test_mask; 3785324fb0dSmrg}; 3795324fb0dSmrg 3805324fb0dSmrg/* white list for inject test. 
*/ 3815324fb0dSmrg#define RAS_BLOCK_MASK_ALL {\ 3825324fb0dSmrg DEFAULT_RAS_BLOCK_MASK_INJECT,\ 3835324fb0dSmrg DEFAULT_RAS_BLOCK_MASK_QUERY,\ 3845324fb0dSmrg DEFAULT_RAS_BLOCK_MASK_BASIC\ 3855324fb0dSmrg} 3865324fb0dSmrg 3875324fb0dSmrg#define RAS_BLOCK_MASK_QUERY_BASIC {\ 3885324fb0dSmrg 0,\ 3895324fb0dSmrg DEFAULT_RAS_BLOCK_MASK_QUERY,\ 3905324fb0dSmrg DEFAULT_RAS_BLOCK_MASK_BASIC\ 3915324fb0dSmrg} 3925324fb0dSmrg 39388f8a8d2Smrgstatic const struct ras_inject_test_config umc_ras_inject_test[] = { 39488f8a8d2Smrg {"ras_umc.1.0", "umc", 0, AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 39588f8a8d2Smrg}; 39688f8a8d2Smrg 39788f8a8d2Smrgstatic const struct ras_inject_test_config gfx_ras_inject_test[] = { 39888f8a8d2Smrg {"ras_gfx.2.0", "gfx", AMDGPU_RAS_BLOCK__GFX_CPC_UCODE, 39988f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 40088f8a8d2Smrg {"ras_gfx.2.1", "gfx", AMDGPU_RAS_BLOCK__GFX_CPF_TAG, 40188f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 40288f8a8d2Smrg {"ras_gfx.2.2", "gfx", AMDGPU_RAS_BLOCK__GFX_CPG_TAG, 40388f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 40488f8a8d2Smrg {"ras_gfx.2.3", "gfx", AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D, 40588f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 40688f8a8d2Smrg {"ras_gfx.2.4", "gfx", AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, 40788f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 40888f8a8d2Smrg {"ras_gfx.2.5", "gfx", AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM, 40988f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 41088f8a8d2Smrg {"ras_gfx.2.6", "gfx", AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM, 41188f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 41288f8a8d2Smrg {"ras_gfx.2.7", "gfx", AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO, 41388f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 41488f8a8d2Smrg {"ras_gfx.2.8", "gfx", AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA, 41588f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 41688f8a8d2Smrg {"ras_gfx.2.9", "gfx", 
AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, 41788f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 41888f8a8d2Smrg {"ras_gfx.2.10", "gfx", AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, 41988f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 42088f8a8d2Smrg {"ras_gfx.2.11", "gfx", AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, 42188f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 42288f8a8d2Smrg {"ras_gfx.2.12", "gfx", AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM, 42388f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 42488f8a8d2Smrg {"ras_gfx.2.13", "gfx", AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO, 42588f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 42688f8a8d2Smrg {"ras_gfx.2.14", "gfx", AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM, 42788f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 42888f8a8d2Smrg}; 42988f8a8d2Smrg 4305324fb0dSmrgstatic const struct ras_DID_test_mask ras_DID_array[] = { 4315324fb0dSmrg {0x66a1, 0x00, RAS_BLOCK_MASK_ALL}, 4325324fb0dSmrg {0x66a1, 0x01, RAS_BLOCK_MASK_ALL}, 4335324fb0dSmrg {0x66a1, 0x04, RAS_BLOCK_MASK_ALL}, 4345324fb0dSmrg}; 4355324fb0dSmrg 43688f8a8d2Smrgstatic uint32_t amdgpu_ras_find_block_id_by_name(const char *name) 43788f8a8d2Smrg{ 43888f8a8d2Smrg int i; 43988f8a8d2Smrg 44088f8a8d2Smrg for (i = 0; i < ARRAY_SIZE(ras_block_string); i++) { 44188f8a8d2Smrg if (strcmp(name, ras_block_string[i]) == 0) 44288f8a8d2Smrg return i; 44388f8a8d2Smrg } 44488f8a8d2Smrg 44588f8a8d2Smrg return ARRAY_SIZE(ras_block_string); 44688f8a8d2Smrg} 44788f8a8d2Smrg 44888f8a8d2Smrgstatic char *amdgpu_ras_get_error_type_id(enum amdgpu_ras_error_type type) 44988f8a8d2Smrg{ 45088f8a8d2Smrg switch (type) { 45188f8a8d2Smrg case AMDGPU_RAS_ERROR__PARITY: 45288f8a8d2Smrg return "parity"; 45388f8a8d2Smrg case AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE: 45488f8a8d2Smrg return "single_correctable"; 45588f8a8d2Smrg case AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE: 45688f8a8d2Smrg return "multi_uncorrectable"; 45788f8a8d2Smrg case 
AMDGPU_RAS_ERROR__POISON: 45888f8a8d2Smrg return "poison"; 45988f8a8d2Smrg case AMDGPU_RAS_ERROR__NONE: 46088f8a8d2Smrg default: 46188f8a8d2Smrg return NULL; 46288f8a8d2Smrg } 46388f8a8d2Smrg} 46488f8a8d2Smrg 4655324fb0dSmrgstatic struct ras_test_mask amdgpu_ras_get_test_mask(drmDevicePtr device) 4665324fb0dSmrg{ 4675324fb0dSmrg int i; 4685324fb0dSmrg static struct ras_test_mask default_test_mask = RAS_BLOCK_MASK_QUERY_BASIC; 4695324fb0dSmrg 4705324fb0dSmrg for (i = 0; i < sizeof(ras_DID_array) / sizeof(ras_DID_array[0]); i++) { 4715324fb0dSmrg if (ras_DID_array[i].device_id == device->deviceinfo.pci->device_id && 4725324fb0dSmrg ras_DID_array[i].revision_id == device->deviceinfo.pci->revision_id) 4735324fb0dSmrg return ras_DID_array[i].test_mask; 4745324fb0dSmrg } 4755324fb0dSmrg return default_test_mask; 4765324fb0dSmrg} 4775324fb0dSmrg 4785324fb0dSmrgstatic uint32_t amdgpu_ras_lookup_capability(amdgpu_device_handle device_handle) 4795324fb0dSmrg{ 4805324fb0dSmrg union { 4815324fb0dSmrg uint64_t feature_mask; 4825324fb0dSmrg struct { 4835324fb0dSmrg uint32_t enabled_features; 4845324fb0dSmrg uint32_t supported_features; 4855324fb0dSmrg }; 4865324fb0dSmrg } features = { 0 }; 4875324fb0dSmrg int ret; 4885324fb0dSmrg 4895324fb0dSmrg ret = amdgpu_query_info(device_handle, AMDGPU_INFO_RAS_ENABLED_FEATURES, 4905324fb0dSmrg sizeof(features), &features); 4915324fb0dSmrg if (ret) 4925324fb0dSmrg return 0; 4935324fb0dSmrg 4945324fb0dSmrg return features.supported_features; 4955324fb0dSmrg} 4965324fb0dSmrg 4975324fb0dSmrgstatic int get_file_contents(char *file, char *buf, int size); 4985324fb0dSmrg 4995324fb0dSmrgstatic int amdgpu_ras_lookup_id(drmDevicePtr device) 5005324fb0dSmrg{ 5015324fb0dSmrg char path[1024]; 5025324fb0dSmrg char str[128]; 5035324fb0dSmrg drmPciBusInfo info; 5045324fb0dSmrg int i; 5055324fb0dSmrg int ret; 5065324fb0dSmrg 5075324fb0dSmrg for (i = 0; i < MAX_CARDS_SUPPORTED; i++) { 5085324fb0dSmrg memset(str, 0, sizeof(str)); 5095324fb0dSmrg 
memset(&info, 0, sizeof(info)); 5105324fb0dSmrg sprintf(path, "/sys/kernel/debug/dri/%d/name", i); 5115324fb0dSmrg if (get_file_contents(path, str, sizeof(str)) <= 0) 5125324fb0dSmrg continue; 5135324fb0dSmrg 5145324fb0dSmrg ret = sscanf(str, "amdgpu dev=%04hx:%02hhx:%02hhx.%01hhx", 5155324fb0dSmrg &info.domain, &info.bus, &info.dev, &info.func); 5165324fb0dSmrg if (ret != 4) 5175324fb0dSmrg continue; 5185324fb0dSmrg 5195324fb0dSmrg if (memcmp(&info, device->businfo.pci, sizeof(info)) == 0) 5205324fb0dSmrg return i; 5215324fb0dSmrg } 5225324fb0dSmrg return -1; 5235324fb0dSmrg} 5245324fb0dSmrg 5255324fb0dSmrgCU_BOOL suite_ras_tests_enable(void) 5265324fb0dSmrg{ 5275324fb0dSmrg amdgpu_device_handle device_handle; 5285324fb0dSmrg uint32_t major_version; 5295324fb0dSmrg uint32_t minor_version; 5305324fb0dSmrg int i; 5315324fb0dSmrg drmDevicePtr device; 5325324fb0dSmrg 5335324fb0dSmrg for (i = 0; i < MAX_CARDS_SUPPORTED && drm_amdgpu[i] >= 0; i++) { 5345324fb0dSmrg if (amdgpu_device_initialize(drm_amdgpu[i], &major_version, 5355324fb0dSmrg &minor_version, &device_handle)) 5365324fb0dSmrg continue; 5375324fb0dSmrg 5385324fb0dSmrg if (drmGetDevice2(drm_amdgpu[i], 5395324fb0dSmrg DRM_DEVICE_GET_PCI_REVISION, 5405324fb0dSmrg &device)) 5415324fb0dSmrg continue; 5425324fb0dSmrg 5435324fb0dSmrg if (device->bustype == DRM_BUS_PCI && 5445324fb0dSmrg amdgpu_ras_lookup_capability(device_handle)) { 5455324fb0dSmrg amdgpu_device_deinitialize(device_handle); 5465324fb0dSmrg return CU_TRUE; 5475324fb0dSmrg } 5485324fb0dSmrg 5495324fb0dSmrg if (amdgpu_device_deinitialize(device_handle)) 5505324fb0dSmrg continue; 5515324fb0dSmrg } 5525324fb0dSmrg 5535324fb0dSmrg return CU_FALSE; 5545324fb0dSmrg} 5555324fb0dSmrg 5565324fb0dSmrgint suite_ras_tests_init(void) 5575324fb0dSmrg{ 5585324fb0dSmrg drmDevicePtr device; 5595324fb0dSmrg amdgpu_device_handle device_handle; 5605324fb0dSmrg uint32_t major_version; 5615324fb0dSmrg uint32_t minor_version; 5625324fb0dSmrg uint32_t capability; 
5635324fb0dSmrg struct ras_test_mask test_mask; 5645324fb0dSmrg int id; 5655324fb0dSmrg int i; 5665324fb0dSmrg int r; 5675324fb0dSmrg 5685324fb0dSmrg for (i = 0; i < MAX_CARDS_SUPPORTED && drm_amdgpu[i] >= 0; i++) { 5695324fb0dSmrg r = amdgpu_device_initialize(drm_amdgpu[i], &major_version, 5705324fb0dSmrg &minor_version, &device_handle); 5715324fb0dSmrg if (r) 5725324fb0dSmrg continue; 5735324fb0dSmrg 5745324fb0dSmrg if (drmGetDevice2(drm_amdgpu[i], 5755324fb0dSmrg DRM_DEVICE_GET_PCI_REVISION, 5765324fb0dSmrg &device)) { 5775324fb0dSmrg amdgpu_device_deinitialize(device_handle); 5785324fb0dSmrg continue; 5795324fb0dSmrg } 5805324fb0dSmrg 5815324fb0dSmrg if (device->bustype != DRM_BUS_PCI) { 5825324fb0dSmrg amdgpu_device_deinitialize(device_handle); 5835324fb0dSmrg continue; 5845324fb0dSmrg } 5855324fb0dSmrg 5865324fb0dSmrg capability = amdgpu_ras_lookup_capability(device_handle); 5875324fb0dSmrg if (capability == 0) { 5885324fb0dSmrg amdgpu_device_deinitialize(device_handle); 5895324fb0dSmrg continue; 5905324fb0dSmrg 5915324fb0dSmrg } 5925324fb0dSmrg 5935324fb0dSmrg id = amdgpu_ras_lookup_id(device); 5945324fb0dSmrg if (id == -1) { 5955324fb0dSmrg amdgpu_device_deinitialize(device_handle); 5965324fb0dSmrg continue; 5975324fb0dSmrg } 5985324fb0dSmrg 5995324fb0dSmrg test_mask = amdgpu_ras_get_test_mask(device); 6005324fb0dSmrg 6015324fb0dSmrg devices[devices_count++] = (struct amdgpu_ras_data) { 6025324fb0dSmrg device_handle, id, capability, test_mask, 6035324fb0dSmrg }; 6045324fb0dSmrg } 6055324fb0dSmrg 6065324fb0dSmrg if (devices_count == 0) 6075324fb0dSmrg return CUE_SINIT_FAILED; 6085324fb0dSmrg 6095324fb0dSmrg return CUE_SUCCESS; 6105324fb0dSmrg} 6115324fb0dSmrg 6125324fb0dSmrgint suite_ras_tests_clean(void) 6135324fb0dSmrg{ 6145324fb0dSmrg int r; 6155324fb0dSmrg int i; 6165324fb0dSmrg int ret = CUE_SUCCESS; 6175324fb0dSmrg 6185324fb0dSmrg for (i = 0; i < devices_count; i++) { 6195324fb0dSmrg r = amdgpu_device_deinitialize(devices[i].device_handle); 
6205324fb0dSmrg if (r) 6215324fb0dSmrg ret = CUE_SCLEAN_FAILED; 6225324fb0dSmrg } 6235324fb0dSmrg return ret; 6245324fb0dSmrg} 6255324fb0dSmrg 6265324fb0dSmrgstatic void amdgpu_ras_disable_test(void); 6275324fb0dSmrgstatic void amdgpu_ras_enable_test(void); 6285324fb0dSmrgstatic void amdgpu_ras_inject_test(void); 6295324fb0dSmrgstatic void amdgpu_ras_query_test(void); 6305324fb0dSmrgstatic void amdgpu_ras_basic_test(void); 6315324fb0dSmrg 6325324fb0dSmrgCU_TestInfo ras_tests[] = { 6335324fb0dSmrg { "ras basic test", amdgpu_ras_basic_test }, 6345324fb0dSmrg { "ras query test", amdgpu_ras_query_test }, 6355324fb0dSmrg { "ras inject test", amdgpu_ras_inject_test }, 6365324fb0dSmrg { "ras disable test", amdgpu_ras_disable_test }, 6375324fb0dSmrg#if 0 6385324fb0dSmrg { "ras enable test", amdgpu_ras_enable_test }, 6395324fb0dSmrg#endif 6405324fb0dSmrg CU_TEST_INFO_NULL, 6415324fb0dSmrg}; 6425324fb0dSmrg 6435324fb0dSmrg//helpers 6445324fb0dSmrg 6455324fb0dSmrgstatic int test_card; 6465324fb0dSmrgstatic char sysfs_path[1024]; 6475324fb0dSmrgstatic char debugfs_path[1024]; 6485324fb0dSmrgstatic uint32_t ras_mask; 6495324fb0dSmrgstatic amdgpu_device_handle device_handle; 6505324fb0dSmrg 6515324fb0dSmrgstatic int set_test_card(int card) 6525324fb0dSmrg{ 6535324fb0dSmrg int i; 6545324fb0dSmrg 6555324fb0dSmrg test_card = card; 6565324fb0dSmrg sprintf(sysfs_path, "/sys/class/drm/card%d/device/ras/", devices[card].id); 6575324fb0dSmrg sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/ras/", devices[card].id); 6585324fb0dSmrg ras_mask = devices[card].capability; 6595324fb0dSmrg device_handle = devices[card].device_handle; 6605324fb0dSmrg ras_block_mask_inject = devices[card].test_mask.inject_mask; 6615324fb0dSmrg ras_block_mask_query = devices[card].test_mask.query_mask; 6625324fb0dSmrg ras_block_mask_basic = devices[card].test_mask.basic_mask; 6635324fb0dSmrg 6645324fb0dSmrg return 0; 6655324fb0dSmrg} 6665324fb0dSmrg 6675324fb0dSmrgstatic const char *get_ras_sysfs_root(void) 
6685324fb0dSmrg{ 6695324fb0dSmrg return sysfs_path; 6705324fb0dSmrg} 6715324fb0dSmrg 6725324fb0dSmrgstatic const char *get_ras_debugfs_root(void) 6735324fb0dSmrg{ 6745324fb0dSmrg return debugfs_path; 6755324fb0dSmrg} 6765324fb0dSmrg 6775324fb0dSmrgstatic int set_file_contents(char *file, char *buf, int size) 6785324fb0dSmrg{ 6795324fb0dSmrg int n, fd; 6805324fb0dSmrg fd = open(file, O_WRONLY); 6815324fb0dSmrg if (fd == -1) 6825324fb0dSmrg return -1; 6835324fb0dSmrg n = write(fd, buf, size); 6845324fb0dSmrg close(fd); 6855324fb0dSmrg return n; 6865324fb0dSmrg} 6875324fb0dSmrg 6885324fb0dSmrgstatic int get_file_contents(char *file, char *buf, int size) 6895324fb0dSmrg{ 6905324fb0dSmrg int n, fd; 6915324fb0dSmrg fd = open(file, O_RDONLY); 6925324fb0dSmrg if (fd == -1) 6935324fb0dSmrg return -1; 6945324fb0dSmrg n = read(fd, buf, size); 6955324fb0dSmrg close(fd); 6965324fb0dSmrg return n; 6975324fb0dSmrg} 6985324fb0dSmrg 6995324fb0dSmrgstatic int is_file_ok(char *file, int flags) 7005324fb0dSmrg{ 7015324fb0dSmrg int fd; 7025324fb0dSmrg 7035324fb0dSmrg fd = open(file, flags); 7045324fb0dSmrg if (fd == -1) 7055324fb0dSmrg return -1; 7065324fb0dSmrg close(fd); 7075324fb0dSmrg return 0; 7085324fb0dSmrg} 7095324fb0dSmrg 7105324fb0dSmrgstatic int amdgpu_ras_is_feature_enabled(enum amdgpu_ras_block block) 7115324fb0dSmrg{ 7125324fb0dSmrg uint32_t feature_mask; 7135324fb0dSmrg int ret; 7145324fb0dSmrg 7155324fb0dSmrg ret = amdgpu_query_info(device_handle, AMDGPU_INFO_RAS_ENABLED_FEATURES, 7165324fb0dSmrg sizeof(feature_mask), &feature_mask); 7175324fb0dSmrg if (ret) 7185324fb0dSmrg return -1; 7195324fb0dSmrg 7205324fb0dSmrg return (1 << block) & feature_mask; 7215324fb0dSmrg} 7225324fb0dSmrg 7235324fb0dSmrgstatic int amdgpu_ras_is_feature_supported(enum amdgpu_ras_block block) 7245324fb0dSmrg{ 7255324fb0dSmrg return (1 << block) & ras_mask; 7265324fb0dSmrg} 7275324fb0dSmrg 7285324fb0dSmrgstatic int amdgpu_ras_invoke(struct ras_debug_if *data) 7295324fb0dSmrg{ 7305324fb0dSmrg 
char path[1024]; 7315324fb0dSmrg int ret; 7325324fb0dSmrg 7335324fb0dSmrg sprintf(path, "%s%s", get_ras_debugfs_root(), "ras_ctrl"); 7345324fb0dSmrg 7355324fb0dSmrg ret = set_file_contents(path, (char *)data, sizeof(*data)) 7365324fb0dSmrg - sizeof(*data); 7375324fb0dSmrg return ret; 7385324fb0dSmrg} 7395324fb0dSmrg 7405324fb0dSmrgstatic int amdgpu_ras_query_err_count(enum amdgpu_ras_block block, 7415324fb0dSmrg unsigned long *ue, unsigned long *ce) 7425324fb0dSmrg{ 7435324fb0dSmrg char buf[64]; 7445324fb0dSmrg char name[1024]; 7455324fb0dSmrg int ret; 7465324fb0dSmrg 7475324fb0dSmrg *ue = *ce = 0; 7485324fb0dSmrg 7495324fb0dSmrg if (amdgpu_ras_is_feature_supported(block) <= 0) 7505324fb0dSmrg return -1; 7515324fb0dSmrg 7525324fb0dSmrg sprintf(name, "%s%s%s", get_ras_sysfs_root(), ras_block_str(block), "_err_count"); 7535324fb0dSmrg 7545324fb0dSmrg if (is_file_ok(name, O_RDONLY)) 7555324fb0dSmrg return 0; 7565324fb0dSmrg 7575324fb0dSmrg if (get_file_contents(name, buf, sizeof(buf)) <= 0) 7585324fb0dSmrg return -1; 7595324fb0dSmrg 7605324fb0dSmrg if (sscanf(buf, "ue: %lu\nce: %lu", ue, ce) != 2) 7615324fb0dSmrg return -1; 7625324fb0dSmrg 7635324fb0dSmrg return 0; 7645324fb0dSmrg} 7655324fb0dSmrg 76688f8a8d2Smrgstatic int amdgpu_ras_inject(enum amdgpu_ras_block block, 76788f8a8d2Smrg uint32_t sub_block, enum amdgpu_ras_error_type type, 76888f8a8d2Smrg uint64_t address, uint64_t value) 76988f8a8d2Smrg{ 77088f8a8d2Smrg struct ras_debug_if data = { .op = 2, }; 77188f8a8d2Smrg struct ras_inject_if *inject = &data.inject; 77288f8a8d2Smrg int ret; 77388f8a8d2Smrg 77488f8a8d2Smrg if (amdgpu_ras_is_feature_enabled(block) <= 0) { 77588f8a8d2Smrg fprintf(stderr, "block id(%d) is not valid\n", block); 77688f8a8d2Smrg return -1; 77788f8a8d2Smrg } 77888f8a8d2Smrg 77988f8a8d2Smrg inject->head.block = block; 78088f8a8d2Smrg inject->head.type = type; 78188f8a8d2Smrg inject->head.sub_block_index = sub_block; 78288f8a8d2Smrg strncpy(inject->head.name, ras_block_str(block), 32); 
78388f8a8d2Smrg inject->address = address; 78488f8a8d2Smrg inject->value = value; 78588f8a8d2Smrg 78688f8a8d2Smrg ret = amdgpu_ras_invoke(&data); 78788f8a8d2Smrg CU_ASSERT_EQUAL(ret, 0); 78888f8a8d2Smrg if (ret) 78988f8a8d2Smrg return -1; 79088f8a8d2Smrg 79188f8a8d2Smrg return 0; 79288f8a8d2Smrg} 79388f8a8d2Smrg 7945324fb0dSmrg//tests 7955324fb0dSmrgstatic void amdgpu_ras_features_test(int enable) 7965324fb0dSmrg{ 7975324fb0dSmrg struct ras_debug_if data; 7985324fb0dSmrg int ret; 7995324fb0dSmrg int i; 8005324fb0dSmrg 8015324fb0dSmrg data.op = enable; 8025324fb0dSmrg for (i = 0; i < AMDGPU_RAS_BLOCK__LAST; i++) { 8035324fb0dSmrg struct ras_common_if head = { 8045324fb0dSmrg .block = i, 8055324fb0dSmrg .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, 8065324fb0dSmrg .sub_block_index = 0, 8075324fb0dSmrg .name = "", 8085324fb0dSmrg }; 8095324fb0dSmrg 8105324fb0dSmrg if (amdgpu_ras_is_feature_supported(i) <= 0) 8115324fb0dSmrg continue; 8125324fb0dSmrg 8135324fb0dSmrg data.head = head; 8145324fb0dSmrg 8155324fb0dSmrg ret = amdgpu_ras_invoke(&data); 8165324fb0dSmrg CU_ASSERT_EQUAL(ret, 0); 8175324fb0dSmrg 8185324fb0dSmrg if (ret) 8195324fb0dSmrg continue; 8205324fb0dSmrg 8215324fb0dSmrg ret = enable ^ amdgpu_ras_is_feature_enabled(i); 8225324fb0dSmrg CU_ASSERT_EQUAL(ret, 0); 8235324fb0dSmrg } 8245324fb0dSmrg} 8255324fb0dSmrg 8265324fb0dSmrgstatic void amdgpu_ras_disable_test(void) 8275324fb0dSmrg{ 8285324fb0dSmrg int i; 8295324fb0dSmrg for (i = 0; i < devices_count; i++) { 8305324fb0dSmrg set_test_card(i); 8315324fb0dSmrg amdgpu_ras_features_test(0); 8325324fb0dSmrg } 8335324fb0dSmrg} 8345324fb0dSmrg 8355324fb0dSmrgstatic void amdgpu_ras_enable_test(void) 8365324fb0dSmrg{ 8375324fb0dSmrg int i; 8385324fb0dSmrg for (i = 0; i < devices_count; i++) { 8395324fb0dSmrg set_test_card(i); 8405324fb0dSmrg amdgpu_ras_features_test(1); 8415324fb0dSmrg } 8425324fb0dSmrg} 8435324fb0dSmrg 84488f8a8d2Smrgstatic void __amdgpu_ras_ip_inject_test(const struct ras_inject_test_config 
*ip_test, 84588f8a8d2Smrg uint32_t size) 8465324fb0dSmrg{ 84788f8a8d2Smrg int i, ret; 84888f8a8d2Smrg unsigned long old_ue, old_ce; 84988f8a8d2Smrg unsigned long ue, ce; 85088f8a8d2Smrg uint32_t block; 85188f8a8d2Smrg int timeout; 85288f8a8d2Smrg bool pass; 8535324fb0dSmrg 85488f8a8d2Smrg for (i = 0; i < size; i++) { 85588f8a8d2Smrg timeout = 3; 85688f8a8d2Smrg pass = false; 8575324fb0dSmrg 85888f8a8d2Smrg block = amdgpu_ras_find_block_id_by_name(ip_test[i].block); 8595324fb0dSmrg 86088f8a8d2Smrg /* Ensure one valid ip block */ 86188f8a8d2Smrg if (block == ARRAY_SIZE(ras_block_string)) 86288f8a8d2Smrg break; 8635324fb0dSmrg 86488f8a8d2Smrg /* Ensure RAS feature for the IP block is enabled by kernel */ 86588f8a8d2Smrg if (amdgpu_ras_is_feature_supported(block) <= 0) 86688f8a8d2Smrg break; 8675324fb0dSmrg 86888f8a8d2Smrg ret = amdgpu_ras_query_err_count(block, &old_ue, &old_ce); 8695324fb0dSmrg CU_ASSERT_EQUAL(ret, 0); 8705324fb0dSmrg if (ret) 87188f8a8d2Smrg break; 8725324fb0dSmrg 87388f8a8d2Smrg ret = amdgpu_ras_inject(block, 87488f8a8d2Smrg ip_test[i].sub_block, 87588f8a8d2Smrg ip_test[i].type, 87688f8a8d2Smrg ip_test[i].address, 87788f8a8d2Smrg ip_test[i].value); 8785324fb0dSmrg CU_ASSERT_EQUAL(ret, 0); 8795324fb0dSmrg if (ret) 88088f8a8d2Smrg break; 8815324fb0dSmrg 8825324fb0dSmrg while (timeout > 0) { 88388f8a8d2Smrg sleep(5); 8845324fb0dSmrg 88588f8a8d2Smrg ret = amdgpu_ras_query_err_count(block, &ue, &ce); 88688f8a8d2Smrg CU_ASSERT_EQUAL(ret, 0); 8875324fb0dSmrg if (ret) 8885324fb0dSmrg break; 8895324fb0dSmrg 89088f8a8d2Smrg if (old_ue != ue || old_ce != ce) { 89188f8a8d2Smrg pass = true; 89288f8a8d2Smrg sleep(20); 89388f8a8d2Smrg break; 89488f8a8d2Smrg } 8955324fb0dSmrg timeout -= 1; 8965324fb0dSmrg } 89788f8a8d2Smrg printf("\t Test %s@block %s, subblock %d, error_type %s, address %ld, value %ld: %s\n", 89888f8a8d2Smrg ip_test[i].name, 89988f8a8d2Smrg ip_test[i].block, 90088f8a8d2Smrg ip_test[i].sub_block, 90188f8a8d2Smrg 
amdgpu_ras_get_error_type_id(ip_test[i].type), 90288f8a8d2Smrg ip_test[i].address, 90388f8a8d2Smrg ip_test[i].value, 90488f8a8d2Smrg pass ? "Pass" : "Fail"); 9055324fb0dSmrg } 9065324fb0dSmrg} 9075324fb0dSmrg 90888f8a8d2Smrgstatic void __amdgpu_ras_inject_test(void) 90988f8a8d2Smrg{ 91088f8a8d2Smrg printf("...\n"); 91188f8a8d2Smrg 91288f8a8d2Smrg /* run UMC ras inject test */ 91388f8a8d2Smrg __amdgpu_ras_ip_inject_test(umc_ras_inject_test, 91488f8a8d2Smrg ARRAY_SIZE(umc_ras_inject_test)); 91588f8a8d2Smrg 91688f8a8d2Smrg /* run GFX ras inject test */ 91788f8a8d2Smrg __amdgpu_ras_ip_inject_test(gfx_ras_inject_test, 91888f8a8d2Smrg ARRAY_SIZE(gfx_ras_inject_test)); 91988f8a8d2Smrg} 92088f8a8d2Smrg 9215324fb0dSmrgstatic void amdgpu_ras_inject_test(void) 9225324fb0dSmrg{ 9235324fb0dSmrg int i; 9245324fb0dSmrg for (i = 0; i < devices_count; i++) { 9255324fb0dSmrg set_test_card(i); 9265324fb0dSmrg __amdgpu_ras_inject_test(); 9275324fb0dSmrg } 9285324fb0dSmrg} 9295324fb0dSmrg 9305324fb0dSmrgstatic void __amdgpu_ras_query_test(void) 9315324fb0dSmrg{ 9325324fb0dSmrg unsigned long ue, ce; 9335324fb0dSmrg int ret; 9345324fb0dSmrg int i; 9355324fb0dSmrg 9365324fb0dSmrg for (i = 0; i < AMDGPU_RAS_BLOCK__LAST; i++) { 9375324fb0dSmrg if (amdgpu_ras_is_feature_supported(i) <= 0) 9385324fb0dSmrg continue; 9395324fb0dSmrg 9405324fb0dSmrg if (!((1 << i) & ras_block_mask_query)) 9415324fb0dSmrg continue; 9425324fb0dSmrg 9435324fb0dSmrg ret = amdgpu_ras_query_err_count(i, &ue, &ce); 9445324fb0dSmrg CU_ASSERT_EQUAL(ret, 0); 9455324fb0dSmrg } 9465324fb0dSmrg} 9475324fb0dSmrg 9485324fb0dSmrgstatic void amdgpu_ras_query_test(void) 9495324fb0dSmrg{ 9505324fb0dSmrg int i; 9515324fb0dSmrg for (i = 0; i < devices_count; i++) { 9525324fb0dSmrg set_test_card(i); 9535324fb0dSmrg __amdgpu_ras_query_test(); 9545324fb0dSmrg } 9555324fb0dSmrg} 9565324fb0dSmrg 9575324fb0dSmrgstatic void amdgpu_ras_basic_test(void) 9585324fb0dSmrg{ 9595324fb0dSmrg unsigned long ue, ce; 9605324fb0dSmrg char name[1024]; 
9615324fb0dSmrg int ret; 9625324fb0dSmrg int i; 9635324fb0dSmrg int j; 9645324fb0dSmrg uint32_t features; 9655324fb0dSmrg char path[1024]; 9665324fb0dSmrg 9675324fb0dSmrg ret = is_file_ok("/sys/module/amdgpu/parameters/ras_mask", O_RDONLY); 9685324fb0dSmrg CU_ASSERT_EQUAL(ret, 0); 9695324fb0dSmrg 9705324fb0dSmrg for (i = 0; i < devices_count; i++) { 9715324fb0dSmrg set_test_card(i); 9725324fb0dSmrg 9735324fb0dSmrg ret = amdgpu_query_info(device_handle, AMDGPU_INFO_RAS_ENABLED_FEATURES, 9745324fb0dSmrg sizeof(features), &features); 9755324fb0dSmrg CU_ASSERT_EQUAL(ret, 0); 9765324fb0dSmrg 9775324fb0dSmrg sprintf(path, "%s%s", get_ras_debugfs_root(), "ras_ctrl"); 9785324fb0dSmrg ret = is_file_ok(path, O_WRONLY); 9795324fb0dSmrg CU_ASSERT_EQUAL(ret, 0); 9805324fb0dSmrg 9815324fb0dSmrg sprintf(path, "%s%s", get_ras_sysfs_root(), "features"); 9825324fb0dSmrg ret = is_file_ok(path, O_RDONLY); 9835324fb0dSmrg CU_ASSERT_EQUAL(ret, 0); 9845324fb0dSmrg 9855324fb0dSmrg for (j = 0; j < AMDGPU_RAS_BLOCK__LAST; j++) { 9865324fb0dSmrg ret = amdgpu_ras_is_feature_supported(j); 9875324fb0dSmrg if (ret <= 0) 9885324fb0dSmrg continue; 9895324fb0dSmrg 9905324fb0dSmrg if (!((1 << j) & ras_block_mask_basic)) 9915324fb0dSmrg continue; 9925324fb0dSmrg 9935324fb0dSmrg sprintf(path, "%s%s%s", get_ras_sysfs_root(), ras_block_str(j), "_err_count"); 9945324fb0dSmrg ret = is_file_ok(path, O_RDONLY); 9955324fb0dSmrg CU_ASSERT_EQUAL(ret, 0); 9965324fb0dSmrg 9975324fb0dSmrg sprintf(path, "%s%s%s", get_ras_debugfs_root(), ras_block_str(j), "_err_inject"); 9985324fb0dSmrg ret = is_file_ok(path, O_WRONLY); 9995324fb0dSmrg CU_ASSERT_EQUAL(ret, 0); 10005324fb0dSmrg } 10015324fb0dSmrg } 10025324fb0dSmrg} 1003