15324fb0dSmrg/* 25324fb0dSmrg * Copyright 2017 Advanced Micro Devices, Inc. 35324fb0dSmrg * 45324fb0dSmrg * Permission is hereby granted, free of charge, to any person obtaining a 55324fb0dSmrg * copy of this software and associated documentation files (the "Software"), 65324fb0dSmrg * to deal in the Software without restriction, including without limitation 75324fb0dSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 85324fb0dSmrg * and/or sell copies of the Software, and to permit persons to whom the 95324fb0dSmrg * Software is furnished to do so, subject to the following conditions: 105324fb0dSmrg * 115324fb0dSmrg * The above copyright notice and this permission notice shall be included in 125324fb0dSmrg * all copies or substantial portions of the Software. 135324fb0dSmrg * 145324fb0dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 155324fb0dSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 165324fb0dSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 175324fb0dSmrg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 185324fb0dSmrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 195324fb0dSmrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 205324fb0dSmrg * OTHER DEALINGS IN THE SOFTWARE. 215324fb0dSmrg * 225324fb0dSmrg*/ 235324fb0dSmrg 245324fb0dSmrg#include "CUnit/Basic.h" 255324fb0dSmrg 265324fb0dSmrg#include "amdgpu_test.h" 275324fb0dSmrg#include "amdgpu_drm.h" 285324fb0dSmrg#include "amdgpu_internal.h" 295324fb0dSmrg#include <unistd.h> 305324fb0dSmrg#include <fcntl.h> 315324fb0dSmrg#include <stdio.h> 325324fb0dSmrg#include "xf86drm.h" 339bd392adSmrg#include <limits.h> 349bd392adSmrg 359bd392adSmrg#define PATH_SIZE PATH_MAX 365324fb0dSmrg 3788f8a8d2Smrg#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) 3888f8a8d2Smrg 395324fb0dSmrgconst char *ras_block_string[] = { 405324fb0dSmrg "umc", 415324fb0dSmrg "sdma", 425324fb0dSmrg "gfx", 435324fb0dSmrg "mmhub", 445324fb0dSmrg "athub", 455324fb0dSmrg "pcie_bif", 465324fb0dSmrg "hdp", 475324fb0dSmrg "xgmi_wafl", 485324fb0dSmrg "df", 495324fb0dSmrg "smn", 505324fb0dSmrg "sem", 515324fb0dSmrg "mp0", 525324fb0dSmrg "mp1", 535324fb0dSmrg "fuse", 545324fb0dSmrg}; 555324fb0dSmrg 565324fb0dSmrg#define ras_block_str(i) (ras_block_string[i]) 575324fb0dSmrg 585324fb0dSmrgenum amdgpu_ras_block { 595324fb0dSmrg AMDGPU_RAS_BLOCK__UMC = 0, 605324fb0dSmrg AMDGPU_RAS_BLOCK__SDMA, 615324fb0dSmrg AMDGPU_RAS_BLOCK__GFX, 625324fb0dSmrg AMDGPU_RAS_BLOCK__MMHUB, 635324fb0dSmrg AMDGPU_RAS_BLOCK__ATHUB, 645324fb0dSmrg AMDGPU_RAS_BLOCK__PCIE_BIF, 655324fb0dSmrg AMDGPU_RAS_BLOCK__HDP, 665324fb0dSmrg AMDGPU_RAS_BLOCK__XGMI_WAFL, 675324fb0dSmrg AMDGPU_RAS_BLOCK__DF, 685324fb0dSmrg AMDGPU_RAS_BLOCK__SMN, 695324fb0dSmrg AMDGPU_RAS_BLOCK__SEM, 705324fb0dSmrg AMDGPU_RAS_BLOCK__MP0, 715324fb0dSmrg AMDGPU_RAS_BLOCK__MP1, 725324fb0dSmrg AMDGPU_RAS_BLOCK__FUSE, 735324fb0dSmrg 745324fb0dSmrg AMDGPU_RAS_BLOCK__LAST 755324fb0dSmrg}; 765324fb0dSmrg 775324fb0dSmrg#define AMDGPU_RAS_BLOCK_COUNT AMDGPU_RAS_BLOCK__LAST 785324fb0dSmrg#define AMDGPU_RAS_BLOCK_MASK ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1) 795324fb0dSmrg 8088f8a8d2Smrgenum amdgpu_ras_gfx_subblock { 8188f8a8d2Smrg /* CPC */ 8288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0, 8388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH = 8488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START, 8588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPC_UCODE, 8688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1, 8788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1, 8888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1, 8988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2, 9088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2, 9188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2, 9288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END = 9388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2, 9488f8a8d2Smrg /* CPF */ 9588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START, 9688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 = 9788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START, 9888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1, 9988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPF_TAG, 10088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG, 10188f8a8d2Smrg /* CPG */ 10288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START, 10388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ = 10488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START, 10588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG, 10688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPG_TAG, 10788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG, 10888f8a8d2Smrg /* GDS */ 10988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START, 11088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START, 11188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, 11288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, 11388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, 11488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 11588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END = 11688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 11788f8a8d2Smrg /* SPI */ 11888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM, 11988f8a8d2Smrg /* SQ */ 12088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START, 12188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START, 12288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D, 12388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I, 12488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQ_VGPR, 12588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR, 12688f8a8d2Smrg /* SQC (3 ranges) */ 12788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START, 12888f8a8d2Smrg /* SQC range 0 */ 12988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START = 13088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START, 13188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = 13288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START, 13388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 13488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO, 13588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 13688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, 13788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 13888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, 13988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END = 14088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, 14188f8a8d2Smrg /* SQC range 1 */ 14288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START, 14388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = 14488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START, 14588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 14688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO, 14788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM, 14888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM, 14988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO, 15088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO, 15188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 15288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, 15388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END = 15488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, 15588f8a8d2Smrg /* SQC range 2 */ 15688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START, 15788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = 15888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START, 15988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 16088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO, 16188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM, 16288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM, 16388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO, 16488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO, 16588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 16688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, 16788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END = 16888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, 16988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END = 17088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END, 17188f8a8d2Smrg /* TA */ 17288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START, 17388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO = 17488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START, 17588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO, 17688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO, 17788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO, 17888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO, 17988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO, 18088f8a8d2Smrg /* TCA */ 18188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START, 18288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO = 18388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START, 18488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO, 18588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END = 18688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO, 18788f8a8d2Smrg /* TCC (5 sub-ranges) */ 18888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START, 18988f8a8d2Smrg /* TCC range 0 */ 19088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START = 19188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START, 19288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA = 19388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START, 19488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, 19588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, 19688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, 19788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0, 19888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1, 19988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG, 20088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, 20188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END = 20288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, 20388f8a8d2Smrg /* TCC range 1 */ 20488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START, 20588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC = 20688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START, 20788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, 20888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END = 20988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, 21088f8a8d2Smrg /* TCC range 2 */ 21188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START, 21288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA = 21388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START, 21488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL, 21588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO, 21688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN, 21788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ, 21888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO, 21988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM, 22088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, 22188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END = 22288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, 22388f8a8d2Smrg /* TCC range 3 */ 22488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START, 22588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = 22688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START, 22788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, 22888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END = 22988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, 23088f8a8d2Smrg /* TCC range 4 */ 23188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START, 23288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = 23388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START, 23488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, 23588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END = 23688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, 23788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END = 23888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END, 23988f8a8d2Smrg /* TCI */ 24088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM, 24188f8a8d2Smrg /* TCP */ 24288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START, 24388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM = 24488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START, 24588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM, 24688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO, 24788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO, 24888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM, 24988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0, 25088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, 25188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END = 25288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, 25388f8a8d2Smrg /* TD */ 25488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START, 25588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO = 25688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START, 25788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI, 25888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO, 25988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO, 26088f8a8d2Smrg /* EA (3 sub-ranges) */ 26188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START, 26288f8a8d2Smrg /* EA range 0 */ 26388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START = 26488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START, 26588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = 26688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START, 26788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM, 26888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM, 26988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM, 27088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM, 27188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM, 27288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM, 27388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, 27488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END = 27588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, 27688f8a8d2Smrg /* EA range 1 */ 27788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START, 27888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = 27988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START, 28088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM, 28188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM, 28288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM, 28388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM, 28488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM, 28588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, 28688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END = 28788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, 28888f8a8d2Smrg /* EA range 2 */ 28988f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START, 29088f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM = 29188f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START, 29288f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM, 29388f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM, 29488f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM, 29588f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END = 29688f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM, 29788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END = 29888f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END, 29988f8a8d2Smrg /* UTC VM L2 bank */ 30088f8a8d2Smrg AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE, 30188f8a8d2Smrg /* UTC VM walker */ 30288f8a8d2Smrg AMDGPU_RAS_BLOCK__UTC_VML2_WALKER, 30388f8a8d2Smrg /* UTC ATC L2 2MB cache */ 30488f8a8d2Smrg AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK, 30588f8a8d2Smrg /* UTC ATC L2 4KB cache */ 30688f8a8d2Smrg AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK, 30788f8a8d2Smrg AMDGPU_RAS_BLOCK__GFX_MAX 30888f8a8d2Smrg}; 30988f8a8d2Smrg 3105324fb0dSmrgenum amdgpu_ras_error_type { 31188f8a8d2Smrg AMDGPU_RAS_ERROR__NONE = 0, 31288f8a8d2Smrg AMDGPU_RAS_ERROR__PARITY = 1, 31388f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE = 2, 31488f8a8d2Smrg AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE = 4, 31588f8a8d2Smrg AMDGPU_RAS_ERROR__POISON = 8, 31688f8a8d2Smrg}; 31788f8a8d2Smrg 31888f8a8d2Smrgstruct ras_inject_test_config { 31988f8a8d2Smrg char name[64]; 32088f8a8d2Smrg char block[32]; 32188f8a8d2Smrg int sub_block; 32288f8a8d2Smrg enum amdgpu_ras_error_type type; 32388f8a8d2Smrg uint64_t address; 32488f8a8d2Smrg uint64_t value; 3255324fb0dSmrg}; 3265324fb0dSmrg 3275324fb0dSmrgstruct ras_common_if { 3285324fb0dSmrg enum amdgpu_ras_block block; 3295324fb0dSmrg enum amdgpu_ras_error_type type; 3305324fb0dSmrg uint32_t sub_block_index; 3315324fb0dSmrg char name[32]; 3325324fb0dSmrg}; 3335324fb0dSmrg 3345324fb0dSmrgstruct ras_inject_if { 3355324fb0dSmrg struct ras_common_if head; 3365324fb0dSmrg uint64_t address; 3375324fb0dSmrg uint64_t value; 3385324fb0dSmrg}; 3395324fb0dSmrg 3405324fb0dSmrgstruct ras_debug_if { 3415324fb0dSmrg union { 3425324fb0dSmrg struct ras_common_if head; 3435324fb0dSmrg struct ras_inject_if inject; 3445324fb0dSmrg }; 3455324fb0dSmrg int op; 3465324fb0dSmrg}; 3475324fb0dSmrg/* for now, only umc, gfx, sdma has implemented. */ 34888f8a8d2Smrg#define DEFAULT_RAS_BLOCK_MASK_INJECT ((1 << AMDGPU_RAS_BLOCK__UMC) |\ 34988f8a8d2Smrg (1 << AMDGPU_RAS_BLOCK__GFX)) 35088f8a8d2Smrg#define DEFAULT_RAS_BLOCK_MASK_QUERY ((1 << AMDGPU_RAS_BLOCK__UMC) |\ 35188f8a8d2Smrg (1 << AMDGPU_RAS_BLOCK__GFX)) 3525324fb0dSmrg#define DEFAULT_RAS_BLOCK_MASK_BASIC (1 << AMDGPU_RAS_BLOCK__UMC |\ 3535324fb0dSmrg (1 << AMDGPU_RAS_BLOCK__SDMA) |\ 3545324fb0dSmrg (1 << AMDGPU_RAS_BLOCK__GFX)) 3555324fb0dSmrg 3565324fb0dSmrgstatic uint32_t ras_block_mask_inject = DEFAULT_RAS_BLOCK_MASK_INJECT; 3575324fb0dSmrgstatic uint32_t ras_block_mask_query = DEFAULT_RAS_BLOCK_MASK_INJECT; 3585324fb0dSmrgstatic uint32_t ras_block_mask_basic = DEFAULT_RAS_BLOCK_MASK_BASIC; 3595324fb0dSmrg 3605324fb0dSmrgstruct ras_test_mask { 3615324fb0dSmrg uint32_t inject_mask; 3625324fb0dSmrg uint32_t query_mask; 3635324fb0dSmrg uint32_t basic_mask; 3645324fb0dSmrg}; 3655324fb0dSmrg 3665324fb0dSmrgstruct amdgpu_ras_data { 3675324fb0dSmrg amdgpu_device_handle device_handle; 3685324fb0dSmrg uint32_t id; 3695324fb0dSmrg uint32_t capability; 3705324fb0dSmrg struct ras_test_mask test_mask; 3715324fb0dSmrg}; 3725324fb0dSmrg 3735324fb0dSmrg/* all devices who has ras supported */ 3745324fb0dSmrgstatic struct amdgpu_ras_data devices[MAX_CARDS_SUPPORTED]; 3755324fb0dSmrgstatic int devices_count; 3765324fb0dSmrg 3775324fb0dSmrgstruct ras_DID_test_mask{ 3785324fb0dSmrg uint16_t device_id; 3795324fb0dSmrg uint16_t revision_id; 3805324fb0dSmrg struct ras_test_mask test_mask; 3815324fb0dSmrg}; 3825324fb0dSmrg 3835324fb0dSmrg/* white list for inject test. */ 3845324fb0dSmrg#define RAS_BLOCK_MASK_ALL {\ 3855324fb0dSmrg DEFAULT_RAS_BLOCK_MASK_INJECT,\ 3865324fb0dSmrg DEFAULT_RAS_BLOCK_MASK_QUERY,\ 3875324fb0dSmrg DEFAULT_RAS_BLOCK_MASK_BASIC\ 3885324fb0dSmrg} 3895324fb0dSmrg 3905324fb0dSmrg#define RAS_BLOCK_MASK_QUERY_BASIC {\ 3915324fb0dSmrg 0,\ 3925324fb0dSmrg DEFAULT_RAS_BLOCK_MASK_QUERY,\ 3935324fb0dSmrg DEFAULT_RAS_BLOCK_MASK_BASIC\ 3945324fb0dSmrg} 3955324fb0dSmrg 39688f8a8d2Smrgstatic const struct ras_inject_test_config umc_ras_inject_test[] = { 39788f8a8d2Smrg {"ras_umc.1.0", "umc", 0, AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 39888f8a8d2Smrg}; 39988f8a8d2Smrg 40088f8a8d2Smrgstatic const struct ras_inject_test_config gfx_ras_inject_test[] = { 40188f8a8d2Smrg {"ras_gfx.2.0", "gfx", AMDGPU_RAS_BLOCK__GFX_CPC_UCODE, 40288f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 40388f8a8d2Smrg {"ras_gfx.2.1", "gfx", AMDGPU_RAS_BLOCK__GFX_CPF_TAG, 40488f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 40588f8a8d2Smrg {"ras_gfx.2.2", "gfx", AMDGPU_RAS_BLOCK__GFX_CPG_TAG, 40688f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 40788f8a8d2Smrg {"ras_gfx.2.3", "gfx", AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D, 40888f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 40988f8a8d2Smrg {"ras_gfx.2.4", "gfx", AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, 41088f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 41188f8a8d2Smrg {"ras_gfx.2.5", "gfx", AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM, 41288f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 41388f8a8d2Smrg {"ras_gfx.2.6", "gfx", AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM, 41488f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 41588f8a8d2Smrg {"ras_gfx.2.7", "gfx", AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO, 41688f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 41788f8a8d2Smrg {"ras_gfx.2.8", "gfx", AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA, 41888f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 41988f8a8d2Smrg {"ras_gfx.2.9", "gfx", AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, 42088f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 42188f8a8d2Smrg {"ras_gfx.2.10", "gfx", AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, 42288f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 42388f8a8d2Smrg {"ras_gfx.2.11", "gfx", AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, 42488f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 42588f8a8d2Smrg {"ras_gfx.2.12", "gfx", AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM, 42688f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 42788f8a8d2Smrg {"ras_gfx.2.13", "gfx", AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO, 42888f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 42988f8a8d2Smrg {"ras_gfx.2.14", "gfx", AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM, 43088f8a8d2Smrg AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0}, 43188f8a8d2Smrg}; 43288f8a8d2Smrg 4335324fb0dSmrgstatic const struct ras_DID_test_mask ras_DID_array[] = { 4345324fb0dSmrg {0x66a1, 0x00, RAS_BLOCK_MASK_ALL}, 4355324fb0dSmrg {0x66a1, 0x01, RAS_BLOCK_MASK_ALL}, 4365324fb0dSmrg {0x66a1, 0x04, RAS_BLOCK_MASK_ALL}, 4375324fb0dSmrg}; 4385324fb0dSmrg 43988f8a8d2Smrgstatic uint32_t amdgpu_ras_find_block_id_by_name(const char *name) 44088f8a8d2Smrg{ 44188f8a8d2Smrg int i; 44288f8a8d2Smrg 44388f8a8d2Smrg for (i = 0; i < ARRAY_SIZE(ras_block_string); i++) { 44488f8a8d2Smrg if (strcmp(name, ras_block_string[i]) == 0) 44588f8a8d2Smrg return i; 44688f8a8d2Smrg } 44788f8a8d2Smrg 44888f8a8d2Smrg return ARRAY_SIZE(ras_block_string); 44988f8a8d2Smrg} 45088f8a8d2Smrg 45188f8a8d2Smrgstatic char *amdgpu_ras_get_error_type_id(enum amdgpu_ras_error_type type) 45288f8a8d2Smrg{ 45388f8a8d2Smrg switch (type) { 45488f8a8d2Smrg case AMDGPU_RAS_ERROR__PARITY: 45588f8a8d2Smrg return "parity"; 45688f8a8d2Smrg case AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE: 45788f8a8d2Smrg return "single_correctable"; 45888f8a8d2Smrg case AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE: 45988f8a8d2Smrg return "multi_uncorrectable"; 46088f8a8d2Smrg case AMDGPU_RAS_ERROR__POISON: 46188f8a8d2Smrg return "poison"; 46288f8a8d2Smrg case AMDGPU_RAS_ERROR__NONE: 46388f8a8d2Smrg default: 46488f8a8d2Smrg return NULL; 46588f8a8d2Smrg } 46688f8a8d2Smrg} 46788f8a8d2Smrg 4685324fb0dSmrgstatic struct ras_test_mask amdgpu_ras_get_test_mask(drmDevicePtr device) 4695324fb0dSmrg{ 4705324fb0dSmrg int i; 4715324fb0dSmrg static struct ras_test_mask default_test_mask = RAS_BLOCK_MASK_QUERY_BASIC; 4725324fb0dSmrg 4735324fb0dSmrg for (i = 0; i < sizeof(ras_DID_array) / sizeof(ras_DID_array[0]); i++) { 4745324fb0dSmrg if (ras_DID_array[i].device_id == device->deviceinfo.pci->device_id && 4755324fb0dSmrg ras_DID_array[i].revision_id == device->deviceinfo.pci->revision_id) 4765324fb0dSmrg return ras_DID_array[i].test_mask; 4775324fb0dSmrg } 4785324fb0dSmrg return default_test_mask; 4795324fb0dSmrg} 4805324fb0dSmrg 4815324fb0dSmrgstatic uint32_t amdgpu_ras_lookup_capability(amdgpu_device_handle device_handle) 4825324fb0dSmrg{ 4835324fb0dSmrg union { 4845324fb0dSmrg uint64_t feature_mask; 4855324fb0dSmrg struct { 4865324fb0dSmrg uint32_t enabled_features; 4875324fb0dSmrg uint32_t supported_features; 4885324fb0dSmrg }; 4895324fb0dSmrg } features = { 0 }; 4905324fb0dSmrg int ret; 4915324fb0dSmrg 4925324fb0dSmrg ret = amdgpu_query_info(device_handle, AMDGPU_INFO_RAS_ENABLED_FEATURES, 4935324fb0dSmrg sizeof(features), &features); 4945324fb0dSmrg if (ret) 4955324fb0dSmrg return 0; 4965324fb0dSmrg 4975324fb0dSmrg return features.supported_features; 4985324fb0dSmrg} 4995324fb0dSmrg 5005324fb0dSmrgstatic int get_file_contents(char *file, char *buf, int size); 5015324fb0dSmrg 5025324fb0dSmrgstatic int amdgpu_ras_lookup_id(drmDevicePtr device) 5035324fb0dSmrg{ 5049bd392adSmrg char path[PATH_SIZE]; 5055324fb0dSmrg char str[128]; 5065324fb0dSmrg drmPciBusInfo info; 5075324fb0dSmrg int i; 5085324fb0dSmrg int ret; 5095324fb0dSmrg 5105324fb0dSmrg for (i = 0; i < MAX_CARDS_SUPPORTED; i++) { 5115324fb0dSmrg memset(str, 0, sizeof(str)); 5125324fb0dSmrg memset(&info, 0, sizeof(info)); 5139bd392adSmrg snprintf(path, PATH_SIZE, "/sys/kernel/debug/dri/%d/name", i); 5145324fb0dSmrg if (get_file_contents(path, str, sizeof(str)) <= 0) 5155324fb0dSmrg continue; 5165324fb0dSmrg 5175324fb0dSmrg ret = sscanf(str, "amdgpu dev=%04hx:%02hhx:%02hhx.%01hhx", 5185324fb0dSmrg &info.domain, &info.bus, &info.dev, &info.func); 5195324fb0dSmrg if (ret != 4) 5205324fb0dSmrg continue; 5215324fb0dSmrg 5225324fb0dSmrg if (memcmp(&info, device->businfo.pci, sizeof(info)) == 0) 5235324fb0dSmrg return i; 5245324fb0dSmrg } 5255324fb0dSmrg return -1; 5265324fb0dSmrg} 5275324fb0dSmrg 5285324fb0dSmrg//helpers 5295324fb0dSmrg 5305324fb0dSmrgstatic int test_card; 5319bd392adSmrgstatic char sysfs_path[PATH_SIZE]; 5329bd392adSmrgstatic char debugfs_path[PATH_SIZE]; 5335324fb0dSmrgstatic uint32_t ras_mask; 5345324fb0dSmrgstatic amdgpu_device_handle device_handle; 5355324fb0dSmrg 5369bd392adSmrgstatic void set_test_card(int card) 5375324fb0dSmrg{ 5385324fb0dSmrg test_card = card; 5399bd392adSmrg snprintf(sysfs_path, PATH_SIZE, "/sys/class/drm/card%d/device/ras/", devices[card].id); 5409bd392adSmrg snprintf(debugfs_path, PATH_SIZE, "/sys/kernel/debug/dri/%d/ras/", devices[card].id); 5415324fb0dSmrg ras_mask = devices[card].capability; 5425324fb0dSmrg device_handle = devices[card].device_handle; 5435324fb0dSmrg ras_block_mask_inject = devices[card].test_mask.inject_mask; 5445324fb0dSmrg ras_block_mask_query = devices[card].test_mask.query_mask; 5455324fb0dSmrg ras_block_mask_basic = devices[card].test_mask.basic_mask; 5465324fb0dSmrg} 5475324fb0dSmrg 5485324fb0dSmrgstatic const char *get_ras_sysfs_root(void) 5495324fb0dSmrg{ 5505324fb0dSmrg return sysfs_path; 5515324fb0dSmrg} 5525324fb0dSmrg 5535324fb0dSmrgstatic const char *get_ras_debugfs_root(void) 5545324fb0dSmrg{ 5555324fb0dSmrg return debugfs_path; 5565324fb0dSmrg} 5575324fb0dSmrg 5585324fb0dSmrgstatic int set_file_contents(char *file, char *buf, int size) 5595324fb0dSmrg{ 5605324fb0dSmrg int n, fd; 5615324fb0dSmrg fd = open(file, O_WRONLY); 5625324fb0dSmrg if (fd == -1) 5635324fb0dSmrg return -1; 5645324fb0dSmrg n = write(fd, buf, size); 5655324fb0dSmrg close(fd); 5665324fb0dSmrg return n; 5675324fb0dSmrg} 5685324fb0dSmrg 5695324fb0dSmrgstatic int get_file_contents(char *file, char *buf, int size) 5705324fb0dSmrg{ 5715324fb0dSmrg int n, fd; 5725324fb0dSmrg fd = open(file, O_RDONLY); 5735324fb0dSmrg if (fd == -1) 5745324fb0dSmrg return -1; 5755324fb0dSmrg n = read(fd, buf, size); 5765324fb0dSmrg close(fd); 5775324fb0dSmrg return n; 5785324fb0dSmrg} 5795324fb0dSmrg 5805324fb0dSmrgstatic int is_file_ok(char *file, int flags) 5815324fb0dSmrg{ 5825324fb0dSmrg int fd; 5835324fb0dSmrg 5845324fb0dSmrg fd = open(file, flags); 5855324fb0dSmrg if (fd == -1) 5865324fb0dSmrg return -1; 5875324fb0dSmrg close(fd); 5885324fb0dSmrg return 0; 5895324fb0dSmrg} 5905324fb0dSmrg 5915324fb0dSmrgstatic int amdgpu_ras_is_feature_enabled(enum amdgpu_ras_block block) 5925324fb0dSmrg{ 5935324fb0dSmrg uint32_t feature_mask; 5945324fb0dSmrg int ret; 5955324fb0dSmrg 5965324fb0dSmrg ret = amdgpu_query_info(device_handle, AMDGPU_INFO_RAS_ENABLED_FEATURES, 5975324fb0dSmrg sizeof(feature_mask), &feature_mask); 5985324fb0dSmrg if (ret) 5995324fb0dSmrg return -1; 6005324fb0dSmrg 6015324fb0dSmrg return (1 << block) & feature_mask; 6025324fb0dSmrg} 6035324fb0dSmrg 6045324fb0dSmrgstatic int amdgpu_ras_is_feature_supported(enum amdgpu_ras_block block) 6055324fb0dSmrg{ 6065324fb0dSmrg return (1 << block) & ras_mask; 6075324fb0dSmrg} 6085324fb0dSmrg 6095324fb0dSmrgstatic int amdgpu_ras_invoke(struct ras_debug_if *data) 6105324fb0dSmrg{ 6119bd392adSmrg char path[PATH_SIZE]; 6125324fb0dSmrg int ret; 6135324fb0dSmrg 6149bd392adSmrg snprintf(path, sizeof(path), "%s", get_ras_debugfs_root()); 6159bd392adSmrg strncat(path, "ras_ctrl", sizeof(path) - strlen(path)); 6165324fb0dSmrg 6175324fb0dSmrg ret = set_file_contents(path, (char *)data, sizeof(*data)) 6185324fb0dSmrg - sizeof(*data); 6195324fb0dSmrg return ret; 6205324fb0dSmrg} 6215324fb0dSmrg 6225324fb0dSmrgstatic int amdgpu_ras_query_err_count(enum amdgpu_ras_block block, 6235324fb0dSmrg unsigned long *ue, unsigned long *ce) 6245324fb0dSmrg{ 6255324fb0dSmrg char buf[64]; 6269bd392adSmrg char name[PATH_SIZE]; 6275324fb0dSmrg 6285324fb0dSmrg *ue = *ce = 0; 6295324fb0dSmrg 6305324fb0dSmrg if (amdgpu_ras_is_feature_supported(block) <= 0) 6315324fb0dSmrg return -1; 6325324fb0dSmrg 6339bd392adSmrg snprintf(name, sizeof(name), "%s", get_ras_sysfs_root()); 6349bd392adSmrg strncat(name, ras_block_str(block), sizeof(name) - strlen(name)); 6359bd392adSmrg strncat(name, "_err_count", sizeof(name) - strlen(name)); 6365324fb0dSmrg 6375324fb0dSmrg if (is_file_ok(name, O_RDONLY)) 6385324fb0dSmrg return 0; 6395324fb0dSmrg 6405324fb0dSmrg if (get_file_contents(name, buf, sizeof(buf)) <= 0) 6415324fb0dSmrg return -1; 6425324fb0dSmrg 6435324fb0dSmrg if (sscanf(buf, "ue: %lu\nce: %lu", ue, ce) != 2) 6445324fb0dSmrg return -1; 6455324fb0dSmrg 6465324fb0dSmrg return 0; 6475324fb0dSmrg} 6485324fb0dSmrg 64988f8a8d2Smrgstatic int amdgpu_ras_inject(enum amdgpu_ras_block block, 65088f8a8d2Smrg uint32_t sub_block, enum amdgpu_ras_error_type type, 65188f8a8d2Smrg uint64_t address, uint64_t value) 65288f8a8d2Smrg{ 65388f8a8d2Smrg struct ras_debug_if data = { .op = 2, }; 65488f8a8d2Smrg struct ras_inject_if *inject = &data.inject; 65588f8a8d2Smrg int ret; 65688f8a8d2Smrg 65788f8a8d2Smrg if (amdgpu_ras_is_feature_enabled(block) <= 0) { 65888f8a8d2Smrg fprintf(stderr, "block id(%d) is not valid\n", block); 65988f8a8d2Smrg return -1; 66088f8a8d2Smrg } 66188f8a8d2Smrg 66288f8a8d2Smrg inject->head.block = block; 66388f8a8d2Smrg inject->head.type = type; 66488f8a8d2Smrg inject->head.sub_block_index = sub_block; 6659bd392adSmrg strncpy(inject->head.name, ras_block_str(block), sizeof(inject->head.name)-1); 66688f8a8d2Smrg inject->address = address; 66788f8a8d2Smrg inject->value = value; 66888f8a8d2Smrg 66988f8a8d2Smrg ret = amdgpu_ras_invoke(&data); 67088f8a8d2Smrg CU_ASSERT_EQUAL(ret, 0); 67188f8a8d2Smrg if (ret) 67288f8a8d2Smrg return -1; 67388f8a8d2Smrg 67488f8a8d2Smrg return 0; 67588f8a8d2Smrg} 67688f8a8d2Smrg 6775324fb0dSmrg//tests 6785324fb0dSmrgstatic void amdgpu_ras_features_test(int enable) 6795324fb0dSmrg{ 6805324fb0dSmrg struct ras_debug_if data; 6815324fb0dSmrg int ret; 6825324fb0dSmrg int i; 6835324fb0dSmrg 6845324fb0dSmrg data.op = enable; 6855324fb0dSmrg for (i = 0; i < AMDGPU_RAS_BLOCK__LAST; i++) { 6865324fb0dSmrg struct ras_common_if head = { 6875324fb0dSmrg .block = i, 6885324fb0dSmrg .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, 6895324fb0dSmrg .sub_block_index = 0, 6905324fb0dSmrg .name = "", 6915324fb0dSmrg }; 6925324fb0dSmrg 6935324fb0dSmrg if (amdgpu_ras_is_feature_supported(i) <= 0) 6945324fb0dSmrg continue; 6955324fb0dSmrg 6965324fb0dSmrg data.head = head; 6975324fb0dSmrg 6985324fb0dSmrg ret = amdgpu_ras_invoke(&data); 6995324fb0dSmrg CU_ASSERT_EQUAL(ret, 0); 7005324fb0dSmrg 7015324fb0dSmrg if (ret) 7025324fb0dSmrg continue; 7035324fb0dSmrg 7045324fb0dSmrg ret = enable ^ amdgpu_ras_is_feature_enabled(i); 7055324fb0dSmrg CU_ASSERT_EQUAL(ret, 0); 7065324fb0dSmrg } 7075324fb0dSmrg} 7085324fb0dSmrg 7095324fb0dSmrgstatic void amdgpu_ras_disable_test(void) 7105324fb0dSmrg{ 7115324fb0dSmrg int i; 7125324fb0dSmrg for (i = 0; i < devices_count; i++) { 7135324fb0dSmrg set_test_card(i); 7145324fb0dSmrg amdgpu_ras_features_test(0); 7155324fb0dSmrg } 7165324fb0dSmrg} 7175324fb0dSmrg 7185324fb0dSmrgstatic void amdgpu_ras_enable_test(void) 7195324fb0dSmrg{ 7205324fb0dSmrg int i; 7215324fb0dSmrg for (i = 0; i < devices_count; i++) { 7225324fb0dSmrg set_test_card(i); 7235324fb0dSmrg amdgpu_ras_features_test(1); 7245324fb0dSmrg } 7255324fb0dSmrg} 7265324fb0dSmrg 72788f8a8d2Smrgstatic void __amdgpu_ras_ip_inject_test(const struct ras_inject_test_config *ip_test, 72888f8a8d2Smrg uint32_t size) 7295324fb0dSmrg{ 73088f8a8d2Smrg int i, ret; 73188f8a8d2Smrg unsigned long old_ue, old_ce; 73288f8a8d2Smrg unsigned long ue, ce; 73388f8a8d2Smrg uint32_t block; 73488f8a8d2Smrg int timeout; 73588f8a8d2Smrg bool pass; 7365324fb0dSmrg 73788f8a8d2Smrg for (i = 0; i < size; i++) { 73888f8a8d2Smrg timeout = 3; 73988f8a8d2Smrg pass = false; 7405324fb0dSmrg 74188f8a8d2Smrg block = amdgpu_ras_find_block_id_by_name(ip_test[i].block); 7425324fb0dSmrg 74388f8a8d2Smrg /* Ensure one valid ip block */ 74488f8a8d2Smrg if (block == ARRAY_SIZE(ras_block_string)) 74588f8a8d2Smrg break; 7465324fb0dSmrg 74788f8a8d2Smrg /* Ensure RAS feature for the IP block is enabled by kernel */ 74888f8a8d2Smrg if (amdgpu_ras_is_feature_supported(block) <= 0) 74988f8a8d2Smrg break; 7505324fb0dSmrg 75188f8a8d2Smrg ret = amdgpu_ras_query_err_count(block, &old_ue, &old_ce); 7525324fb0dSmrg CU_ASSERT_EQUAL(ret, 0); 7535324fb0dSmrg if (ret) 75488f8a8d2Smrg break; 7555324fb0dSmrg 75688f8a8d2Smrg ret = amdgpu_ras_inject(block, 75788f8a8d2Smrg ip_test[i].sub_block, 75888f8a8d2Smrg ip_test[i].type, 75988f8a8d2Smrg ip_test[i].address, 76088f8a8d2Smrg ip_test[i].value); 7615324fb0dSmrg CU_ASSERT_EQUAL(ret, 0); 7625324fb0dSmrg if (ret) 76388f8a8d2Smrg break; 7645324fb0dSmrg 7655324fb0dSmrg while (timeout > 0) { 76688f8a8d2Smrg sleep(5); 7675324fb0dSmrg 76888f8a8d2Smrg ret = amdgpu_ras_query_err_count(block, &ue, &ce); 76988f8a8d2Smrg CU_ASSERT_EQUAL(ret, 0); 7705324fb0dSmrg if (ret) 7715324fb0dSmrg break; 7725324fb0dSmrg 77388f8a8d2Smrg if (old_ue != ue || old_ce != ce) { 77488f8a8d2Smrg pass = true; 77588f8a8d2Smrg sleep(20); 77688f8a8d2Smrg break; 77788f8a8d2Smrg } 7785324fb0dSmrg timeout -= 1; 7795324fb0dSmrg } 78088f8a8d2Smrg printf("\t Test %s@block %s, subblock %d, error_type %s, address %ld, value %ld: %s\n", 78188f8a8d2Smrg ip_test[i].name, 78288f8a8d2Smrg ip_test[i].block, 78388f8a8d2Smrg ip_test[i].sub_block, 78488f8a8d2Smrg amdgpu_ras_get_error_type_id(ip_test[i].type), 78588f8a8d2Smrg ip_test[i].address, 78688f8a8d2Smrg ip_test[i].value, 78788f8a8d2Smrg pass ? "Pass" : "Fail"); 7885324fb0dSmrg } 7895324fb0dSmrg} 7905324fb0dSmrg 79188f8a8d2Smrgstatic void __amdgpu_ras_inject_test(void) 79288f8a8d2Smrg{ 79388f8a8d2Smrg printf("...\n"); 79488f8a8d2Smrg 79588f8a8d2Smrg /* run UMC ras inject test */ 79688f8a8d2Smrg __amdgpu_ras_ip_inject_test(umc_ras_inject_test, 79788f8a8d2Smrg ARRAY_SIZE(umc_ras_inject_test)); 79888f8a8d2Smrg 79988f8a8d2Smrg /* run GFX ras inject test */ 80088f8a8d2Smrg __amdgpu_ras_ip_inject_test(gfx_ras_inject_test, 80188f8a8d2Smrg ARRAY_SIZE(gfx_ras_inject_test)); 80288f8a8d2Smrg} 80388f8a8d2Smrg 8045324fb0dSmrgstatic void amdgpu_ras_inject_test(void) 8055324fb0dSmrg{ 8065324fb0dSmrg int i; 8075324fb0dSmrg for (i = 0; i < devices_count; i++) { 8085324fb0dSmrg set_test_card(i); 8095324fb0dSmrg __amdgpu_ras_inject_test(); 8105324fb0dSmrg } 8115324fb0dSmrg} 8125324fb0dSmrg 8135324fb0dSmrgstatic void __amdgpu_ras_query_test(void) 8145324fb0dSmrg{ 8155324fb0dSmrg unsigned long ue, ce; 8165324fb0dSmrg int ret; 8175324fb0dSmrg int i; 8185324fb0dSmrg 8195324fb0dSmrg for (i = 0; i < AMDGPU_RAS_BLOCK__LAST; i++) { 8205324fb0dSmrg if (amdgpu_ras_is_feature_supported(i) <= 0) 8215324fb0dSmrg continue; 8225324fb0dSmrg 8235324fb0dSmrg if (!((1 << i) & ras_block_mask_query)) 8245324fb0dSmrg continue; 8255324fb0dSmrg 8265324fb0dSmrg ret = amdgpu_ras_query_err_count(i, &ue, &ce); 8275324fb0dSmrg CU_ASSERT_EQUAL(ret, 0); 8285324fb0dSmrg } 8295324fb0dSmrg} 8305324fb0dSmrg 8315324fb0dSmrgstatic void amdgpu_ras_query_test(void) 8325324fb0dSmrg{ 8335324fb0dSmrg int i; 8345324fb0dSmrg for (i = 0; i < devices_count; i++) { 8355324fb0dSmrg set_test_card(i); 8365324fb0dSmrg __amdgpu_ras_query_test(); 8375324fb0dSmrg } 8385324fb0dSmrg} 8395324fb0dSmrg 8405324fb0dSmrgstatic void amdgpu_ras_basic_test(void) 8415324fb0dSmrg{ 8425324fb0dSmrg int ret; 8435324fb0dSmrg int i; 8445324fb0dSmrg int j; 8455324fb0dSmrg uint32_t features; 8469bd392adSmrg char path[PATH_SIZE]; 8475324fb0dSmrg 8485324fb0dSmrg ret = is_file_ok("/sys/module/amdgpu/parameters/ras_mask", O_RDONLY); 8495324fb0dSmrg CU_ASSERT_EQUAL(ret, 0); 8505324fb0dSmrg 8515324fb0dSmrg for (i = 0; i < devices_count; i++) { 8525324fb0dSmrg set_test_card(i); 8535324fb0dSmrg 8545324fb0dSmrg ret = amdgpu_query_info(device_handle, AMDGPU_INFO_RAS_ENABLED_FEATURES, 8555324fb0dSmrg sizeof(features), &features); 8565324fb0dSmrg CU_ASSERT_EQUAL(ret, 0); 8575324fb0dSmrg 8589bd392adSmrg snprintf(path, sizeof(path), "%s", get_ras_debugfs_root()); 8599bd392adSmrg strncat(path, "ras_ctrl", sizeof(path) - strlen(path)); 8609bd392adSmrg 8615324fb0dSmrg ret = is_file_ok(path, O_WRONLY); 8625324fb0dSmrg CU_ASSERT_EQUAL(ret, 0); 8635324fb0dSmrg 8649bd392adSmrg snprintf(path, sizeof(path), "%s", get_ras_sysfs_root()); 8659bd392adSmrg strncat(path, "features", sizeof(path) - strlen(path)); 8669bd392adSmrg 8675324fb0dSmrg ret = is_file_ok(path, O_RDONLY); 8685324fb0dSmrg CU_ASSERT_EQUAL(ret, 0); 8695324fb0dSmrg 8705324fb0dSmrg for (j = 0; j < AMDGPU_RAS_BLOCK__LAST; j++) { 8715324fb0dSmrg ret = amdgpu_ras_is_feature_supported(j); 8725324fb0dSmrg if (ret <= 0) 8735324fb0dSmrg continue; 8745324fb0dSmrg 8755324fb0dSmrg if (!((1 << j) & ras_block_mask_basic)) 8765324fb0dSmrg continue; 8775324fb0dSmrg 8789bd392adSmrg snprintf(path, sizeof(path), "%s", get_ras_sysfs_root()); 8799bd392adSmrg strncat(path, ras_block_str(j), sizeof(path) - strlen(path)); 8809bd392adSmrg strncat(path, "_err_count", sizeof(path) - strlen(path)); 8819bd392adSmrg 8825324fb0dSmrg ret = is_file_ok(path, O_RDONLY); 8835324fb0dSmrg CU_ASSERT_EQUAL(ret, 0); 8845324fb0dSmrg 8859bd392adSmrg snprintf(path, sizeof(path), "%s", get_ras_debugfs_root()); 8869bd392adSmrg strncat(path, ras_block_str(j), sizeof(path) - strlen(path)); 8879bd392adSmrg strncat(path, "_err_inject", sizeof(path) - strlen(path)); 8889bd392adSmrg 8895324fb0dSmrg ret = is_file_ok(path, O_WRONLY); 8905324fb0dSmrg CU_ASSERT_EQUAL(ret, 0); 8915324fb0dSmrg } 8925324fb0dSmrg } 8935324fb0dSmrg} 8949bd392adSmrg 8959bd392adSmrgCU_TestInfo ras_tests[] = { 8969bd392adSmrg { "ras basic test", amdgpu_ras_basic_test }, 8979bd392adSmrg { "ras query test", amdgpu_ras_query_test }, 8989bd392adSmrg { "ras inject test", amdgpu_ras_inject_test }, 8999bd392adSmrg { "ras disable test", amdgpu_ras_disable_test }, 9009bd392adSmrg { "ras enable test", amdgpu_ras_enable_test }, 9019bd392adSmrg CU_TEST_INFO_NULL, 9029bd392adSmrg}; 9039bd392adSmrg 9049bd392adSmrgCU_BOOL suite_ras_tests_enable(void) 9059bd392adSmrg{ 9069bd392adSmrg amdgpu_device_handle device_handle; 9079bd392adSmrg uint32_t major_version; 9089bd392adSmrg uint32_t minor_version; 9099bd392adSmrg int i; 9109bd392adSmrg drmDevicePtr device; 9119bd392adSmrg 9129bd392adSmrg for (i = 0; i < MAX_CARDS_SUPPORTED && drm_amdgpu[i] >= 0; i++) { 9139bd392adSmrg if (amdgpu_device_initialize(drm_amdgpu[i], &major_version, 9149bd392adSmrg &minor_version, &device_handle)) 9159bd392adSmrg continue; 9169bd392adSmrg 9179bd392adSmrg if (drmGetDevice2(drm_amdgpu[i], 9189bd392adSmrg DRM_DEVICE_GET_PCI_REVISION, 9199bd392adSmrg &device)) 9209bd392adSmrg continue; 9219bd392adSmrg 9229bd392adSmrg if (device->bustype == DRM_BUS_PCI && 9239bd392adSmrg amdgpu_ras_lookup_capability(device_handle)) { 9249bd392adSmrg amdgpu_device_deinitialize(device_handle); 9259bd392adSmrg return CU_TRUE; 9269bd392adSmrg } 9279bd392adSmrg 9289bd392adSmrg if (amdgpu_device_deinitialize(device_handle)) 9299bd392adSmrg continue; 9309bd392adSmrg } 9319bd392adSmrg 9329bd392adSmrg return CU_FALSE; 9339bd392adSmrg} 9349bd392adSmrg 9359bd392adSmrgint suite_ras_tests_init(void) 9369bd392adSmrg{ 9379bd392adSmrg drmDevicePtr device; 9389bd392adSmrg amdgpu_device_handle device_handle; 9399bd392adSmrg uint32_t major_version; 9409bd392adSmrg uint32_t minor_version; 9419bd392adSmrg uint32_t capability; 9429bd392adSmrg struct ras_test_mask test_mask; 9439bd392adSmrg int id; 9449bd392adSmrg int i; 9459bd392adSmrg int r; 9469bd392adSmrg 9479bd392adSmrg for (i = 0; i < MAX_CARDS_SUPPORTED && drm_amdgpu[i] >= 0; i++) { 9489bd392adSmrg r = amdgpu_device_initialize(drm_amdgpu[i], &major_version, 9499bd392adSmrg &minor_version, &device_handle); 9509bd392adSmrg if (r) 9519bd392adSmrg continue; 9529bd392adSmrg 9539bd392adSmrg if (drmGetDevice2(drm_amdgpu[i], 9549bd392adSmrg DRM_DEVICE_GET_PCI_REVISION, 9559bd392adSmrg &device)) { 9569bd392adSmrg amdgpu_device_deinitialize(device_handle); 9579bd392adSmrg continue; 9589bd392adSmrg } 9599bd392adSmrg 9609bd392adSmrg if (device->bustype != DRM_BUS_PCI) { 9619bd392adSmrg amdgpu_device_deinitialize(device_handle); 9629bd392adSmrg continue; 9639bd392adSmrg } 9649bd392adSmrg 9659bd392adSmrg capability = amdgpu_ras_lookup_capability(device_handle); 9669bd392adSmrg if (capability == 0) { 9679bd392adSmrg amdgpu_device_deinitialize(device_handle); 9689bd392adSmrg continue; 9699bd392adSmrg 9709bd392adSmrg } 9719bd392adSmrg 9729bd392adSmrg id = amdgpu_ras_lookup_id(device); 9739bd392adSmrg if (id == -1) { 9749bd392adSmrg amdgpu_device_deinitialize(device_handle); 9759bd392adSmrg continue; 9769bd392adSmrg } 9779bd392adSmrg 9789bd392adSmrg test_mask = amdgpu_ras_get_test_mask(device); 9799bd392adSmrg 9809bd392adSmrg devices[devices_count++] = (struct amdgpu_ras_data) { 9819bd392adSmrg device_handle, id, capability, test_mask, 9829bd392adSmrg }; 9839bd392adSmrg } 9849bd392adSmrg 9859bd392adSmrg if (devices_count == 0) 9869bd392adSmrg return CUE_SINIT_FAILED; 9879bd392adSmrg 9889bd392adSmrg return CUE_SUCCESS; 9899bd392adSmrg} 9909bd392adSmrg 9919bd392adSmrgint suite_ras_tests_clean(void) 9929bd392adSmrg{ 9939bd392adSmrg int r; 9949bd392adSmrg int i; 9959bd392adSmrg int ret = CUE_SUCCESS; 9969bd392adSmrg 9979bd392adSmrg for (i = 0; i < devices_count; i++) { 9989bd392adSmrg r = amdgpu_device_deinitialize(devices[i].device_handle); 9999bd392adSmrg if (r) 10009bd392adSmrg ret = CUE_SCLEAN_FAILED; 10019bd392adSmrg } 10029bd392adSmrg return ret; 10039bd392adSmrg} 1004