ras_tests.c revision 88f8a8d2
15324fb0dSmrg/*
25324fb0dSmrg * Copyright 2017 Advanced Micro Devices, Inc.
35324fb0dSmrg *
45324fb0dSmrg * Permission is hereby granted, free of charge, to any person obtaining a
55324fb0dSmrg * copy of this software and associated documentation files (the "Software"),
65324fb0dSmrg * to deal in the Software without restriction, including without limitation
75324fb0dSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
85324fb0dSmrg * and/or sell copies of the Software, and to permit persons to whom the
95324fb0dSmrg * Software is furnished to do so, subject to the following conditions:
105324fb0dSmrg *
115324fb0dSmrg * The above copyright notice and this permission notice shall be included in
125324fb0dSmrg * all copies or substantial portions of the Software.
135324fb0dSmrg *
145324fb0dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
155324fb0dSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
165324fb0dSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
175324fb0dSmrg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
185324fb0dSmrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
195324fb0dSmrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
205324fb0dSmrg * OTHER DEALINGS IN THE SOFTWARE.
215324fb0dSmrg *
225324fb0dSmrg*/
235324fb0dSmrg
245324fb0dSmrg#include "CUnit/Basic.h"
255324fb0dSmrg
265324fb0dSmrg#include "amdgpu_test.h"
275324fb0dSmrg#include "amdgpu_drm.h"
285324fb0dSmrg#include "amdgpu_internal.h"
295324fb0dSmrg#include <unistd.h>
305324fb0dSmrg#include <fcntl.h>
315324fb0dSmrg#include <stdio.h>
325324fb0dSmrg#include "xf86drm.h"
335324fb0dSmrg
/* Number of elements in a true array (must not be used on a pointer). */
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
3588f8a8d2Smrg
/*
 * Names of the RAS blocks, listed in the same order as the enumerators of
 * enum amdgpu_ras_block so that a block id can be used as the index.
 * The names are used to build per-block sysfs file names.
 */
const char *ras_block_string[] = {
	"umc",
	"sdma",
	"gfx",
	"mmhub",
	"athub",
	"pcie_bif",
	"hdp",
	"xgmi_wafl",
	"df",
	"smn",
	"sem",
	"mp0",
	"mp1",
	"fuse",
};
525324fb0dSmrg
/* Name of RAS block i; no range check is performed on i. */
#define ras_block_str(i) (ras_block_string[i])
545324fb0dSmrg
/*
 * RAS block identifiers.  The value of each enumerator is used both as an
 * index into ras_block_string[] and as a bit position in the block masks.
 * AMDGPU_RAS_BLOCK__LAST is one past the last valid block.
 */
enum amdgpu_ras_block {
	AMDGPU_RAS_BLOCK__UMC = 0,
	AMDGPU_RAS_BLOCK__SDMA,
	AMDGPU_RAS_BLOCK__GFX,
	AMDGPU_RAS_BLOCK__MMHUB,
	AMDGPU_RAS_BLOCK__ATHUB,
	AMDGPU_RAS_BLOCK__PCIE_BIF,
	AMDGPU_RAS_BLOCK__HDP,
	AMDGPU_RAS_BLOCK__XGMI_WAFL,
	AMDGPU_RAS_BLOCK__DF,
	AMDGPU_RAS_BLOCK__SMN,
	AMDGPU_RAS_BLOCK__SEM,
	AMDGPU_RAS_BLOCK__MP0,
	AMDGPU_RAS_BLOCK__MP1,
	AMDGPU_RAS_BLOCK__FUSE,

	AMDGPU_RAS_BLOCK__LAST
};
735324fb0dSmrg
/* Number of RAS blocks, and a mask with one bit set for each of them. */
#define AMDGPU_RAS_BLOCK_COUNT  AMDGPU_RAS_BLOCK__LAST
#define AMDGPU_RAS_BLOCK_MASK   ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1)
765324fb0dSmrg
/*
 * Sub-block indices inside the GFX RAS block; used as the sub_block value of
 * the GFX entries in the inject test table.
 *
 * The enumerators are grouped per hardware unit.  Each *_INDEX_START shares
 * its value with the first member of its group and each *_INDEX_END with the
 * last one, so the range markers add no indices of their own.
 * AMDGPU_RAS_BLOCK__GFX_MAX is one past the last sub-block.
 *
 * NOTE(review): the numbering presumably has to match the kernel driver's
 * definition -- confirm before reordering or inserting entries.
 */
enum amdgpu_ras_gfx_subblock {
	/* CPC */
	AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
	AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH =
		AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_CPC_UCODE,
	AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1,
	AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
	AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1,
	AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2,
	AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
	AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	/* CPF */
	AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 =
		AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1,
	AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
	AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
	/* CPG */
	AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ =
		AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG,
	AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
	AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
	/* GDS */
	AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
	AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
	AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
	AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	/* SPI */
	AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM,
	/* SQ */
	AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D,
	AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I,
	AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
	AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
	/* SQC (3 ranges) */
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
	/* SQC range 0 */
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START =
		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END =
		AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1 */
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END =
		AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2 */
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END =
		AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END,
	/* TA */
	AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO =
		AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO,
	AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO,
	AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO,
	AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
	AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
	/* TCA */
	AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO =
		AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	/* TCC (5 sub-ranges) */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0 */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1 */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2 */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3 */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4 */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END,
	/* TCI */
	AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM,
	/* TCP */
	AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM =
		AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM,
	AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	/* TD */
	AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO =
		AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
	/* EA (3 sub-ranges) */
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0 */
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START =
		AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM =
		AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START,
	AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END =
		AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	/* EA range 1 */
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM =
		AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END =
		AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2 */
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM =
		AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END =
		AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END,
	/* UTC VM L2 bank */
	AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	/* UTC VM walker */
	AMDGPU_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache */
	AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache */
	AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	AMDGPU_RAS_BLOCK__GFX_MAX
};
30688f8a8d2Smrg
/*
 * RAS error types.  Apart from NONE each value is a distinct single bit.
 * amdgpu_ras_get_error_type_id() maps these values to their names.
 */
enum amdgpu_ras_error_type {
	AMDGPU_RAS_ERROR__NONE					= 0,
	AMDGPU_RAS_ERROR__PARITY				= 1,
	AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE			= 2,
	AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE			= 4,
	AMDGPU_RAS_ERROR__POISON				= 8,
};
31488f8a8d2Smrg
/* One entry of an error-injection test table. */
struct ras_inject_test_config {
	char name[64];				/* test case name, e.g. "ras_umc.1.0" */
	char block[32];				/* RAS block name, as in ras_block_string[] */
	int sub_block;				/* sub-block index within the block */
	enum amdgpu_ras_error_type type;	/* kind of error to inject */
	uint64_t address;			/* address argument of the injection */
	uint64_t value;				/* value argument of the injection */
};
3235324fb0dSmrg
/*
 * Common header identifying the target of a RAS control request.
 * NOTE(review): the layout presumably has to match what the kernel expects
 * on the ras_ctrl debugfs file -- confirm before changing it.
 */
struct ras_common_if {
	enum amdgpu_ras_block block;
	enum amdgpu_ras_error_type type;
	uint32_t sub_block_index;
	char name[32];
};
3305324fb0dSmrg
/* Error-injection request: the common header plus an address/value pair. */
struct ras_inject_if {
	struct ras_common_if head;
	uint64_t address;
	uint64_t value;
};
3365324fb0dSmrg
/*
 * Request record that amdgpu_ras_invoke() writes verbatim to the "ras_ctrl"
 * debugfs file.  head and inject overlay each other; op selects the
 * operation (the op codes are not defined in this part of the file).
 */
struct ras_debug_if {
	union {
		struct ras_common_if head;
		struct ras_inject_if inject;
	};
	int op;
};
/*
 * Default per-test block masks (one bit per enum amdgpu_ras_block value).
 * For now the inject and query tests cover only umc and gfx; the basic test
 * additionally covers sdma.
 */
#define DEFAULT_RAS_BLOCK_MASK_INJECT ((1 << AMDGPU_RAS_BLOCK__UMC) |\
		(1 << AMDGPU_RAS_BLOCK__GFX))
#define DEFAULT_RAS_BLOCK_MASK_QUERY ((1 << AMDGPU_RAS_BLOCK__UMC) |\
		(1 << AMDGPU_RAS_BLOCK__GFX))
#define DEFAULT_RAS_BLOCK_MASK_BASIC (1 << AMDGPU_RAS_BLOCK__UMC |\
		(1 << AMDGPU_RAS_BLOCK__SDMA) |\
		(1 << AMDGPU_RAS_BLOCK__GFX))
3525324fb0dSmrg
3535324fb0dSmrgstatic uint32_t ras_block_mask_inject = DEFAULT_RAS_BLOCK_MASK_INJECT;
3545324fb0dSmrgstatic uint32_t ras_block_mask_query = DEFAULT_RAS_BLOCK_MASK_INJECT;
3555324fb0dSmrgstatic uint32_t ras_block_mask_basic = DEFAULT_RAS_BLOCK_MASK_BASIC;
3565324fb0dSmrg
/* Set of block masks, one per test kind (one bit per enum amdgpu_ras_block). */
struct ras_test_mask {
	uint32_t inject_mask;
	uint32_t query_mask;
	uint32_t basic_mask;
};
3625324fb0dSmrg
/* Per-device state collected by suite_ras_tests_init(). */
struct amdgpu_ras_data {
	amdgpu_device_handle device_handle;	/* open libdrm_amdgpu handle */
	uint32_t  id;				/* index N of /sys/kernel/debug/dri/N for the device */
	uint32_t  capability;			/* supported-features mask reported by the kernel */
	struct ras_test_mask test_mask;		/* blocks each test may exercise on this device */
};
3695324fb0dSmrg
/* All devices that support RAS; the first devices_count entries are valid. */
static struct amdgpu_ras_data devices[MAX_CARDS_SUPPORTED];
static int devices_count;
3735324fb0dSmrg
/* Associates a PCI device id / revision id pair with the test masks to use. */
struct ras_DID_test_mask{
	uint16_t device_id;
	uint16_t revision_id;
	struct ras_test_mask test_mask;
};
3795324fb0dSmrg
/*
 * Initialisers for struct ras_test_mask, in member order
 * { inject_mask, query_mask, basic_mask }.
 *
 * RAS_BLOCK_MASK_ALL enables all three tests and is used for the devices
 * white-listed for the inject test.
 */
#define RAS_BLOCK_MASK_ALL {\
	DEFAULT_RAS_BLOCK_MASK_INJECT,\
	DEFAULT_RAS_BLOCK_MASK_QUERY,\
	DEFAULT_RAS_BLOCK_MASK_BASIC\
}

/* Query and basic tests only: the inject mask is empty. */
#define RAS_BLOCK_MASK_QUERY_BASIC {\
	0,\
	DEFAULT_RAS_BLOCK_MASK_QUERY,\
	DEFAULT_RAS_BLOCK_MASK_BASIC\
}
3925324fb0dSmrg
/*
 * Injection test cases for the UMC block.
 * Fields: name, block, sub_block, type, address, value.
 */
static const struct ras_inject_test_config umc_ras_inject_test[] = {
	{"ras_umc.1.0", "umc", 0, AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
};
39688f8a8d2Smrg
/*
 * Injection test cases for the GFX block: a single-correctable error at
 * address 0 in a selection of GFX sub-blocks.
 * Fields: name, block, sub_block, type, address, value.
 */
static const struct ras_inject_test_config gfx_ras_inject_test[] = {
	{"ras_gfx.2.0", "gfx", AMDGPU_RAS_BLOCK__GFX_CPC_UCODE,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.1", "gfx", AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.2", "gfx", AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.3", "gfx", AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.4", "gfx", AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.5", "gfx", AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.6", "gfx", AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.7", "gfx", AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.8", "gfx", AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.9", "gfx", AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.10", "gfx", AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.11", "gfx", AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.12", "gfx", AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.13", "gfx", AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.14", "gfx", AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
};
42988f8a8d2Smrg
/*
 * PCI device id / revision id pairs that get the full set of tests.
 * Devices not listed here fall back to RAS_BLOCK_MASK_QUERY_BASIC in
 * amdgpu_ras_get_test_mask().
 */
static const struct ras_DID_test_mask ras_DID_array[] = {
	{0x66a1, 0x00, RAS_BLOCK_MASK_ALL},
	{0x66a1, 0x01, RAS_BLOCK_MASK_ALL},
	{0x66a1, 0x04, RAS_BLOCK_MASK_ALL},
};
4355324fb0dSmrg
43688f8a8d2Smrgstatic uint32_t amdgpu_ras_find_block_id_by_name(const char *name)
43788f8a8d2Smrg{
43888f8a8d2Smrg	int i;
43988f8a8d2Smrg
44088f8a8d2Smrg	for (i = 0; i < ARRAY_SIZE(ras_block_string); i++) {
44188f8a8d2Smrg		if (strcmp(name, ras_block_string[i]) == 0)
44288f8a8d2Smrg			return i;
44388f8a8d2Smrg	}
44488f8a8d2Smrg
44588f8a8d2Smrg	return ARRAY_SIZE(ras_block_string);
44688f8a8d2Smrg}
44788f8a8d2Smrg
44888f8a8d2Smrgstatic char *amdgpu_ras_get_error_type_id(enum amdgpu_ras_error_type type)
44988f8a8d2Smrg{
45088f8a8d2Smrg	switch (type) {
45188f8a8d2Smrg	case AMDGPU_RAS_ERROR__PARITY:
45288f8a8d2Smrg		return "parity";
45388f8a8d2Smrg	case AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE:
45488f8a8d2Smrg		return "single_correctable";
45588f8a8d2Smrg	case AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE:
45688f8a8d2Smrg		return "multi_uncorrectable";
45788f8a8d2Smrg	case AMDGPU_RAS_ERROR__POISON:
45888f8a8d2Smrg		return "poison";
45988f8a8d2Smrg	case AMDGPU_RAS_ERROR__NONE:
46088f8a8d2Smrg	default:
46188f8a8d2Smrg		return NULL;
46288f8a8d2Smrg	}
46388f8a8d2Smrg}
46488f8a8d2Smrg
4655324fb0dSmrgstatic struct ras_test_mask amdgpu_ras_get_test_mask(drmDevicePtr device)
4665324fb0dSmrg{
4675324fb0dSmrg	int i;
4685324fb0dSmrg	static struct ras_test_mask default_test_mask = RAS_BLOCK_MASK_QUERY_BASIC;
4695324fb0dSmrg
4705324fb0dSmrg	for (i = 0; i < sizeof(ras_DID_array) / sizeof(ras_DID_array[0]); i++) {
4715324fb0dSmrg		if (ras_DID_array[i].device_id == device->deviceinfo.pci->device_id &&
4725324fb0dSmrg				ras_DID_array[i].revision_id == device->deviceinfo.pci->revision_id)
4735324fb0dSmrg			return ras_DID_array[i].test_mask;
4745324fb0dSmrg	}
4755324fb0dSmrg	return default_test_mask;
4765324fb0dSmrg}
4775324fb0dSmrg
4785324fb0dSmrgstatic uint32_t amdgpu_ras_lookup_capability(amdgpu_device_handle device_handle)
4795324fb0dSmrg{
4805324fb0dSmrg	union {
4815324fb0dSmrg		uint64_t feature_mask;
4825324fb0dSmrg		struct {
4835324fb0dSmrg			uint32_t enabled_features;
4845324fb0dSmrg			uint32_t supported_features;
4855324fb0dSmrg		};
4865324fb0dSmrg	} features = { 0 };
4875324fb0dSmrg	int ret;
4885324fb0dSmrg
4895324fb0dSmrg	ret = amdgpu_query_info(device_handle, AMDGPU_INFO_RAS_ENABLED_FEATURES,
4905324fb0dSmrg			sizeof(features), &features);
4915324fb0dSmrg	if (ret)
4925324fb0dSmrg		return 0;
4935324fb0dSmrg
4945324fb0dSmrg	return features.supported_features;
4955324fb0dSmrg}
4965324fb0dSmrg
/* Forward declaration: defined with the other file helpers further down. */
static int get_file_contents(char *file, char *buf, int size);
4985324fb0dSmrg
4995324fb0dSmrgstatic int amdgpu_ras_lookup_id(drmDevicePtr device)
5005324fb0dSmrg{
5015324fb0dSmrg	char path[1024];
5025324fb0dSmrg	char str[128];
5035324fb0dSmrg	drmPciBusInfo info;
5045324fb0dSmrg	int i;
5055324fb0dSmrg	int ret;
5065324fb0dSmrg
5075324fb0dSmrg	for (i = 0; i < MAX_CARDS_SUPPORTED; i++) {
5085324fb0dSmrg		memset(str, 0, sizeof(str));
5095324fb0dSmrg		memset(&info, 0, sizeof(info));
5105324fb0dSmrg		sprintf(path, "/sys/kernel/debug/dri/%d/name", i);
5115324fb0dSmrg		if (get_file_contents(path, str, sizeof(str)) <= 0)
5125324fb0dSmrg			continue;
5135324fb0dSmrg
5145324fb0dSmrg		ret = sscanf(str, "amdgpu dev=%04hx:%02hhx:%02hhx.%01hhx",
5155324fb0dSmrg				&info.domain, &info.bus, &info.dev, &info.func);
5165324fb0dSmrg		if (ret != 4)
5175324fb0dSmrg			continue;
5185324fb0dSmrg
5195324fb0dSmrg		if (memcmp(&info, device->businfo.pci, sizeof(info)) == 0)
5205324fb0dSmrg				return i;
5215324fb0dSmrg	}
5225324fb0dSmrg	return -1;
5235324fb0dSmrg}
5245324fb0dSmrg
5255324fb0dSmrgCU_BOOL suite_ras_tests_enable(void)
5265324fb0dSmrg{
5275324fb0dSmrg	amdgpu_device_handle device_handle;
5285324fb0dSmrg	uint32_t  major_version;
5295324fb0dSmrg	uint32_t  minor_version;
5305324fb0dSmrg	int i;
5315324fb0dSmrg	drmDevicePtr device;
5325324fb0dSmrg
5335324fb0dSmrg	for (i = 0; i < MAX_CARDS_SUPPORTED && drm_amdgpu[i] >= 0; i++) {
5345324fb0dSmrg		if (amdgpu_device_initialize(drm_amdgpu[i], &major_version,
5355324fb0dSmrg					&minor_version, &device_handle))
5365324fb0dSmrg			continue;
5375324fb0dSmrg
5385324fb0dSmrg		if (drmGetDevice2(drm_amdgpu[i],
5395324fb0dSmrg					DRM_DEVICE_GET_PCI_REVISION,
5405324fb0dSmrg					&device))
5415324fb0dSmrg			continue;
5425324fb0dSmrg
5435324fb0dSmrg		if (device->bustype == DRM_BUS_PCI &&
5445324fb0dSmrg				amdgpu_ras_lookup_capability(device_handle)) {
5455324fb0dSmrg			amdgpu_device_deinitialize(device_handle);
5465324fb0dSmrg			return CU_TRUE;
5475324fb0dSmrg		}
5485324fb0dSmrg
5495324fb0dSmrg		if (amdgpu_device_deinitialize(device_handle))
5505324fb0dSmrg			continue;
5515324fb0dSmrg	}
5525324fb0dSmrg
5535324fb0dSmrg	return CU_FALSE;
5545324fb0dSmrg}
5555324fb0dSmrg
5565324fb0dSmrgint suite_ras_tests_init(void)
5575324fb0dSmrg{
5585324fb0dSmrg	drmDevicePtr device;
5595324fb0dSmrg	amdgpu_device_handle device_handle;
5605324fb0dSmrg	uint32_t  major_version;
5615324fb0dSmrg	uint32_t  minor_version;
5625324fb0dSmrg	uint32_t  capability;
5635324fb0dSmrg	struct ras_test_mask test_mask;
5645324fb0dSmrg	int id;
5655324fb0dSmrg	int i;
5665324fb0dSmrg	int r;
5675324fb0dSmrg
5685324fb0dSmrg	for (i = 0; i < MAX_CARDS_SUPPORTED && drm_amdgpu[i] >= 0; i++) {
5695324fb0dSmrg		r = amdgpu_device_initialize(drm_amdgpu[i], &major_version,
5705324fb0dSmrg				&minor_version, &device_handle);
5715324fb0dSmrg		if (r)
5725324fb0dSmrg			continue;
5735324fb0dSmrg
5745324fb0dSmrg		if (drmGetDevice2(drm_amdgpu[i],
5755324fb0dSmrg					DRM_DEVICE_GET_PCI_REVISION,
5765324fb0dSmrg					&device)) {
5775324fb0dSmrg			amdgpu_device_deinitialize(device_handle);
5785324fb0dSmrg			continue;
5795324fb0dSmrg		}
5805324fb0dSmrg
5815324fb0dSmrg		if (device->bustype != DRM_BUS_PCI) {
5825324fb0dSmrg			amdgpu_device_deinitialize(device_handle);
5835324fb0dSmrg			continue;
5845324fb0dSmrg		}
5855324fb0dSmrg
5865324fb0dSmrg		capability = amdgpu_ras_lookup_capability(device_handle);
5875324fb0dSmrg		if (capability == 0) {
5885324fb0dSmrg			amdgpu_device_deinitialize(device_handle);
5895324fb0dSmrg			continue;
5905324fb0dSmrg
5915324fb0dSmrg		}
5925324fb0dSmrg
5935324fb0dSmrg		id = amdgpu_ras_lookup_id(device);
5945324fb0dSmrg		if (id == -1) {
5955324fb0dSmrg			amdgpu_device_deinitialize(device_handle);
5965324fb0dSmrg			continue;
5975324fb0dSmrg		}
5985324fb0dSmrg
5995324fb0dSmrg		test_mask = amdgpu_ras_get_test_mask(device);
6005324fb0dSmrg
6015324fb0dSmrg		devices[devices_count++] = (struct amdgpu_ras_data) {
6025324fb0dSmrg			device_handle, id, capability, test_mask,
6035324fb0dSmrg		};
6045324fb0dSmrg	}
6055324fb0dSmrg
6065324fb0dSmrg	if (devices_count == 0)
6075324fb0dSmrg		return CUE_SINIT_FAILED;
6085324fb0dSmrg
6095324fb0dSmrg	return CUE_SUCCESS;
6105324fb0dSmrg}
6115324fb0dSmrg
6125324fb0dSmrgint suite_ras_tests_clean(void)
6135324fb0dSmrg{
6145324fb0dSmrg	int r;
6155324fb0dSmrg	int i;
6165324fb0dSmrg	int ret = CUE_SUCCESS;
6175324fb0dSmrg
6185324fb0dSmrg	for (i = 0; i < devices_count; i++) {
6195324fb0dSmrg		r = amdgpu_device_deinitialize(devices[i].device_handle);
6205324fb0dSmrg		if (r)
6215324fb0dSmrg			ret = CUE_SCLEAN_FAILED;
6225324fb0dSmrg	}
6235324fb0dSmrg	return ret;
6245324fb0dSmrg}
6255324fb0dSmrg
/* Test entry points; declared here so they can be listed in ras_tests[]. */
static void amdgpu_ras_disable_test(void);
static void amdgpu_ras_enable_test(void);
static void amdgpu_ras_inject_test(void);
static void amdgpu_ras_query_test(void);
static void amdgpu_ras_basic_test(void);
6315324fb0dSmrg
/*
 * CUnit test table of the RAS suite, terminated by CU_TEST_INFO_NULL.
 * The enable test is currently compiled out.
 */
CU_TestInfo ras_tests[] = {
	{ "ras basic test",	amdgpu_ras_basic_test },
	{ "ras query test",	amdgpu_ras_query_test },
	{ "ras inject test",	amdgpu_ras_inject_test },
	{ "ras disable test",	amdgpu_ras_disable_test },
#if 0
	{ "ras enable test",	amdgpu_ras_enable_test },
#endif
	CU_TEST_INFO_NULL,
};
6425324fb0dSmrg
/*
 * Helpers.
 *
 * State describing the card currently under test; all of it is filled in by
 * set_test_card().
 */

static int test_card;			/* index into devices[] */
static char sysfs_path[1024];		/* RAS sysfs directory, with trailing '/' */
static char debugfs_path[1024];		/* RAS debugfs directory, with trailing '/' */
static uint32_t ras_mask;		/* capability mask of the card */
static amdgpu_device_handle device_handle;
6505324fb0dSmrg
6515324fb0dSmrgstatic int set_test_card(int card)
6525324fb0dSmrg{
6535324fb0dSmrg	int i;
6545324fb0dSmrg
6555324fb0dSmrg	test_card = card;
6565324fb0dSmrg	sprintf(sysfs_path, "/sys/class/drm/card%d/device/ras/", devices[card].id);
6575324fb0dSmrg	sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/ras/", devices[card].id);
6585324fb0dSmrg	ras_mask = devices[card].capability;
6595324fb0dSmrg	device_handle = devices[card].device_handle;
6605324fb0dSmrg	ras_block_mask_inject = devices[card].test_mask.inject_mask;
6615324fb0dSmrg	ras_block_mask_query = devices[card].test_mask.query_mask;
6625324fb0dSmrg	ras_block_mask_basic = devices[card].test_mask.basic_mask;
6635324fb0dSmrg
6645324fb0dSmrg	return 0;
6655324fb0dSmrg}
6665324fb0dSmrg
/* Returns the RAS sysfs directory of the card under test (ends with '/'). */
static const char *get_ras_sysfs_root(void)
{
	return sysfs_path;
}
6715324fb0dSmrg
/* Returns the RAS debugfs directory of the card under test (ends with '/'). */
static const char *get_ras_debugfs_root(void)
{
	return debugfs_path;
}
6765324fb0dSmrg
/*
 * Write size bytes from buf to an existing file with a single write() call.
 *
 * Returns the result of write() (bytes written, or -1 on a write error),
 * or -1 when the file cannot be opened for writing.
 */
static int set_file_contents(char *file, char *buf, int size)
{
	int written;
	int handle = open(file, O_WRONLY);

	if (handle != -1) {
		written = write(handle, buf, size);
		close(handle);
		return written;
	}

	return -1;
}
6875324fb0dSmrg
/*
 * Read up to size bytes of a file into buf with a single read() call.
 * The data is not NUL-terminated by this function.
 *
 * Returns the result of read() (bytes read, 0 at end of file, -1 on a read
 * error), or -1 when the file cannot be opened for reading.
 */
static int get_file_contents(char *file, char *buf, int size)
{
	int got;
	int handle = open(file, O_RDONLY);

	if (handle != -1) {
		got = read(handle, buf, size);
		close(handle);
		return got;
	}

	return -1;
}
6985324fb0dSmrg
/*
 * Check whether a file can be opened with the given open(2) flags.
 *
 * Returns 0 when the open succeeds (the descriptor is closed again),
 * -1 otherwise.
 */
static int is_file_ok(char *file, int flags)
{
	const int handle = open(file, flags);

	if (handle != -1) {
		close(handle);
		return 0;
	}

	return -1;
}
7095324fb0dSmrg
7105324fb0dSmrgstatic int amdgpu_ras_is_feature_enabled(enum amdgpu_ras_block block)
7115324fb0dSmrg{
7125324fb0dSmrg	uint32_t feature_mask;
7135324fb0dSmrg	int ret;
7145324fb0dSmrg
7155324fb0dSmrg	ret = amdgpu_query_info(device_handle, AMDGPU_INFO_RAS_ENABLED_FEATURES,
7165324fb0dSmrg			sizeof(feature_mask), &feature_mask);
7175324fb0dSmrg	if (ret)
7185324fb0dSmrg		return -1;
7195324fb0dSmrg
7205324fb0dSmrg	return (1 << block) & feature_mask;
7215324fb0dSmrg}
7225324fb0dSmrg
7235324fb0dSmrgstatic int amdgpu_ras_is_feature_supported(enum amdgpu_ras_block block)
7245324fb0dSmrg{
7255324fb0dSmrg	return (1 << block) & ras_mask;
7265324fb0dSmrg}
7275324fb0dSmrg
7285324fb0dSmrgstatic int amdgpu_ras_invoke(struct ras_debug_if *data)
7295324fb0dSmrg{
7305324fb0dSmrg	char path[1024];
7315324fb0dSmrg	int ret;
7325324fb0dSmrg
7335324fb0dSmrg	sprintf(path, "%s%s", get_ras_debugfs_root(), "ras_ctrl");
7345324fb0dSmrg
7355324fb0dSmrg	ret = set_file_contents(path, (char *)data, sizeof(*data))
7365324fb0dSmrg		- sizeof(*data);
7375324fb0dSmrg	return ret;
7385324fb0dSmrg}
7395324fb0dSmrg
7405324fb0dSmrgstatic int amdgpu_ras_query_err_count(enum amdgpu_ras_block block,
7415324fb0dSmrg		unsigned long *ue, unsigned long *ce)
7425324fb0dSmrg{
7435324fb0dSmrg	char buf[64];
7445324fb0dSmrg	char name[1024];
7455324fb0dSmrg	int ret;
7465324fb0dSmrg
7475324fb0dSmrg	*ue = *ce = 0;
7485324fb0dSmrg
7495324fb0dSmrg	if (amdgpu_ras_is_feature_supported(block) <= 0)
7505324fb0dSmrg		return -1;
7515324fb0dSmrg
7525324fb0dSmrg	sprintf(name, "%s%s%s", get_ras_sysfs_root(), ras_block_str(block), "_err_count");
7535324fb0dSmrg
7545324fb0dSmrg	if (is_file_ok(name, O_RDONLY))
7555324fb0dSmrg		return 0;
7565324fb0dSmrg
7575324fb0dSmrg	if (get_file_contents(name, buf, sizeof(buf)) <= 0)
7585324fb0dSmrg		return -1;
7595324fb0dSmrg
7605324fb0dSmrg	if (sscanf(buf, "ue: %lu\nce: %lu", ue, ce) != 2)
7615324fb0dSmrg		return -1;
7625324fb0dSmrg
7635324fb0dSmrg	return 0;
7645324fb0dSmrg}
7655324fb0dSmrg
76688f8a8d2Smrgstatic int amdgpu_ras_inject(enum amdgpu_ras_block block,
76788f8a8d2Smrg		uint32_t sub_block, enum amdgpu_ras_error_type type,
76888f8a8d2Smrg		uint64_t address, uint64_t value)
76988f8a8d2Smrg{
77088f8a8d2Smrg	struct ras_debug_if data = { .op = 2, };
77188f8a8d2Smrg	struct ras_inject_if *inject = &data.inject;
77288f8a8d2Smrg	int ret;
77388f8a8d2Smrg
77488f8a8d2Smrg	if (amdgpu_ras_is_feature_enabled(block) <= 0) {
77588f8a8d2Smrg		fprintf(stderr, "block id(%d) is not valid\n", block);
77688f8a8d2Smrg		return -1;
77788f8a8d2Smrg	}
77888f8a8d2Smrg
77988f8a8d2Smrg	inject->head.block = block;
78088f8a8d2Smrg	inject->head.type = type;
78188f8a8d2Smrg	inject->head.sub_block_index = sub_block;
78288f8a8d2Smrg	strncpy(inject->head.name, ras_block_str(block), 32);
78388f8a8d2Smrg	inject->address = address;
78488f8a8d2Smrg	inject->value = value;
78588f8a8d2Smrg
78688f8a8d2Smrg	ret = amdgpu_ras_invoke(&data);
78788f8a8d2Smrg	CU_ASSERT_EQUAL(ret, 0);
78888f8a8d2Smrg	if (ret)
78988f8a8d2Smrg		return -1;
79088f8a8d2Smrg
79188f8a8d2Smrg	return 0;
79288f8a8d2Smrg}
79388f8a8d2Smrg
7945324fb0dSmrg//tests
7955324fb0dSmrgstatic void amdgpu_ras_features_test(int enable)
7965324fb0dSmrg{
7975324fb0dSmrg	struct ras_debug_if data;
7985324fb0dSmrg	int ret;
7995324fb0dSmrg	int i;
8005324fb0dSmrg
8015324fb0dSmrg	data.op = enable;
8025324fb0dSmrg	for (i = 0; i < AMDGPU_RAS_BLOCK__LAST; i++) {
8035324fb0dSmrg		struct ras_common_if head = {
8045324fb0dSmrg			.block = i,
8055324fb0dSmrg			.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
8065324fb0dSmrg			.sub_block_index = 0,
8075324fb0dSmrg			.name = "",
8085324fb0dSmrg		};
8095324fb0dSmrg
8105324fb0dSmrg		if (amdgpu_ras_is_feature_supported(i) <= 0)
8115324fb0dSmrg			continue;
8125324fb0dSmrg
8135324fb0dSmrg		data.head = head;
8145324fb0dSmrg
8155324fb0dSmrg		ret = amdgpu_ras_invoke(&data);
8165324fb0dSmrg		CU_ASSERT_EQUAL(ret, 0);
8175324fb0dSmrg
8185324fb0dSmrg		if (ret)
8195324fb0dSmrg			continue;
8205324fb0dSmrg
8215324fb0dSmrg		ret = enable ^ amdgpu_ras_is_feature_enabled(i);
8225324fb0dSmrg		CU_ASSERT_EQUAL(ret, 0);
8235324fb0dSmrg	}
8245324fb0dSmrg}
8255324fb0dSmrg
8265324fb0dSmrgstatic void amdgpu_ras_disable_test(void)
8275324fb0dSmrg{
8285324fb0dSmrg	int i;
8295324fb0dSmrg	for (i = 0; i < devices_count; i++) {
8305324fb0dSmrg		set_test_card(i);
8315324fb0dSmrg		amdgpu_ras_features_test(0);
8325324fb0dSmrg	}
8335324fb0dSmrg}
8345324fb0dSmrg
8355324fb0dSmrgstatic void amdgpu_ras_enable_test(void)
8365324fb0dSmrg{
8375324fb0dSmrg	int i;
8385324fb0dSmrg	for (i = 0; i < devices_count; i++) {
8395324fb0dSmrg		set_test_card(i);
8405324fb0dSmrg		amdgpu_ras_features_test(1);
8415324fb0dSmrg	}
8425324fb0dSmrg}
8435324fb0dSmrg
84488f8a8d2Smrgstatic void __amdgpu_ras_ip_inject_test(const struct ras_inject_test_config *ip_test,
84588f8a8d2Smrg					uint32_t size)
8465324fb0dSmrg{
84788f8a8d2Smrg	int i, ret;
84888f8a8d2Smrg	unsigned long old_ue, old_ce;
84988f8a8d2Smrg	unsigned long ue, ce;
85088f8a8d2Smrg	uint32_t block;
85188f8a8d2Smrg	int timeout;
85288f8a8d2Smrg	bool pass;
8535324fb0dSmrg
85488f8a8d2Smrg	for (i = 0; i < size; i++) {
85588f8a8d2Smrg		timeout = 3;
85688f8a8d2Smrg		pass = false;
8575324fb0dSmrg
85888f8a8d2Smrg		block = amdgpu_ras_find_block_id_by_name(ip_test[i].block);
8595324fb0dSmrg
86088f8a8d2Smrg		/* Ensure one valid ip block */
86188f8a8d2Smrg		if (block == ARRAY_SIZE(ras_block_string))
86288f8a8d2Smrg			break;
8635324fb0dSmrg
86488f8a8d2Smrg		/* Ensure RAS feature for the IP block is enabled by kernel */
86588f8a8d2Smrg		if (amdgpu_ras_is_feature_supported(block) <= 0)
86688f8a8d2Smrg			break;
8675324fb0dSmrg
86888f8a8d2Smrg		ret = amdgpu_ras_query_err_count(block, &old_ue, &old_ce);
8695324fb0dSmrg		CU_ASSERT_EQUAL(ret, 0);
8705324fb0dSmrg		if (ret)
87188f8a8d2Smrg			break;
8725324fb0dSmrg
87388f8a8d2Smrg		ret = amdgpu_ras_inject(block,
87488f8a8d2Smrg					ip_test[i].sub_block,
87588f8a8d2Smrg					ip_test[i].type,
87688f8a8d2Smrg					ip_test[i].address,
87788f8a8d2Smrg					ip_test[i].value);
8785324fb0dSmrg		CU_ASSERT_EQUAL(ret, 0);
8795324fb0dSmrg		if (ret)
88088f8a8d2Smrg			break;
8815324fb0dSmrg
8825324fb0dSmrg		while (timeout > 0) {
88388f8a8d2Smrg			sleep(5);
8845324fb0dSmrg
88588f8a8d2Smrg			ret = amdgpu_ras_query_err_count(block, &ue, &ce);
88688f8a8d2Smrg			CU_ASSERT_EQUAL(ret, 0);
8875324fb0dSmrg			if (ret)
8885324fb0dSmrg				break;
8895324fb0dSmrg
89088f8a8d2Smrg			if (old_ue != ue || old_ce != ce) {
89188f8a8d2Smrg				pass = true;
89288f8a8d2Smrg				sleep(20);
89388f8a8d2Smrg				break;
89488f8a8d2Smrg			}
8955324fb0dSmrg			timeout -= 1;
8965324fb0dSmrg		}
89788f8a8d2Smrg		printf("\t Test %s@block %s, subblock %d, error_type %s, address %ld, value %ld: %s\n",
89888f8a8d2Smrg			ip_test[i].name,
89988f8a8d2Smrg			ip_test[i].block,
90088f8a8d2Smrg			ip_test[i].sub_block,
90188f8a8d2Smrg			amdgpu_ras_get_error_type_id(ip_test[i].type),
90288f8a8d2Smrg			ip_test[i].address,
90388f8a8d2Smrg			ip_test[i].value,
90488f8a8d2Smrg			pass ? "Pass" : "Fail");
9055324fb0dSmrg	}
9065324fb0dSmrg}
9075324fb0dSmrg
90888f8a8d2Smrgstatic void __amdgpu_ras_inject_test(void)
90988f8a8d2Smrg{
91088f8a8d2Smrg	printf("...\n");
91188f8a8d2Smrg
91288f8a8d2Smrg	/* run UMC ras inject test */
91388f8a8d2Smrg	__amdgpu_ras_ip_inject_test(umc_ras_inject_test,
91488f8a8d2Smrg		ARRAY_SIZE(umc_ras_inject_test));
91588f8a8d2Smrg
91688f8a8d2Smrg	/* run GFX ras inject test */
91788f8a8d2Smrg	__amdgpu_ras_ip_inject_test(gfx_ras_inject_test,
91888f8a8d2Smrg		ARRAY_SIZE(gfx_ras_inject_test));
91988f8a8d2Smrg}
92088f8a8d2Smrg
9215324fb0dSmrgstatic void amdgpu_ras_inject_test(void)
9225324fb0dSmrg{
9235324fb0dSmrg	int i;
9245324fb0dSmrg	for (i = 0; i < devices_count; i++) {
9255324fb0dSmrg		set_test_card(i);
9265324fb0dSmrg		__amdgpu_ras_inject_test();
9275324fb0dSmrg	}
9285324fb0dSmrg}
9295324fb0dSmrg
9305324fb0dSmrgstatic void __amdgpu_ras_query_test(void)
9315324fb0dSmrg{
9325324fb0dSmrg	unsigned long ue, ce;
9335324fb0dSmrg	int ret;
9345324fb0dSmrg	int i;
9355324fb0dSmrg
9365324fb0dSmrg	for (i = 0; i < AMDGPU_RAS_BLOCK__LAST; i++) {
9375324fb0dSmrg		if (amdgpu_ras_is_feature_supported(i) <= 0)
9385324fb0dSmrg			continue;
9395324fb0dSmrg
9405324fb0dSmrg		if (!((1 << i) & ras_block_mask_query))
9415324fb0dSmrg			continue;
9425324fb0dSmrg
9435324fb0dSmrg		ret = amdgpu_ras_query_err_count(i, &ue, &ce);
9445324fb0dSmrg		CU_ASSERT_EQUAL(ret, 0);
9455324fb0dSmrg	}
9465324fb0dSmrg}
9475324fb0dSmrg
9485324fb0dSmrgstatic void amdgpu_ras_query_test(void)
9495324fb0dSmrg{
9505324fb0dSmrg	int i;
9515324fb0dSmrg	for (i = 0; i < devices_count; i++) {
9525324fb0dSmrg		set_test_card(i);
9535324fb0dSmrg		__amdgpu_ras_query_test();
9545324fb0dSmrg	}
9555324fb0dSmrg}
9565324fb0dSmrg
9575324fb0dSmrgstatic void amdgpu_ras_basic_test(void)
9585324fb0dSmrg{
9595324fb0dSmrg	unsigned long ue, ce;
9605324fb0dSmrg	char name[1024];
9615324fb0dSmrg	int ret;
9625324fb0dSmrg	int i;
9635324fb0dSmrg	int j;
9645324fb0dSmrg	uint32_t features;
9655324fb0dSmrg	char path[1024];
9665324fb0dSmrg
9675324fb0dSmrg	ret = is_file_ok("/sys/module/amdgpu/parameters/ras_mask", O_RDONLY);
9685324fb0dSmrg	CU_ASSERT_EQUAL(ret, 0);
9695324fb0dSmrg
9705324fb0dSmrg	for (i = 0; i < devices_count; i++) {
9715324fb0dSmrg		set_test_card(i);
9725324fb0dSmrg
9735324fb0dSmrg		ret = amdgpu_query_info(device_handle, AMDGPU_INFO_RAS_ENABLED_FEATURES,
9745324fb0dSmrg				sizeof(features), &features);
9755324fb0dSmrg		CU_ASSERT_EQUAL(ret, 0);
9765324fb0dSmrg
9775324fb0dSmrg		sprintf(path, "%s%s", get_ras_debugfs_root(), "ras_ctrl");
9785324fb0dSmrg		ret = is_file_ok(path, O_WRONLY);
9795324fb0dSmrg		CU_ASSERT_EQUAL(ret, 0);
9805324fb0dSmrg
9815324fb0dSmrg		sprintf(path, "%s%s", get_ras_sysfs_root(), "features");
9825324fb0dSmrg		ret = is_file_ok(path, O_RDONLY);
9835324fb0dSmrg		CU_ASSERT_EQUAL(ret, 0);
9845324fb0dSmrg
9855324fb0dSmrg		for (j = 0; j < AMDGPU_RAS_BLOCK__LAST; j++) {
9865324fb0dSmrg			ret = amdgpu_ras_is_feature_supported(j);
9875324fb0dSmrg			if (ret <= 0)
9885324fb0dSmrg				continue;
9895324fb0dSmrg
9905324fb0dSmrg			if (!((1 << j) & ras_block_mask_basic))
9915324fb0dSmrg				continue;
9925324fb0dSmrg
9935324fb0dSmrg			sprintf(path, "%s%s%s", get_ras_sysfs_root(), ras_block_str(j), "_err_count");
9945324fb0dSmrg			ret = is_file_ok(path, O_RDONLY);
9955324fb0dSmrg			CU_ASSERT_EQUAL(ret, 0);
9965324fb0dSmrg
9975324fb0dSmrg			sprintf(path, "%s%s%s", get_ras_debugfs_root(), ras_block_str(j), "_err_inject");
9985324fb0dSmrg			ret = is_file_ok(path, O_WRONLY);
9995324fb0dSmrg			CU_ASSERT_EQUAL(ret, 0);
10005324fb0dSmrg		}
10015324fb0dSmrg	}
10025324fb0dSmrg}
1003