1 1.3 riastrad /* $NetBSD: amdgpu_ras.h,v 1.3 2021/12/19 10:59:01 riastradh Exp $ */ 2 1.1 riastrad 3 1.1 riastrad /* 4 1.1 riastrad * Copyright 2018 Advanced Micro Devices, Inc. 5 1.1 riastrad * 6 1.1 riastrad * Permission is hereby granted, free of charge, to any person obtaining a 7 1.1 riastrad * copy of this software and associated documentation files (the "Software"), 8 1.1 riastrad * to deal in the Software without restriction, including without limitation 9 1.1 riastrad * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 1.1 riastrad * and/or sell copies of the Software, and to permit persons to whom the 11 1.1 riastrad * Software is furnished to do so, subject to the following conditions: 12 1.1 riastrad * 13 1.1 riastrad * The above copyright notice and this permission notice shall be included in 14 1.1 riastrad * all copies or substantial portions of the Software. 15 1.1 riastrad * 16 1.1 riastrad * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 1.1 riastrad * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 1.1 riastrad * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 1.1 riastrad * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 1.1 riastrad * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 1.1 riastrad * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 1.1 riastrad * OTHER DEALINGS IN THE SOFTWARE. 23 1.1 riastrad * 24 1.1 riastrad * 25 1.1 riastrad */ 26 1.1 riastrad #ifndef _AMDGPU_RAS_H 27 1.1 riastrad #define _AMDGPU_RAS_H 28 1.1 riastrad 29 1.1 riastrad #include <linux/debugfs.h> 30 1.1 riastrad #include <linux/list.h> 31 1.1 riastrad #include "amdgpu.h" 32 1.1 riastrad #include "amdgpu_psp.h" 33 1.1 riastrad #include "ta_ras_if.h" 34 1.1 riastrad #include "amdgpu_ras_eeprom.h" 35 1.1 riastrad 36 1.1 riastrad enum amdgpu_ras_block { 37 1.1 riastrad AMDGPU_RAS_BLOCK__UMC = 0, 38 1.1 riastrad AMDGPU_RAS_BLOCK__SDMA, 39 1.1 riastrad AMDGPU_RAS_BLOCK__GFX, 40 1.1 riastrad AMDGPU_RAS_BLOCK__MMHUB, 41 1.1 riastrad AMDGPU_RAS_BLOCK__ATHUB, 42 1.1 riastrad AMDGPU_RAS_BLOCK__PCIE_BIF, 43 1.1 riastrad AMDGPU_RAS_BLOCK__HDP, 44 1.1 riastrad AMDGPU_RAS_BLOCK__XGMI_WAFL, 45 1.1 riastrad AMDGPU_RAS_BLOCK__DF, 46 1.1 riastrad AMDGPU_RAS_BLOCK__SMN, 47 1.1 riastrad AMDGPU_RAS_BLOCK__SEM, 48 1.1 riastrad AMDGPU_RAS_BLOCK__MP0, 49 1.1 riastrad AMDGPU_RAS_BLOCK__MP1, 50 1.1 riastrad AMDGPU_RAS_BLOCK__FUSE, 51 1.1 riastrad 52 1.1 riastrad AMDGPU_RAS_BLOCK__LAST 53 1.1 riastrad }; 54 1.1 riastrad 55 1.1 riastrad #define AMDGPU_RAS_BLOCK_COUNT AMDGPU_RAS_BLOCK__LAST 56 1.1 riastrad #define AMDGPU_RAS_BLOCK_MASK ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1) 57 1.1 riastrad 58 1.1 riastrad enum amdgpu_ras_gfx_subblock { 59 1.1 riastrad /* CPC */ 60 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0, 61 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH = 62 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START, 63 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_CPC_UCODE, 64 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1, 65 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1, 66 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1, 67 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2, 68 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2, 69 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2, 70 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END = 71 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2, 72 1.1 riastrad /* CPF */ 73 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START, 74 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 = 75 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START, 76 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1, 77 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_CPF_TAG, 78 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG, 79 1.1 riastrad /* CPG */ 80 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START, 81 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ = 82 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START, 83 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG, 84 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_CPG_TAG, 85 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG, 86 1.1 riastrad /* GDS */ 87 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START, 88 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START, 89 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, 90 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, 91 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, 92 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 93 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END = 94 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 95 1.1 riastrad /* SPI */ 96 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM, 97 1.1 riastrad /* SQ */ 98 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START, 99 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START, 100 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D, 101 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I, 102 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQ_VGPR, 103 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR, 104 1.1 riastrad /* SQC (3 ranges) */ 105 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START, 106 1.1 riastrad /* SQC range 0 */ 107 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START = 108 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START, 109 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = 110 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START, 111 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 112 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO, 113 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 114 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, 115 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 116 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, 117 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END = 118 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, 119 1.1 riastrad /* SQC range 1 */ 120 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START, 121 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = 122 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START, 123 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 124 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO, 125 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM, 126 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM, 127 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO, 128 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO, 129 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 130 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, 131 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END = 132 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, 133 1.1 riastrad /* SQC range 2 */ 134 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START, 135 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = 136 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START, 137 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 138 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO, 139 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM, 140 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM, 141 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO, 142 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO, 143 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 144 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, 145 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END = 146 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, 147 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END = 148 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END, 149 1.1 riastrad /* TA */ 150 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START, 151 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO = 152 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START, 153 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO, 154 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO, 155 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO, 156 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO, 157 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO, 158 1.1 riastrad /* TCA */ 159 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START, 160 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO = 161 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START, 162 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO, 163 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END = 164 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO, 165 1.1 riastrad /* TCC (5 sub-ranges) */ 166 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START, 167 1.1 riastrad /* TCC range 0 */ 168 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START = 169 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START, 170 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA = 171 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START, 172 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, 173 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, 174 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, 175 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0, 176 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1, 177 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG, 178 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, 179 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END = 180 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, 181 1.1 riastrad /* TCC range 1 */ 182 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START, 183 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC = 184 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START, 185 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, 186 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END = 187 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, 188 1.1 riastrad /* TCC range 2 */ 189 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START, 190 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA = 191 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START, 192 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL, 193 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO, 194 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN, 195 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ, 196 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO, 197 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM, 198 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, 199 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END = 200 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, 201 1.1 riastrad /* TCC range 3 */ 202 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START, 203 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = 204 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START, 205 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, 206 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END = 207 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, 208 1.1 riastrad /* TCC range 4 */ 209 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START, 210 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = 211 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START, 212 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, 213 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END = 214 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, 215 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END = 216 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END, 217 1.1 riastrad /* TCI */ 218 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM, 219 1.1 riastrad /* TCP */ 220 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START, 221 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM = 222 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START, 223 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM, 224 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO, 225 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO, 226 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM, 227 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0, 228 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, 229 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END = 230 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, 231 1.1 riastrad /* TD */ 232 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START, 233 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO = 234 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START, 235 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI, 236 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO, 237 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO, 238 1.1 riastrad /* EA (3 sub-ranges) */ 239 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START, 240 1.1 riastrad /* EA range 0 */ 241 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START = 242 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START, 243 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = 244 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START, 245 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM, 246 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM, 247 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM, 248 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM, 249 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM, 250 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM, 251 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, 252 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END = 253 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, 254 1.1 riastrad /* EA range 1 */ 255 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START, 256 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = 257 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START, 258 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM, 259 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM, 260 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM, 261 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM, 262 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM, 263 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, 264 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END = 265 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, 266 1.1 riastrad /* EA range 2 */ 267 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START, 268 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM = 269 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START, 270 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM, 271 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM, 272 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM, 273 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END = 274 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM, 275 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END = 276 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END, 277 1.1 riastrad /* UTC VM L2 bank */ 278 1.1 riastrad AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE, 279 1.1 riastrad /* UTC VM walker */ 280 1.1 riastrad AMDGPU_RAS_BLOCK__UTC_VML2_WALKER, 281 1.1 riastrad /* UTC ATC L2 2MB cache */ 282 1.1 riastrad AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK, 283 1.1 riastrad /* UTC ATC L2 4KB cache */ 284 1.1 riastrad AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK, 285 1.1 riastrad AMDGPU_RAS_BLOCK__GFX_MAX 286 1.1 riastrad }; 287 1.1 riastrad 288 1.1 riastrad enum amdgpu_ras_error_type { 289 1.1 riastrad AMDGPU_RAS_ERROR__NONE = 0, 290 1.1 riastrad AMDGPU_RAS_ERROR__PARITY = 1, 291 1.1 riastrad AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE = 2, 292 1.1 riastrad AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE = 4, 293 1.1 riastrad AMDGPU_RAS_ERROR__POISON = 8, 294 1.1 riastrad }; 295 1.1 riastrad 296 1.1 riastrad enum amdgpu_ras_ret { 297 1.1 riastrad AMDGPU_RAS_SUCCESS = 0, 298 1.1 riastrad AMDGPU_RAS_FAIL, 299 1.1 riastrad AMDGPU_RAS_UE, 300 1.1 riastrad AMDGPU_RAS_CE, 301 1.1 riastrad AMDGPU_RAS_PT, 302 1.1 riastrad }; 303 1.1 riastrad 304 1.1 riastrad struct ras_common_if { 305 1.1 riastrad enum amdgpu_ras_block block; 306 1.1 riastrad enum amdgpu_ras_error_type type; 307 1.1 riastrad uint32_t sub_block_index; 308 1.1 riastrad /* block name */ 309 1.1 riastrad char name[32]; 310 1.1 riastrad }; 311 1.1 riastrad 312 1.1 riastrad struct amdgpu_ras { 313 1.1 riastrad /* ras infrastructure */ 314 1.1 riastrad /* for ras itself. */ 315 1.1 riastrad uint32_t hw_supported; 316 1.1 riastrad /* for IP to check its ras ability. */ 317 1.1 riastrad uint32_t supported; 318 1.1 riastrad uint32_t features; 319 1.1 riastrad struct list_head head; 320 1.1 riastrad /* debugfs */ 321 1.1 riastrad struct dentry *dir; 322 1.1 riastrad /* sysfs */ 323 1.3 riastrad #ifdef CONFIG_SYSFS 324 1.1 riastrad struct device_attribute features_attr; 325 1.1 riastrad struct bin_attribute badpages_attr; 326 1.3 riastrad #endif 327 1.1 riastrad /* block array */ 328 1.1 riastrad struct ras_manager *objs; 329 1.1 riastrad 330 1.1 riastrad /* gpu recovery */ 331 1.1 riastrad struct work_struct recovery_work; 332 1.1 riastrad atomic_t in_recovery; 333 1.1 riastrad struct amdgpu_device *adev; 334 1.1 riastrad /* error handler data */ 335 1.1 riastrad struct ras_err_handler_data *eh_data; 336 1.1 riastrad struct mutex recovery_lock; 337 1.1 riastrad 338 1.1 riastrad uint32_t flags; 339 1.1 riastrad bool reboot; 340 1.1 riastrad struct amdgpu_ras_eeprom_control eeprom_control; 341 1.1 riastrad }; 342 1.1 riastrad 343 1.1 riastrad struct ras_fs_data { 344 1.1 riastrad char sysfs_name[32]; 345 1.1 riastrad char debugfs_name[32]; 346 1.1 riastrad }; 347 1.1 riastrad 348 1.1 riastrad struct ras_err_data { 349 1.1 riastrad unsigned long ue_count; 350 1.1 riastrad unsigned long ce_count; 351 1.1 riastrad unsigned long err_addr_cnt; 352 1.1 riastrad struct eeprom_table_record *err_addr; 353 1.1 riastrad }; 354 1.1 riastrad 355 1.1 riastrad struct ras_err_handler_data { 356 1.1 riastrad /* point to bad page records array */ 357 1.1 riastrad struct eeprom_table_record *bps; 358 1.1 riastrad /* point to reserved bo array */ 359 1.1 riastrad struct amdgpu_bo **bps_bo; 360 1.1 riastrad /* the count of entries */ 361 1.1 riastrad int count; 362 1.1 riastrad /* the space can place new entries */ 363 1.1 riastrad int space_left; 364 1.1 riastrad /* last reserved entry's index + 1 */ 365 1.1 riastrad int last_reserved; 366 1.1 riastrad }; 367 1.1 riastrad 368 1.1 riastrad typedef int (*ras_ih_cb)(struct amdgpu_device *adev, 369 1.1 riastrad void *err_data, 370 1.1 riastrad struct amdgpu_iv_entry *entry); 371 1.1 riastrad 372 1.1 riastrad struct ras_ih_data { 373 1.1 riastrad /* interrupt bottom half */ 374 1.1 riastrad struct work_struct ih_work; 375 1.1 riastrad int inuse; 376 1.1 riastrad /* IP callback */ 377 1.1 riastrad ras_ih_cb cb; 378 1.1 riastrad /* full of entries */ 379 1.1 riastrad unsigned char *ring; 380 1.1 riastrad unsigned int ring_size; 381 1.1 riastrad unsigned int element_size; 382 1.1 riastrad unsigned int aligned_element_size; 383 1.1 riastrad unsigned int rptr; 384 1.1 riastrad unsigned int wptr; 385 1.1 riastrad }; 386 1.1 riastrad 387 1.1 riastrad struct ras_manager { 388 1.1 riastrad struct ras_common_if head; 389 1.1 riastrad /* reference count */ 390 1.1 riastrad int use; 391 1.1 riastrad /* ras block link */ 392 1.1 riastrad struct list_head node; 393 1.1 riastrad /* the device */ 394 1.1 riastrad struct amdgpu_device *adev; 395 1.1 riastrad /* debugfs */ 396 1.1 riastrad struct dentry *ent; 397 1.1 riastrad /* sysfs */ 398 1.3 riastrad #ifdef CONFIG_SYSFS 399 1.1 riastrad struct device_attribute sysfs_attr; 400 1.1 riastrad int attr_inuse; 401 1.3 riastrad #endif 402 1.1 riastrad 403 1.1 riastrad /* fs node name */ 404 1.1 riastrad struct ras_fs_data fs_data; 405 1.1 riastrad 406 1.1 riastrad /* IH data */ 407 1.1 riastrad struct ras_ih_data ih_data; 408 1.1 riastrad 409 1.1 riastrad struct ras_err_data err_data; 410 1.1 riastrad }; 411 1.1 riastrad 412 1.1 riastrad struct ras_badpage { 413 1.1 riastrad unsigned int bp; 414 1.1 riastrad unsigned int size; 415 1.1 riastrad unsigned int flags; 416 1.1 riastrad }; 417 1.1 riastrad 418 1.1 riastrad /* interfaces for IP */ 419 1.1 riastrad struct ras_fs_if { 420 1.1 riastrad struct ras_common_if head; 421 1.1 riastrad char sysfs_name[32]; 422 1.1 riastrad char debugfs_name[32]; 423 1.1 riastrad }; 424 1.1 riastrad 425 1.1 riastrad struct ras_query_if { 426 1.1 riastrad struct ras_common_if head; 427 1.1 riastrad unsigned long ue_count; 428 1.1 riastrad unsigned long ce_count; 429 1.1 riastrad }; 430 1.1 riastrad 431 1.1 riastrad struct ras_inject_if { 432 1.1 riastrad struct ras_common_if head; 433 1.1 riastrad uint64_t address; 434 1.1 riastrad uint64_t value; 435 1.1 riastrad }; 436 1.1 riastrad 437 1.1 riastrad struct ras_cure_if { 438 1.1 riastrad struct ras_common_if head; 439 1.1 riastrad uint64_t address; 440 1.1 riastrad }; 441 1.1 riastrad 442 1.1 riastrad struct ras_ih_if { 443 1.1 riastrad struct ras_common_if head; 444 1.1 riastrad ras_ih_cb cb; 445 1.1 riastrad }; 446 1.1 riastrad 447 1.1 riastrad struct ras_dispatch_if { 448 1.1 riastrad struct ras_common_if head; 449 1.1 riastrad struct amdgpu_iv_entry *entry; 450 1.1 riastrad }; 451 1.1 riastrad 452 1.1 riastrad struct ras_debug_if { 453 1.1 riastrad union { 454 1.1 riastrad struct ras_common_if head; 455 1.1 riastrad struct ras_inject_if inject; 456 1.1 riastrad }; 457 1.1 riastrad int op; 458 1.1 riastrad }; 459 1.1 riastrad /* work flow 460 1.1 riastrad * vbios 461 1.1 riastrad * 1: ras feature enable (enabled by default) 462 1.1 riastrad * psp 463 1.1 riastrad * 2: ras framework init (in ip_init) 464 1.1 riastrad * IP 465 1.1 riastrad * 3: IH add 466 1.1 riastrad * 4: debugfs/sysfs create 467 1.1 riastrad * 5: query/inject 468 1.1 riastrad * 6: debugfs/sysfs remove 469 1.1 riastrad * 7: IH remove 470 1.1 riastrad * 8: feature disable 471 1.1 riastrad */ 472 1.1 riastrad 473 1.1 riastrad #define amdgpu_ras_get_context(adev) ((adev)->psp.ras.ras) 474 1.1 riastrad #define amdgpu_ras_set_context(adev, ras_con) ((adev)->psp.ras.ras = (ras_con)) 475 1.1 riastrad 476 1.1 riastrad /* check if ras is supported on block, say, sdma, gfx */ 477 1.1 riastrad static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev, 478 1.1 riastrad unsigned int block) 479 1.1 riastrad { 480 1.1 riastrad struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); 481 1.1 riastrad 482 1.1 riastrad if (block >= AMDGPU_RAS_BLOCK_COUNT) 483 1.1 riastrad return 0; 484 1.1 riastrad return ras && (ras->supported & (1 << block)); 485 1.1 riastrad } 486 1.1 riastrad 487 1.1 riastrad int amdgpu_ras_recovery_init(struct amdgpu_device *adev); 488 1.1 riastrad int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev, 489 1.1 riastrad unsigned int block); 490 1.1 riastrad 491 1.1 riastrad void amdgpu_ras_resume(struct amdgpu_device *adev); 492 1.1 riastrad void amdgpu_ras_suspend(struct amdgpu_device *adev); 493 1.1 riastrad 494 1.1 riastrad unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, 495 1.1 riastrad bool is_ce); 496 1.1 riastrad 497 1.1 riastrad /* error handling functions */ 498 1.1 riastrad int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, 499 1.1 riastrad struct eeprom_table_record *bps, int pages); 500 1.1 riastrad 501 1.1 riastrad int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev); 502 1.1 riastrad 503 1.1 riastrad static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) 504 1.1 riastrad { 505 1.1 riastrad struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); 506 1.1 riastrad 507 1.1 riastrad /* save bad page to eeprom before gpu reset, 508 1.1 riastrad * i2c may be unstable in gpu reset 509 1.1 riastrad */ 510 1.1 riastrad if (in_task()) 511 1.1 riastrad amdgpu_ras_reserve_bad_pages(adev); 512 1.1 riastrad 513 1.1 riastrad if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) 514 1.1 riastrad schedule_work(&ras->recovery_work); 515 1.1 riastrad return 0; 516 1.1 riastrad } 517 1.1 riastrad 518 1.1 riastrad static inline enum ta_ras_block 519 1.1 riastrad amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) { 520 1.1 riastrad switch (block) { 521 1.1 riastrad case AMDGPU_RAS_BLOCK__UMC: 522 1.1 riastrad return TA_RAS_BLOCK__UMC; 523 1.1 riastrad case AMDGPU_RAS_BLOCK__SDMA: 524 1.1 riastrad return TA_RAS_BLOCK__SDMA; 525 1.1 riastrad case AMDGPU_RAS_BLOCK__GFX: 526 1.1 riastrad return TA_RAS_BLOCK__GFX; 527 1.1 riastrad case AMDGPU_RAS_BLOCK__MMHUB: 528 1.1 riastrad return TA_RAS_BLOCK__MMHUB; 529 1.1 riastrad case AMDGPU_RAS_BLOCK__ATHUB: 530 1.1 riastrad return TA_RAS_BLOCK__ATHUB; 531 1.1 riastrad case AMDGPU_RAS_BLOCK__PCIE_BIF: 532 1.1 riastrad return TA_RAS_BLOCK__PCIE_BIF; 533 1.1 riastrad case AMDGPU_RAS_BLOCK__HDP: 534 1.1 riastrad return TA_RAS_BLOCK__HDP; 535 1.1 riastrad case AMDGPU_RAS_BLOCK__XGMI_WAFL: 536 1.1 riastrad return TA_RAS_BLOCK__XGMI_WAFL; 537 1.1 riastrad case AMDGPU_RAS_BLOCK__DF: 538 1.1 riastrad return TA_RAS_BLOCK__DF; 539 1.1 riastrad case AMDGPU_RAS_BLOCK__SMN: 540 1.1 riastrad return TA_RAS_BLOCK__SMN; 541 1.1 riastrad case AMDGPU_RAS_BLOCK__SEM: 542 1.1 riastrad return TA_RAS_BLOCK__SEM; 543 1.1 riastrad case AMDGPU_RAS_BLOCK__MP0: 544 1.1 riastrad return TA_RAS_BLOCK__MP0; 545 1.1 riastrad case AMDGPU_RAS_BLOCK__MP1: 546 1.1 riastrad return TA_RAS_BLOCK__MP1; 547 1.1 riastrad case AMDGPU_RAS_BLOCK__FUSE: 548 1.1 riastrad return TA_RAS_BLOCK__FUSE; 549 1.1 riastrad default: 550 1.1 riastrad WARN_ONCE(1, "RAS ERROR: unexpected block id %d\n", block); 551 1.1 riastrad return TA_RAS_BLOCK__UMC; 552 1.1 riastrad } 553 1.1 riastrad } 554 1.1 riastrad 555 1.1 riastrad static inline enum ta_ras_error_type 556 1.1 riastrad amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) { 557 1.1 riastrad switch (error) { 558 1.1 riastrad case AMDGPU_RAS_ERROR__NONE: 559 1.1 riastrad return TA_RAS_ERROR__NONE; 560 1.1 riastrad case AMDGPU_RAS_ERROR__PARITY: 561 1.1 riastrad return TA_RAS_ERROR__PARITY; 562 1.1 riastrad case AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE: 563 1.1 riastrad return TA_RAS_ERROR__SINGLE_CORRECTABLE; 564 1.1 riastrad case AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE: 565 1.1 riastrad return TA_RAS_ERROR__MULTI_UNCORRECTABLE; 566 1.1 riastrad case AMDGPU_RAS_ERROR__POISON: 567 1.1 riastrad return TA_RAS_ERROR__POISON; 568 1.1 riastrad default: 569 1.1 riastrad WARN_ONCE(1, "RAS ERROR: unexpected error type %d\n", error); 570 1.1 riastrad return TA_RAS_ERROR__NONE; 571 1.1 riastrad } 572 1.1 riastrad } 573 1.1 riastrad 574 1.1 riastrad /* called in ip_init and ip_fini */ 575 1.1 riastrad int amdgpu_ras_init(struct amdgpu_device *adev); 576 1.1 riastrad int amdgpu_ras_fini(struct amdgpu_device *adev); 577 1.1 riastrad int amdgpu_ras_pre_fini(struct amdgpu_device *adev); 578 1.1 riastrad int amdgpu_ras_late_init(struct amdgpu_device *adev, 579 1.1 riastrad struct ras_common_if *ras_block, 580 1.1 riastrad struct ras_fs_if *fs_info, 581 1.1 riastrad struct ras_ih_if *ih_info); 582 1.1 riastrad void amdgpu_ras_late_fini(struct amdgpu_device *adev, 583 1.1 riastrad struct ras_common_if *ras_block, 584 1.1 riastrad struct ras_ih_if *ih_info); 585 1.1 riastrad 586 1.1 riastrad int amdgpu_ras_feature_enable(struct amdgpu_device *adev, 587 1.1 riastrad struct ras_common_if *head, bool enable); 588 1.1 riastrad 589 1.1 riastrad int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev, 590 1.1 riastrad struct ras_common_if *head, bool enable); 591 1.1 riastrad 592 1.1 riastrad int amdgpu_ras_sysfs_create(struct amdgpu_device *adev, 593 1.1 riastrad struct ras_fs_if *head); 594 1.1 riastrad 595 1.1 riastrad int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev, 596 1.1 riastrad struct ras_common_if *head); 597 1.1 riastrad 598 1.1 riastrad void amdgpu_ras_debugfs_create(struct amdgpu_device *adev, 599 1.1 riastrad struct ras_fs_if *head); 600 1.1 riastrad 601 1.1 riastrad void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev, 602 1.1 riastrad struct ras_common_if *head); 603 1.1 riastrad 604 1.1 riastrad int amdgpu_ras_error_query(struct amdgpu_device *adev, 605 1.1 riastrad struct ras_query_if *info); 606 1.1 riastrad 607 1.1 riastrad int amdgpu_ras_error_inject(struct amdgpu_device *adev, 608 1.1 riastrad struct ras_inject_if *info); 609 1.1 riastrad 610 1.1 riastrad int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev, 611 1.1 riastrad struct ras_ih_if *info); 612 1.1 riastrad 613 1.1 riastrad int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev, 614 1.1 riastrad struct ras_ih_if *info); 615 1.1 riastrad 616 1.1 riastrad int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev, 617 1.1 riastrad struct ras_dispatch_if *info); 618 1.1 riastrad 619 1.1 riastrad struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, 620 1.1 riastrad struct ras_common_if *head); 621 1.1 riastrad 622 1.1 riastrad extern atomic_t amdgpu_ras_in_intr; 623 1.1 riastrad 624 1.1 riastrad static inline bool amdgpu_ras_intr_triggered(void) 625 1.1 riastrad { 626 1.1 riastrad return !!atomic_read(&amdgpu_ras_in_intr); 627 1.1 riastrad } 628 1.1 riastrad 629 1.1 riastrad static inline void amdgpu_ras_intr_cleared(void) 630 1.1 riastrad { 631 1.1 riastrad atomic_set(&amdgpu_ras_in_intr, 0); 632 1.1 riastrad } 633 1.1 riastrad 634 1.1 riastrad void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev); 635 1.1 riastrad 636 1.1 riastrad #endif 637