/*	$NetBSD: amdgpu_mxgpu_nv.c,v 1.2 2021/12/18 23:44:58 riastradh Exp $	*/

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_mxgpu_nv.c,v 1.2 2021/12/18 23:44:58 riastradh Exp $");

#include "amdgpu.h"
#include "nbio/nbio_2_3_offset.h"
#include "nbio/nbio_2_3_sh_mask.h"
#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "soc15.h"
#include "navi10_ih.h"
#include "soc15_common.h"
#include "mxgpu_nv.h"
#include "mxgpu_ai.h"
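
/*
 * Byte-wise accessors for the PF/VF mailbox control register
 * (BIF_BX_PF_MAILBOX_CONTROL), as used by the helpers below: in the
 * TRN control byte, bit 0 is TRN_MSG_VALID (raised by the VF) and
 * bit 1 is TRN_MSG_ACK (raised by the host); in the RCV control byte,
 * bit 0 is RCV_MSG_VALID (raised by the host) and bit 1 is RCV_MSG_ACK
 * (raised by the VF to acknowledge a received message).
 */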

static void xgpu_nv_mailbox_send_ack(struct amdgpu_device *adev)
{
        WREG8(NV_MAIBOX_CONTROL_RCV_OFFSET_BYTE, 2);
}

static void xgpu_nv_mailbox_set_valid(struct amdgpu_device *adev, bool val)
{
        WREG8(NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE, val ? 1 : 0);
}

/*
 * this peek_msg could *only* be called in IRQ routine, because in IRQ routine
 * the RCV_MSG_VALID field of BIF_BX_PF_MAILBOX_CONTROL must already have been
 * set to 1 by the host.
 *
 * if not called in IRQ routine, this peek_msg is not guaranteed to return the
 * correct value, since RCV_DW0 only carries a valid message while
 * RCV_MSG_VALID is set by the host.
 */
static enum idh_event xgpu_nv_mailbox_peek_msg(struct amdgpu_device *adev)
{
        return RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
                        mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW0));
}


static int xgpu_nv_mailbox_rcv_msg(struct amdgpu_device *adev,
                                   enum idh_event event)
{
        u32 reg;

        reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
                        mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW0));
        if (reg != event)
                return -ENOENT;

        xgpu_nv_mailbox_send_ack(adev);

        return 0;
}

static uint8_t xgpu_nv_peek_ack(struct amdgpu_device *adev)
{
        return RREG8(NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE) & 2;
}

static int xgpu_nv_poll_ack(struct amdgpu_device *adev)
{
        int timeout = NV_MAILBOX_POLL_ACK_TIMEDOUT;
        u8 reg;

        do {
                reg = RREG8(NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE);
                if (reg & 2)
                        return 0;

                mdelay(5);
                timeout -= 5;
        } while (timeout > 1);

        pr_err("Doesn't get TRN_MSG_ACK from pf in %d msec\n", NV_MAILBOX_POLL_ACK_TIMEDOUT);

        return -ETIME;
}

static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event)
{
        int r, timeout = NV_MAILBOX_POLL_MSG_TIMEDOUT;

        do {
                r = xgpu_nv_mailbox_rcv_msg(adev, event);
                if (!r)
                        return 0;

                msleep(10);
                timeout -= 10;
        } while (timeout > 1);

        pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r);

        return -ETIME;
}
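
/*
 * Send one request to the host (PF) through the transmit mailbox:
 * drop TRN_MSG_VALID until the host's ack has cleared, write the
 * request and its three data words into TRN_DW0..TRN_DW3, raise
 * TRN_MSG_VALID, then poll for TRN_MSG_ACK from the host before
 * dropping TRN_MSG_VALID again.
 */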
static void xgpu_nv_mailbox_trans_msg(struct amdgpu_device *adev,
                enum idh_request req, u32 data1, u32 data2, u32 data3)
{
        u32 reg;
        int r;
        uint8_t trn;

        /* IMPORTANT:
         * clear TRN_MSG_VALID to make the host clear its RCV_MSG_ACK; once
         * the host's RCV_MSG_ACK is cleared, the hw automatically clears the
         * VF's TRN_MSG_ACK as well.  Otherwise the xgpu_nv_poll_ack() below
         * would return immediately.
         */
        do {
                xgpu_nv_mailbox_set_valid(adev, false);
                trn = xgpu_nv_peek_ack(adev);
                if (trn) {
                        pr_err("trn=%x ACK should not assert! wait again !\n", trn);
                        msleep(1);
                }
        } while (trn);

        reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
                        mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0));
        reg = REG_SET_FIELD(reg, BIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0,
                        MSGBUF_DATA, req);
        WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0),
                        reg);
        WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW1),
                        data1);
        WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW2),
                        data2);
        WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW3),
                        data3);

        xgpu_nv_mailbox_set_valid(adev, true);

        /* start to poll ack */
        r = xgpu_nv_poll_ack(adev);
        if (r)
                pr_err("Doesn't get ack from pf, continue\n");

        xgpu_nv_mailbox_set_valid(adev, false);
}

static int xgpu_nv_send_access_requests(struct amdgpu_device *adev,
                                        enum idh_request req)
{
        int r;

        xgpu_nv_mailbox_trans_msg(adev, req, 0, 0, 0);

        /* start to check msg if request is idh_req_gpu_init_access */
        if (req == IDH_REQ_GPU_INIT_ACCESS ||
            req == IDH_REQ_GPU_FINI_ACCESS ||
            req == IDH_REQ_GPU_RESET_ACCESS) {
                r = xgpu_nv_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
                if (r) {
                        pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
                        return r;
                }
                /* Retrieve checksum from mailbox2 */
                if (req == IDH_REQ_GPU_INIT_ACCESS || req == IDH_REQ_GPU_RESET_ACCESS) {
                        adev->virt.fw_reserve.checksum_key =
                                RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
                                        mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW2));
                }
        }

        return 0;
}

static int xgpu_nv_request_reset(struct amdgpu_device *adev)
{
        return xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
}

static int xgpu_nv_request_full_gpu_access(struct amdgpu_device *adev,
                                           bool init)
{
        enum idh_request req;

        req = init ? IDH_REQ_GPU_INIT_ACCESS : IDH_REQ_GPU_FINI_ACCESS;
        return xgpu_nv_send_access_requests(adev, req);
}

static int xgpu_nv_release_full_gpu_access(struct amdgpu_device *adev,
                                           bool init)
{
        enum idh_request req;
        int r = 0;

        req = init ? IDH_REL_GPU_INIT_ACCESS : IDH_REL_GPU_FINI_ACCESS;
        r = xgpu_nv_send_access_requests(adev, req);

        return r;
}

static int xgpu_nv_mailbox_ack_irq(struct amdgpu_device *adev,
                                   struct amdgpu_irq_src *source,
                                   struct amdgpu_iv_entry *entry)
{
        DRM_DEBUG("get ack intr and do nothing.\n");
        return 0;
}

static int xgpu_nv_set_mailbox_ack_irq(struct amdgpu_device *adev,
                                       struct amdgpu_irq_src *source,
                                       unsigned type,
                                       enum amdgpu_interrupt_state state)
{
        u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL));

        tmp = REG_SET_FIELD(tmp, BIF_BX_PF_MAILBOX_INT_CNTL, ACK_INT_EN,
                        (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0);
        WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL), tmp);

        return 0;
}
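
/*
 * Host-initiated function-level reset (FLR) handling: the receive-mailbox
 * interrupt schedules flr_work on IDH_FLR_NOTIFICATION, and flr_work then
 * holds off amdgpu's own GPU recovery until the host reports
 * IDH_FLR_NOTIFICATION_CMPL or the poll times out.
 */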
static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
{
        struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
        struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
        int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT;
        int locked;

        /* block amdgpu_gpu_recover till msg FLR COMPLETE received,
         * otherwise the mailbox msg will be ruined/reset by
         * the VF FLR.
         *
         * we can unlock the lock_reset to allow "amdgpu_job_timedout"
         * to run gpu_recover() after FLR_NOTIFICATION_CMPL is received,
         * which means the host side has finished this VF's FLR.
         */
        locked = mutex_trylock(&adev->lock_reset);
        if (locked)
                adev->in_gpu_reset = true;

        do {
                if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
                        goto flr_done;

                msleep(10);
                timeout -= 10;
        } while (timeout > 1);

flr_done:
        if (locked) {
                adev->in_gpu_reset = false;
                mutex_unlock(&adev->lock_reset);
        }

        /* Trigger recovery for world switch failure if no TDR */
        if (amdgpu_device_should_recover_gpu(adev)
                && (adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT ||
                adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
                adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
                adev->video_timeout == MAX_SCHEDULE_TIMEOUT))
                amdgpu_device_gpu_recover(adev, NULL);
}

static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev,
                                       struct amdgpu_irq_src *src,
                                       unsigned type,
                                       enum amdgpu_interrupt_state state)
{
        u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL));

        tmp = REG_SET_FIELD(tmp, BIF_BX_PF_MAILBOX_INT_CNTL, VALID_INT_EN,
                        (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0);
        WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL), tmp);

        return 0;
}
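
/*
 * Receive-mailbox interrupt handler: only IDH_FLR_NOTIFICATION requires
 * action here (it schedules flr_work above); every other event is either
 * consumed by a polling path or intentionally ignored.
 */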
static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device *adev,
                                   struct amdgpu_irq_src *source,
                                   struct amdgpu_iv_entry *entry)
{
        enum idh_event event = xgpu_nv_mailbox_peek_msg(adev);

        switch (event) {
        case IDH_FLR_NOTIFICATION:
                if (amdgpu_sriov_runtime(adev))
                        schedule_work(&adev->virt.flr_work);
                break;
                /* READY_TO_ACCESS_GPU is fetched by kernel polling, so the
                 * IRQ can safely ignore it here because the polling thread
                 * will handle it; other msgs such as flr complete are not
                 * handled here either.
                 */
        case IDH_CLR_MSG_BUF:
        case IDH_FLR_NOTIFICATION_CMPL:
        case IDH_READY_TO_ACCESS_GPU:
        default:
                break;
        }

        return 0;
}

static const struct amdgpu_irq_src_funcs xgpu_nv_mailbox_ack_irq_funcs = {
        .set = xgpu_nv_set_mailbox_ack_irq,
        .process = xgpu_nv_mailbox_ack_irq,
};

static const struct amdgpu_irq_src_funcs xgpu_nv_mailbox_rcv_irq_funcs = {
        .set = xgpu_nv_set_mailbox_rcv_irq,
        .process = xgpu_nv_mailbox_rcv_irq,
};

void xgpu_nv_mailbox_set_irq_funcs(struct amdgpu_device *adev)
{
        adev->virt.ack_irq.num_types = 1;
        adev->virt.ack_irq.funcs = &xgpu_nv_mailbox_ack_irq_funcs;
        adev->virt.rcv_irq.num_types = 1;
        adev->virt.rcv_irq.funcs = &xgpu_nv_mailbox_rcv_irq_funcs;
}

int xgpu_nv_mailbox_add_irq_id(struct amdgpu_device *adev)
{
        int r;

        r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq);
        if (r)
                return r;

        r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq);
        if (r) {
                amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
                return r;
        }

        return 0;
}

int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev)
{
        int r;

        r = amdgpu_irq_get(adev, &adev->virt.rcv_irq, 0);
        if (r)
                return r;
        r = amdgpu_irq_get(adev, &adev->virt.ack_irq, 0);
        if (r) {
                amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
                return r;
        }

        INIT_WORK(&adev->virt.flr_work, xgpu_nv_mailbox_flr_work);

        return 0;
}

void xgpu_nv_mailbox_put_irq(struct amdgpu_device *adev)
{
        amdgpu_irq_put(adev, &adev->virt.ack_irq, 0);
        amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
}

const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
        .req_full_gpu = xgpu_nv_request_full_gpu_access,
        .rel_full_gpu = xgpu_nv_release_full_gpu_access,
        .reset_gpu = xgpu_nv_request_reset,
        .wait_reset = NULL,
        .trans_msg = xgpu_nv_mailbox_trans_msg,
};