1 /* $NetBSD: kfd_dbgdev.c,v 1.3 2021/12/18 23:44:59 riastradh Exp $ */ 2 3 /* 4 * Copyright 2014 Advanced Micro Devices, Inc. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 * OTHER DEALINGS IN THE SOFTWARE. 23 * 24 */ 25 26 #include <sys/cdefs.h> 27 __KERNEL_RCSID(0, "$NetBSD: kfd_dbgdev.c,v 1.3 2021/12/18 23:44:59 riastradh Exp $"); 28 29 #include <linux/types.h> 30 #include <linux/kernel.h> 31 #include <linux/log2.h> 32 #include <linux/sched.h> 33 #include <linux/slab.h> 34 #include <linux/mutex.h> 35 #include <linux/device.h> 36 37 #include "kfd_pm4_headers.h" 38 #include "kfd_pm4_headers_diq.h" 39 #include "kfd_kernel_queue.h" 40 #include "kfd_priv.h" 41 #include "kfd_pm4_opcodes.h" 42 #include "cik_regs.h" 43 #include "kfd_dbgmgr.h" 44 #include "kfd_dbgdev.h" 45 #include "kfd_device_queue_manager.h" 46 47 static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev) 48 { 49 dev->kfd2kgd->address_watch_disable(dev->kgd); 50 } 51 52 static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, 53 unsigned int pasid, uint64_t vmid0_address, 54 uint32_t *packet_buff, size_t size_in_bytes) 55 { 56 struct pm4__release_mem *rm_packet; 57 struct pm4__indirect_buffer_pasid *ib_packet; 58 struct kfd_mem_obj *mem_obj; 59 size_t pq_packets_size_in_bytes; 60 union ULARGE_INTEGER *largep; 61 union ULARGE_INTEGER addr; 62 struct kernel_queue *kq; 63 uint64_t *rm_state; 64 unsigned int *ib_packet_buff; 65 int status; 66 67 if (WARN_ON(!size_in_bytes)) 68 return -EINVAL; 69 70 kq = dbgdev->kq; 71 72 pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) + 73 sizeof(struct pm4__indirect_buffer_pasid); 74 75 /* 76 * We acquire a buffer from DIQ 77 * The receive packet buff will be sitting on the Indirect Buffer 78 * and in the PQ we put the IB packet + sync packet(s). 79 */ 80 status = kq_acquire_packet_buffer(kq, 81 pq_packets_size_in_bytes / sizeof(uint32_t), 82 &ib_packet_buff); 83 if (status) { 84 pr_err("kq_acquire_packet_buffer failed\n"); 85 return status; 86 } 87 88 memset(ib_packet_buff, 0, pq_packets_size_in_bytes); 89 90 ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff); 91 92 ib_packet->header.count = 3; 93 ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID; 94 ib_packet->header.type = PM4_TYPE_3; 95 96 largep = (union ULARGE_INTEGER *) &vmid0_address; 97 98 ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2; 99 ib_packet->bitfields3.ib_base_hi = largep->u.high_part; 100 101 ib_packet->control = (1 << 23) | (1 << 31) | 102 ((size_in_bytes / 4) & 0xfffff); 103 104 ib_packet->bitfields5.pasid = pasid; 105 106 /* 107 * for now we use release mem for GPU-CPU synchronization 108 * Consider WaitRegMem + WriteData as a better alternative 109 * we get a GART allocations ( gpu/cpu mapping), 110 * for the sync variable, and wait until: 111 * (a) Sync with HW 112 * (b) Sync var is written by CP to mem. 113 */ 114 rm_packet = (struct pm4__release_mem *) (ib_packet_buff + 115 (sizeof(struct pm4__indirect_buffer_pasid) / 116 sizeof(unsigned int))); 117 118 status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t), 119 &mem_obj); 120 121 if (status) { 122 pr_err("Failed to allocate GART memory\n"); 123 kq_rollback_packet(kq); 124 return status; 125 } 126 127 rm_state = (uint64_t *) mem_obj->cpu_ptr; 128 129 *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING; 130 131 rm_packet->header.opcode = IT_RELEASE_MEM; 132 rm_packet->header.type = PM4_TYPE_3; 133 rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2; 134 135 rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; 136 rm_packet->bitfields2.event_index = 137 event_index___release_mem__end_of_pipe; 138 139 rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru; 140 rm_packet->bitfields2.atc = 0; 141 rm_packet->bitfields2.tc_wb_action_ena = 1; 142 143 addr.quad_part = mem_obj->gpu_addr; 144 145 rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2; 146 rm_packet->address_hi = addr.u.high_part; 147 148 rm_packet->bitfields3.data_sel = 149 data_sel___release_mem__send_64_bit_data; 150 151 rm_packet->bitfields3.int_sel = 152 int_sel___release_mem__send_data_after_write_confirm; 153 154 rm_packet->bitfields3.dst_sel = 155 dst_sel___release_mem__memory_controller; 156 157 rm_packet->data_lo = QUEUESTATE__ACTIVE; 158 159 kq_submit_packet(kq); 160 161 /* Wait till CP writes sync code: */ 162 status = amdkfd_fence_wait_timeout( 163 (unsigned int *) rm_state, 164 QUEUESTATE__ACTIVE, 1500); 165 166 kfd_gtt_sa_free(dbgdev->dev, mem_obj); 167 168 return status; 169 } 170 171 static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev) 172 { 173 /* 174 * no action is needed in this case, 175 * just make sure diq will not be used 176 */ 177 178 dbgdev->kq = NULL; 179 180 return 0; 181 } 182 183 static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) 184 { 185 struct queue_properties properties; 186 unsigned int qid; 187 struct kernel_queue *kq = NULL; 188 int status; 189 190 properties.type = KFD_QUEUE_TYPE_DIQ; 191 192 status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL, 193 &properties, &qid, NULL); 194 195 if (status) { 196 pr_err("Failed to create DIQ\n"); 197 return status; 198 } 199 200 pr_debug("DIQ Created with queue id: %d\n", qid); 201 202 kq = pqm_get_kernel_queue(dbgdev->pqm, qid); 203 204 if (!kq) { 205 pr_err("Error getting DIQ\n"); 206 pqm_destroy_queue(dbgdev->pqm, qid); 207 return -EFAULT; 208 } 209 210 dbgdev->kq = kq; 211 212 return status; 213 } 214 215 static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev) 216 { 217 /* disable watch address */ 218 dbgdev_address_watch_disable_nodiq(dbgdev->dev); 219 return 0; 220 } 221 222 static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev) 223 { 224 /* todo - disable address watch */ 225 int status; 226 227 status = pqm_destroy_queue(dbgdev->pqm, 228 dbgdev->kq->queue->properties.queue_id); 229 dbgdev->kq = NULL; 230 231 return status; 232 } 233 234 static void dbgdev_address_watch_set_registers( 235 const struct dbg_address_watch_info *adw_info, 236 union TCP_WATCH_ADDR_H_BITS *addrHi, 237 union TCP_WATCH_ADDR_L_BITS *addrLo, 238 union TCP_WATCH_CNTL_BITS *cntl, 239 unsigned int index, unsigned int vmid) 240 { 241 union ULARGE_INTEGER addr; 242 243 addr.quad_part = 0; 244 addrHi->u32All = 0; 245 addrLo->u32All = 0; 246 cntl->u32All = 0; 247 248 if (adw_info->watch_mask) 249 cntl->bitfields.mask = 250 (uint32_t) (adw_info->watch_mask[index] & 251 ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK); 252 else 253 cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; 254 255 addr.quad_part = (unsigned long long) adw_info->watch_address[index]; 256 257 addrHi->bitfields.addr = addr.u.high_part & 258 ADDRESS_WATCH_REG_ADDHIGH_MASK; 259 addrLo->bitfields.addr = 260 (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT); 261 262 cntl->bitfields.mode = adw_info->watch_mode[index]; 263 cntl->bitfields.vmid = (uint32_t) vmid; 264 /* for now assume it is an ATC address */ 265 cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT; 266 267 pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask); 268 pr_debug("\t\t%20s %08x\n", "set reg add high :", 269 addrHi->bitfields.addr); 270 pr_debug("\t\t%20s %08x\n", "set reg add low :", 271 addrLo->bitfields.addr); 272 } 273 274 static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, 275 struct dbg_address_watch_info *adw_info) 276 { 277 union TCP_WATCH_ADDR_H_BITS addrHi; 278 union TCP_WATCH_ADDR_L_BITS addrLo; 279 union TCP_WATCH_CNTL_BITS cntl; 280 struct kfd_process_device *pdd; 281 unsigned int i; 282 283 /* taking the vmid for that process on the safe way using pdd */ 284 pdd = kfd_get_process_device_data(dbgdev->dev, 285 adw_info->process); 286 if (!pdd) { 287 pr_err("Failed to get pdd for wave control no DIQ\n"); 288 return -EFAULT; 289 } 290 291 addrHi.u32All = 0; 292 addrLo.u32All = 0; 293 cntl.u32All = 0; 294 295 if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || 296 (adw_info->num_watch_points == 0)) { 297 pr_err("num_watch_points is invalid\n"); 298 return -EINVAL; 299 } 300 301 if (!adw_info->watch_mode || !adw_info->watch_address) { 302 pr_err("adw_info fields are not valid\n"); 303 return -EINVAL; 304 } 305 306 for (i = 0; i < adw_info->num_watch_points; i++) { 307 dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo, 308 &cntl, i, pdd->qpd.vmid); 309 310 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); 311 pr_debug("\t\t%20s %08x\n", "register index :", i); 312 pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid); 313 pr_debug("\t\t%20s %08x\n", "Address Low is :", 314 addrLo.bitfields.addr); 315 pr_debug("\t\t%20s %08x\n", "Address high is :", 316 addrHi.bitfields.addr); 317 pr_debug("\t\t%20s %08x\n", "Address high is :", 318 addrHi.bitfields.addr); 319 pr_debug("\t\t%20s %08x\n", "Control Mask is :", 320 cntl.bitfields.mask); 321 pr_debug("\t\t%20s %08x\n", "Control Mode is :", 322 cntl.bitfields.mode); 323 pr_debug("\t\t%20s %08x\n", "Control Vmid is :", 324 cntl.bitfields.vmid); 325 pr_debug("\t\t%20s %08x\n", "Control atc is :", 326 cntl.bitfields.atc); 327 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); 328 329 pdd->dev->kfd2kgd->address_watch_execute( 330 dbgdev->dev->kgd, 331 i, 332 cntl.u32All, 333 addrHi.u32All, 334 addrLo.u32All); 335 } 336 337 return 0; 338 } 339 340 static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, 341 struct dbg_address_watch_info *adw_info) 342 { 343 struct pm4__set_config_reg *packets_vec; 344 union TCP_WATCH_ADDR_H_BITS addrHi; 345 union TCP_WATCH_ADDR_L_BITS addrLo; 346 union TCP_WATCH_CNTL_BITS cntl; 347 struct kfd_mem_obj *mem_obj; 348 unsigned int aw_reg_add_dword; 349 uint32_t *packet_buff_uint; 350 unsigned int i; 351 int status; 352 size_t ib_size = sizeof(struct pm4__set_config_reg) * 4; 353 /* we do not control the vmid in DIQ mode, just a place holder */ 354 unsigned int vmid = 0; 355 356 addrHi.u32All = 0; 357 addrLo.u32All = 0; 358 cntl.u32All = 0; 359 360 if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || 361 (adw_info->num_watch_points == 0)) { 362 pr_err("num_watch_points is invalid\n"); 363 return -EINVAL; 364 } 365 366 if (!adw_info->watch_mode || !adw_info->watch_address) { 367 pr_err("adw_info fields are not valid\n"); 368 return -EINVAL; 369 } 370 371 status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); 372 373 if (status) { 374 pr_err("Failed to allocate GART memory\n"); 375 return status; 376 } 377 378 packet_buff_uint = mem_obj->cpu_ptr; 379 380 memset(packet_buff_uint, 0, ib_size); 381 382 packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint); 383 384 packets_vec[0].header.count = 1; 385 packets_vec[0].header.opcode = IT_SET_CONFIG_REG; 386 packets_vec[0].header.type = PM4_TYPE_3; 387 packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; 388 packets_vec[0].bitfields2.insert_vmid = 1; 389 packets_vec[1].ordinal1 = packets_vec[0].ordinal1; 390 packets_vec[1].bitfields2.insert_vmid = 0; 391 packets_vec[2].ordinal1 = packets_vec[0].ordinal1; 392 packets_vec[2].bitfields2.insert_vmid = 0; 393 packets_vec[3].ordinal1 = packets_vec[0].ordinal1; 394 packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; 395 packets_vec[3].bitfields2.insert_vmid = 1; 396 397 for (i = 0; i < adw_info->num_watch_points; i++) { 398 dbgdev_address_watch_set_registers(adw_info, 399 &addrHi, 400 &addrLo, 401 &cntl, 402 i, 403 vmid); 404 405 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); 406 pr_debug("\t\t%20s %08x\n", "register index :", i); 407 pr_debug("\t\t%20s %08x\n", "vmid is :", vmid); 408 pr_debug("\t\t%20s %p\n", "Add ptr is :", 409 adw_info->watch_address); 410 pr_debug("\t\t%20s %08llx\n", "Add is :", 411 adw_info->watch_address[i]); 412 pr_debug("\t\t%20s %08x\n", "Address Low is :", 413 addrLo.bitfields.addr); 414 pr_debug("\t\t%20s %08x\n", "Address high is :", 415 addrHi.bitfields.addr); 416 pr_debug("\t\t%20s %08x\n", "Control Mask is :", 417 cntl.bitfields.mask); 418 pr_debug("\t\t%20s %08x\n", "Control Mode is :", 419 cntl.bitfields.mode); 420 pr_debug("\t\t%20s %08x\n", "Control Vmid is :", 421 cntl.bitfields.vmid); 422 pr_debug("\t\t%20s %08x\n", "Control atc is :", 423 cntl.bitfields.atc); 424 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); 425 426 aw_reg_add_dword = 427 dbgdev->dev->kfd2kgd->address_watch_get_offset( 428 dbgdev->dev->kgd, 429 i, 430 ADDRESS_WATCH_REG_CNTL); 431 432 packets_vec[0].bitfields2.reg_offset = 433 aw_reg_add_dword - AMD_CONFIG_REG_BASE; 434 435 packets_vec[0].reg_data[0] = cntl.u32All; 436 437 aw_reg_add_dword = 438 dbgdev->dev->kfd2kgd->address_watch_get_offset( 439 dbgdev->dev->kgd, 440 i, 441 ADDRESS_WATCH_REG_ADDR_HI); 442 443 packets_vec[1].bitfields2.reg_offset = 444 aw_reg_add_dword - AMD_CONFIG_REG_BASE; 445 packets_vec[1].reg_data[0] = addrHi.u32All; 446 447 aw_reg_add_dword = 448 dbgdev->dev->kfd2kgd->address_watch_get_offset( 449 dbgdev->dev->kgd, 450 i, 451 ADDRESS_WATCH_REG_ADDR_LO); 452 453 packets_vec[2].bitfields2.reg_offset = 454 aw_reg_add_dword - AMD_CONFIG_REG_BASE; 455 packets_vec[2].reg_data[0] = addrLo.u32All; 456 457 /* enable watch flag if address is not zero*/ 458 if (adw_info->watch_address[i] > 0) 459 cntl.bitfields.valid = 1; 460 else 461 cntl.bitfields.valid = 0; 462 463 aw_reg_add_dword = 464 dbgdev->dev->kfd2kgd->address_watch_get_offset( 465 dbgdev->dev->kgd, 466 i, 467 ADDRESS_WATCH_REG_CNTL); 468 469 packets_vec[3].bitfields2.reg_offset = 470 aw_reg_add_dword - AMD_CONFIG_REG_BASE; 471 packets_vec[3].reg_data[0] = cntl.u32All; 472 473 status = dbgdev_diq_submit_ib( 474 dbgdev, 475 adw_info->process->pasid, 476 mem_obj->gpu_addr, 477 packet_buff_uint, 478 ib_size); 479 480 if (status) { 481 pr_err("Failed to submit IB to DIQ\n"); 482 break; 483 } 484 } 485 486 kfd_gtt_sa_free(dbgdev->dev, mem_obj); 487 return status; 488 } 489 490 static int dbgdev_wave_control_set_registers( 491 struct dbg_wave_control_info *wac_info, 492 union SQ_CMD_BITS *in_reg_sq_cmd, 493 union GRBM_GFX_INDEX_BITS *in_reg_gfx_index) 494 { 495 int status = 0; 496 union SQ_CMD_BITS reg_sq_cmd; 497 union GRBM_GFX_INDEX_BITS reg_gfx_index; 498 struct HsaDbgWaveMsgAMDGen2 *pMsg; 499 500 reg_sq_cmd.u32All = 0; 501 reg_gfx_index.u32All = 0; 502 pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2; 503 504 switch (wac_info->mode) { 505 /* Send command to single wave */ 506 case HSA_DBG_WAVEMODE_SINGLE: 507 /* 508 * Limit access to the process waves only, 509 * by setting vmid check 510 */ 511 reg_sq_cmd.bits.check_vmid = 1; 512 reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD; 513 reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId; 514 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE; 515 516 reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; 517 reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; 518 reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; 519 520 break; 521 522 /* Send command to all waves with matching VMID */ 523 case HSA_DBG_WAVEMODE_BROADCAST_PROCESS: 524 525 reg_gfx_index.bits.sh_broadcast_writes = 1; 526 reg_gfx_index.bits.se_broadcast_writes = 1; 527 reg_gfx_index.bits.instance_broadcast_writes = 1; 528 529 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; 530 531 break; 532 533 /* Send command to all CU waves with matching VMID */ 534 case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU: 535 536 reg_sq_cmd.bits.check_vmid = 1; 537 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; 538 539 reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; 540 reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; 541 reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; 542 543 break; 544 545 default: 546 return -EINVAL; 547 } 548 549 switch (wac_info->operand) { 550 case HSA_DBG_WAVEOP_HALT: 551 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT; 552 break; 553 554 case HSA_DBG_WAVEOP_RESUME: 555 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME; 556 break; 557 558 case HSA_DBG_WAVEOP_KILL: 559 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL; 560 break; 561 562 case HSA_DBG_WAVEOP_DEBUG: 563 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG; 564 break; 565 566 case HSA_DBG_WAVEOP_TRAP: 567 if (wac_info->trapId < MAX_TRAPID) { 568 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP; 569 reg_sq_cmd.bits.trap_id = wac_info->trapId; 570 } else { 571 status = -EINVAL; 572 } 573 break; 574 575 default: 576 status = -EINVAL; 577 break; 578 } 579 580 if (status == 0) { 581 *in_reg_sq_cmd = reg_sq_cmd; 582 *in_reg_gfx_index = reg_gfx_index; 583 } 584 585 return status; 586 } 587 588 static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, 589 struct dbg_wave_control_info *wac_info) 590 { 591 592 int status; 593 union SQ_CMD_BITS reg_sq_cmd; 594 union GRBM_GFX_INDEX_BITS reg_gfx_index; 595 struct kfd_mem_obj *mem_obj; 596 uint32_t *packet_buff_uint; 597 struct pm4__set_config_reg *packets_vec; 598 size_t ib_size = sizeof(struct pm4__set_config_reg) * 3; 599 600 reg_sq_cmd.u32All = 0; 601 602 status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd, 603 ®_gfx_index); 604 if (status) { 605 pr_err("Failed to set wave control registers\n"); 606 return status; 607 } 608 609 /* we do not control the VMID in DIQ, so reset it to a known value */ 610 reg_sq_cmd.bits.vm_id = 0; 611 612 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); 613 614 pr_debug("\t\t mode is: %u\n", wac_info->mode); 615 pr_debug("\t\t operand is: %u\n", wac_info->operand); 616 pr_debug("\t\t trap id is: %u\n", wac_info->trapId); 617 pr_debug("\t\t msg value is: %u\n", 618 wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); 619 pr_debug("\t\t vmid is: N/A\n"); 620 621 pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); 622 pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); 623 pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); 624 pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id); 625 pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); 626 pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); 627 pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); 628 629 pr_debug("\t\t ibw is : %u\n", 630 reg_gfx_index.bitfields.instance_broadcast_writes); 631 pr_debug("\t\t ii is : %u\n", 632 reg_gfx_index.bitfields.instance_index); 633 pr_debug("\t\t sebw is : %u\n", 634 reg_gfx_index.bitfields.se_broadcast_writes); 635 pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index); 636 pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); 637 pr_debug("\t\t sbw is : %u\n", 638 reg_gfx_index.bitfields.sh_broadcast_writes); 639 640 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); 641 642 status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); 643 644 if (status != 0) { 645 pr_err("Failed to allocate GART memory\n"); 646 return status; 647 } 648 649 packet_buff_uint = mem_obj->cpu_ptr; 650 651 memset(packet_buff_uint, 0, ib_size); 652 653 packets_vec = (struct pm4__set_config_reg *) packet_buff_uint; 654 packets_vec[0].header.count = 1; 655 packets_vec[0].header.opcode = IT_SET_UCONFIG_REG; 656 packets_vec[0].header.type = PM4_TYPE_3; 657 packets_vec[0].bitfields2.reg_offset = 658 GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE; 659 660 packets_vec[0].bitfields2.insert_vmid = 0; 661 packets_vec[0].reg_data[0] = reg_gfx_index.u32All; 662 663 packets_vec[1].header.count = 1; 664 packets_vec[1].header.opcode = IT_SET_CONFIG_REG; 665 packets_vec[1].header.type = PM4_TYPE_3; 666 packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE; 667 668 packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET; 669 packets_vec[1].bitfields2.insert_vmid = 1; 670 packets_vec[1].reg_data[0] = reg_sq_cmd.u32All; 671 672 /* Restore the GRBM_GFX_INDEX register */ 673 674 reg_gfx_index.u32All = 0; 675 reg_gfx_index.bits.sh_broadcast_writes = 1; 676 reg_gfx_index.bits.instance_broadcast_writes = 1; 677 reg_gfx_index.bits.se_broadcast_writes = 1; 678 679 680 packets_vec[2].ordinal1 = packets_vec[0].ordinal1; 681 packets_vec[2].bitfields2.reg_offset = 682 GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE; 683 684 packets_vec[2].bitfields2.insert_vmid = 0; 685 packets_vec[2].reg_data[0] = reg_gfx_index.u32All; 686 687 status = dbgdev_diq_submit_ib( 688 dbgdev, 689 wac_info->process->pasid, 690 mem_obj->gpu_addr, 691 packet_buff_uint, 692 ib_size); 693 694 if (status) 695 pr_err("Failed to submit IB to DIQ\n"); 696 697 kfd_gtt_sa_free(dbgdev->dev, mem_obj); 698 699 return status; 700 } 701 702 static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev, 703 struct dbg_wave_control_info *wac_info) 704 { 705 int status; 706 union SQ_CMD_BITS reg_sq_cmd; 707 union GRBM_GFX_INDEX_BITS reg_gfx_index; 708 struct kfd_process_device *pdd; 709 710 reg_sq_cmd.u32All = 0; 711 712 /* taking the VMID for that process on the safe way using PDD */ 713 pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process); 714 715 if (!pdd) { 716 pr_err("Failed to get pdd for wave control no DIQ\n"); 717 return -EFAULT; 718 } 719 status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd, 720 ®_gfx_index); 721 if (status) { 722 pr_err("Failed to set wave control registers\n"); 723 return status; 724 } 725 726 /* for non DIQ we need to patch the VMID: */ 727 728 reg_sq_cmd.bits.vm_id = pdd->qpd.vmid; 729 730 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); 731 732 pr_debug("\t\t mode is: %u\n", wac_info->mode); 733 pr_debug("\t\t operand is: %u\n", wac_info->operand); 734 pr_debug("\t\t trap id is: %u\n", wac_info->trapId); 735 pr_debug("\t\t msg value is: %u\n", 736 wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); 737 pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid); 738 739 pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); 740 pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); 741 pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); 742 pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id); 743 pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); 744 pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); 745 pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); 746 747 pr_debug("\t\t ibw is : %u\n", 748 reg_gfx_index.bitfields.instance_broadcast_writes); 749 pr_debug("\t\t ii is : %u\n", 750 reg_gfx_index.bitfields.instance_index); 751 pr_debug("\t\t sebw is : %u\n", 752 reg_gfx_index.bitfields.se_broadcast_writes); 753 pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index); 754 pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); 755 pr_debug("\t\t sbw is : %u\n", 756 reg_gfx_index.bitfields.sh_broadcast_writes); 757 758 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); 759 760 return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd, 761 reg_gfx_index.u32All, 762 reg_sq_cmd.u32All); 763 } 764 765 int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) 766 { 767 int status = 0; 768 unsigned int vmid; 769 uint16_t queried_pasid; 770 union SQ_CMD_BITS reg_sq_cmd; 771 union GRBM_GFX_INDEX_BITS reg_gfx_index; 772 struct kfd_process_device *pdd; 773 struct dbg_wave_control_info wac_info; 774 int first_vmid_to_scan = dev->vm_info.first_vmid_kfd; 775 int last_vmid_to_scan = dev->vm_info.last_vmid_kfd; 776 777 reg_sq_cmd.u32All = 0; 778 status = 0; 779 780 wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS; 781 wac_info.operand = HSA_DBG_WAVEOP_KILL; 782 783 pr_debug("Killing all process wavefronts\n"); 784 785 /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. 786 * ATC_VMID15_PASID_MAPPING 787 * to check which VMID the current process is mapped to. 788 */ 789 790 for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { 791 status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info 792 (dev->kgd, vmid, &queried_pasid); 793 794 if (status && queried_pasid == p->pasid) { 795 pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n", 796 vmid, p->pasid); 797 break; 798 } 799 } 800 801 if (vmid > last_vmid_to_scan) { 802 pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid); 803 return -EFAULT; 804 } 805 806 /* taking the VMID for that process on the safe way using PDD */ 807 pdd = kfd_get_process_device_data(dev, p); 808 if (!pdd) 809 return -EFAULT; 810 811 status = dbgdev_wave_control_set_registers(&wac_info, ®_sq_cmd, 812 ®_gfx_index); 813 if (status != 0) 814 return -EINVAL; 815 816 /* for non DIQ we need to patch the VMID: */ 817 reg_sq_cmd.bits.vm_id = vmid; 818 819 dev->kfd2kgd->wave_control_execute(dev->kgd, 820 reg_gfx_index.u32All, 821 reg_sq_cmd.u32All); 822 823 return 0; 824 } 825 826 void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, 827 enum DBGDEV_TYPE type) 828 { 829 pdbgdev->dev = pdev; 830 pdbgdev->kq = NULL; 831 pdbgdev->type = type; 832 pdbgdev->pqm = NULL; 833 834 switch (type) { 835 case DBGDEV_TYPE_NODIQ: 836 pdbgdev->dbgdev_register = dbgdev_register_nodiq; 837 pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq; 838 pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq; 839 pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq; 840 break; 841 case DBGDEV_TYPE_DIQ: 842 default: 843 pdbgdev->dbgdev_register = dbgdev_register_diq; 844 pdbgdev->dbgdev_unregister = dbgdev_unregister_diq; 845 pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq; 846 pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq; 847 break; 848 } 849 850 } 851