/*	$NetBSD: kfd_dbgdev.c,v 1.3 2021/12/18 23:44:59 riastradh Exp $	*/

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kfd_dbgdev.c,v 1.3 2021/12/18 23:44:59 riastradh Exp $");

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/device.h>

#include "kfd_pm4_headers.h"
#include "kfd_pm4_headers_diq.h"
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"
#include "kfd_pm4_opcodes.h"
#include "cik_regs.h"
#include "kfd_dbgmgr.h"
#include "kfd_dbgdev.h"
#include "kfd_device_queue_manager.h"

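/* Without a DIQ, the address watch is disabled directly via the kfd2kgd interface. */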
static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
{
	dev->kfd2kgd->address_watch_disable(dev->kgd);
}

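/*
 * Submit an indirect buffer through the DIQ: write an INDIRECT_BUFFER_PASID
 * packet plus a RELEASE_MEM fence packet into the DIQ, then wait for the CP
 * to write the fence value back to a GART-allocated sync variable.
 */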
static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
				unsigned int pasid, uint64_t vmid0_address,
				uint32_t *packet_buff, size_t size_in_bytes)
{
	struct pm4__release_mem *rm_packet;
	struct pm4__indirect_buffer_pasid *ib_packet;
	struct kfd_mem_obj *mem_obj;
	size_t pq_packets_size_in_bytes;
	union ULARGE_INTEGER *largep;
	union ULARGE_INTEGER addr;
	struct kernel_queue *kq;
	uint64_t *rm_state;
	unsigned int *ib_packet_buff;
	int status;

	if (WARN_ON(!size_in_bytes))
		return -EINVAL;

	kq = dbgdev->kq;

	pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
				sizeof(struct pm4__indirect_buffer_pasid);

	/*
	 * We acquire a packet buffer from the DIQ.
	 * The packets to execute sit in the indirect buffer, while the PQ
	 * holds the IB packet plus the sync packet(s).
	 */
	status = kq_acquire_packet_buffer(kq,
				pq_packets_size_in_bytes / sizeof(uint32_t),
				&ib_packet_buff);
	if (status) {
		pr_err("kq_acquire_packet_buffer failed\n");
		return status;
	}

	memset(ib_packet_buff, 0, pq_packets_size_in_bytes);

	ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);

	ib_packet->header.count = 3;
	ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
	ib_packet->header.type = PM4_TYPE_3;

	largep = (union ULARGE_INTEGER *) &vmid0_address;

	ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
	ib_packet->bitfields3.ib_base_hi = largep->u.high_part;

	ib_packet->control = (1 << 23) | (1 << 31) |
			((size_in_bytes / 4) & 0xfffff);

	ib_packet->bitfields5.pasid = pasid;

	/*
	 * For now we use RELEASE_MEM for GPU-CPU synchronization
	 * (WaitRegMem + WriteData may be a better alternative).
	 * We take a GART allocation (GPU/CPU mapping) for the sync
	 * variable and wait until:
	 * (a) we are in sync with the HW, and
	 * (b) the sync variable has been written by the CP to memory.
	 */
	rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
			(sizeof(struct pm4__indirect_buffer_pasid) /
					sizeof(unsigned int)));

	status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
					&mem_obj);

	if (status) {
		pr_err("Failed to allocate GART memory\n");
		kq_rollback_packet(kq);
		return status;
	}

	rm_state = (uint64_t *) mem_obj->cpu_ptr;

	*rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;

	rm_packet->header.opcode = IT_RELEASE_MEM;
	rm_packet->header.type = PM4_TYPE_3;
	rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2;

	rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
	rm_packet->bitfields2.event_index =
				event_index___release_mem__end_of_pipe;

	rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
	rm_packet->bitfields2.atc = 0;
	rm_packet->bitfields2.tc_wb_action_ena = 1;

	addr.quad_part = mem_obj->gpu_addr;

	rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
	rm_packet->address_hi = addr.u.high_part;

	rm_packet->bitfields3.data_sel =
				data_sel___release_mem__send_64_bit_data;

	rm_packet->bitfields3.int_sel =
			int_sel___release_mem__send_data_after_write_confirm;

	rm_packet->bitfields3.dst_sel =
			dst_sel___release_mem__memory_controller;

	rm_packet->data_lo = QUEUESTATE__ACTIVE;

	kq_submit_packet(kq);

	/* Wait until the CP writes the sync value: */
	status = amdkfd_fence_wait_timeout(
			(unsigned int *) rm_state,
			QUEUESTATE__ACTIVE, 1500);

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}

static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
{
	/*
	 * No action is needed in this case;
	 * just make sure the DIQ will not be used.
	 */

	dbgdev->kq = NULL;

	return 0;
}

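/* Create a DIQ through the process queue manager and cache its kernel queue. */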
static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
{
	struct queue_properties properties;
	unsigned int qid;
	struct kernel_queue *kq = NULL;
	int status;

	/* Zero-init so that fields we do not set are not stack garbage */
	memset((void *) &properties, 0, sizeof(struct queue_properties));
	properties.type = KFD_QUEUE_TYPE_DIQ;

	status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
				&properties, &qid, NULL);

	if (status) {
		pr_err("Failed to create DIQ\n");
		return status;
	}

	pr_debug("DIQ Created with queue id: %d\n", qid);

	kq = pqm_get_kernel_queue(dbgdev->pqm, qid);

	if (!kq) {
		pr_err("Error getting DIQ\n");
		pqm_destroy_queue(dbgdev->pqm, qid);
		return -EFAULT;
	}

	dbgdev->kq = kq;

	return status;
}

static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
{
	/* Disable the address watch */
	dbgdev_address_watch_disable_nodiq(dbgdev->dev);
	return 0;
}

static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
{
	/* TODO: disable address watch */
	int status;

	status = pqm_destroy_queue(dbgdev->pqm,
			dbgdev->kq->queue->properties.queue_id);
	dbgdev->kq = NULL;

	return status;
}

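/*
 * Pack one watch point (address, mask, mode, VMID) into the TCP_WATCH_*
 * register images shared by the DIQ and non-DIQ address watch paths.
 */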
static void dbgdev_address_watch_set_registers(
			const struct dbg_address_watch_info *adw_info,
			union TCP_WATCH_ADDR_H_BITS *addrHi,
			union TCP_WATCH_ADDR_L_BITS *addrLo,
			union TCP_WATCH_CNTL_BITS *cntl,
			unsigned int index, unsigned int vmid)
{
	union ULARGE_INTEGER addr;

	addr.quad_part = 0;
	addrHi->u32All = 0;
	addrLo->u32All = 0;
	cntl->u32All = 0;

	if (adw_info->watch_mask)
		cntl->bitfields.mask =
			(uint32_t) (adw_info->watch_mask[index] &
					ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
	else
		cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;

	addr.quad_part = (unsigned long long) adw_info->watch_address[index];

	addrHi->bitfields.addr = addr.u.high_part &
					ADDRESS_WATCH_REG_ADDHIGH_MASK;
	addrLo->bitfields.addr =
			(addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);

	cntl->bitfields.mode = adw_info->watch_mode[index];
	cntl->bitfields.vmid = (uint32_t) vmid;
	/* for now assume it is an ATC address */
	cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;

	pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
	pr_debug("\t\t%20s %08x\n", "set reg add high :",
			addrHi->bitfields.addr);
	pr_debug("\t\t%20s %08x\n", "set reg add low :",
			addrLo->bitfields.addr);
}

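/*
 * Program the address watch registers directly through kfd2kgd, using the
 * VMID already assigned to the process.
 */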
static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
				      struct dbg_address_watch_info *adw_info)
{
	union TCP_WATCH_ADDR_H_BITS addrHi;
	union TCP_WATCH_ADDR_L_BITS addrLo;
	union TCP_WATCH_CNTL_BITS cntl;
	struct kfd_process_device *pdd;
	unsigned int i;

	/* Take the VMID for that process the safe way, via its PDD */
	pdd = kfd_get_process_device_data(dbgdev->dev,
					adw_info->process);
	if (!pdd) {
		pr_err("Failed to get pdd for address watch no DIQ\n");
		return -EFAULT;
	}

	addrHi.u32All = 0;
	addrLo.u32All = 0;
	cntl.u32All = 0;

	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
			(adw_info->num_watch_points == 0)) {
		pr_err("num_watch_points is invalid\n");
		return -EINVAL;
	}

	if (!adw_info->watch_mode || !adw_info->watch_address) {
		pr_err("adw_info fields are not valid\n");
		return -EINVAL;
	}

	for (i = 0; i < adw_info->num_watch_points; i++) {
		dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
						&cntl, i, pdd->qpd.vmid);

		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
		pr_debug("\t\t%20s %08x\n", "register index :", i);
		pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
		pr_debug("\t\t%20s %08x\n", "Address Low is :",
				addrLo.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Address high is :",
				addrHi.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
				cntl.bitfields.mask);
		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
				cntl.bitfields.mode);
		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
				cntl.bitfields.vmid);
		pr_debug("\t\t%20s %08x\n", "Control atc  is :",
				cntl.bitfields.atc);
		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");

		pdd->dev->kfd2kgd->address_watch_execute(
						dbgdev->dev->kgd,
						i,
						cntl.u32All,
						addrHi.u32All,
						addrLo.u32All);
	}

	return 0;
}

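/*
 * Program the address watch registers through the DIQ: build SET_CONFIG_REG
 * packets in a GART-backed indirect buffer and let the CP insert the VMID.
 */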
static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
				    struct dbg_address_watch_info *adw_info)
{
	struct pm4__set_config_reg *packets_vec;
	union TCP_WATCH_ADDR_H_BITS addrHi;
	union TCP_WATCH_ADDR_L_BITS addrLo;
	union TCP_WATCH_CNTL_BITS cntl;
	struct kfd_mem_obj *mem_obj;
	unsigned int aw_reg_add_dword;
	uint32_t *packet_buff_uint;
	unsigned int i;
	int status;
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
	/* We do not control the VMID in DIQ mode; this is just a placeholder */
	unsigned int vmid = 0;

	addrHi.u32All = 0;
	addrLo.u32All = 0;
	cntl.u32All = 0;

	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
			(adw_info->num_watch_points == 0)) {
		pr_err("num_watch_points is invalid\n");
		return -EINVAL;
	}

	if (!adw_info->watch_mode || !adw_info->watch_address) {
		pr_err("adw_info fields are not valid\n");
		return -EINVAL;
	}

	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);

	if (status) {
		pr_err("Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);

	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
	packets_vec[0].bitfields2.insert_vmid = 1;
	packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[1].bitfields2.insert_vmid = 0;
	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
	packets_vec[3].bitfields2.insert_vmid = 1;

	for (i = 0; i < adw_info->num_watch_points; i++) {
		dbgdev_address_watch_set_registers(adw_info,
						&addrHi,
						&addrLo,
						&cntl,
						i,
						vmid);

		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
		pr_debug("\t\t%20s %08x\n", "register index :", i);
		pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
		pr_debug("\t\t%20s %p\n", "Add ptr is :",
				adw_info->watch_address);
		pr_debug("\t\t%20s %08llx\n", "Add     is :",
				adw_info->watch_address[i]);
		pr_debug("\t\t%20s %08x\n", "Address Low is :",
				addrLo.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Address high is :",
				addrHi.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
				cntl.bitfields.mask);
		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
				cntl.bitfields.mode);
		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
				cntl.bitfields.vmid);
		pr_debug("\t\t%20s %08x\n", "Control atc  is :",
				cntl.bitfields.atc);
		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");

		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_CNTL);

		packets_vec[0].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;

		packets_vec[0].reg_data[0] = cntl.u32All;

		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_ADDR_HI);

		packets_vec[1].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[1].reg_data[0] = addrHi.u32All;

		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_ADDR_LO);

		packets_vec[2].bitfields2.reg_offset =
				aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[2].reg_data[0] = addrLo.u32All;

		/* Enable the watch flag if the address is not zero */
		if (adw_info->watch_address[i] > 0)
			cntl.bitfields.valid = 1;
		else
			cntl.bitfields.valid = 0;

		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_CNTL);

		packets_vec[3].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[3].reg_data[0] = cntl.u32All;

		status = dbgdev_diq_submit_ib(
					dbgdev,
					adw_info->process->pasid,
					mem_obj->gpu_addr,
					packet_buff_uint,
					ib_size);

		if (status) {
			pr_err("Failed to submit IB to DIQ\n");
			break;
		}
	}

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);
	return status;
}

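/*
 * Translate the requested wave mode and operand into SQ_CMD and
 * GRBM_GFX_INDEX register values; the VMID is patched later by the caller.
 */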
static int dbgdev_wave_control_set_registers(
				struct dbg_wave_control_info *wac_info,
				union SQ_CMD_BITS *in_reg_sq_cmd,
				union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
{
	int status = 0;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct HsaDbgWaveMsgAMDGen2 *pMsg;

	reg_sq_cmd.u32All = 0;
	reg_gfx_index.u32All = 0;
	pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;

	switch (wac_info->mode) {
	/* Send command to a single wave */
	case HSA_DBG_WAVEMODE_SINGLE:
		/*
		 * Limit access to this process's waves only,
		 * by setting the VMID check.
		 */
		reg_sq_cmd.bits.check_vmid = 1;
		reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
		reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;

		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;

		break;

	/* Send command to all waves with matching VMID */
	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:

		reg_gfx_index.bits.sh_broadcast_writes = 1;
		reg_gfx_index.bits.se_broadcast_writes = 1;
		reg_gfx_index.bits.instance_broadcast_writes = 1;

		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;

		break;

	/* Send command to all CU waves with matching VMID */
	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:

		reg_sq_cmd.bits.check_vmid = 1;
		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;

		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;

		break;

	default:
		return -EINVAL;
	}

	switch (wac_info->operand) {
	case HSA_DBG_WAVEOP_HALT:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
		break;

	case HSA_DBG_WAVEOP_RESUME:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
		break;

	case HSA_DBG_WAVEOP_KILL:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
		break;

	case HSA_DBG_WAVEOP_DEBUG:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
		break;

	case HSA_DBG_WAVEOP_TRAP:
		if (wac_info->trapId < MAX_TRAPID) {
			reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
			reg_sq_cmd.bits.trap_id = wac_info->trapId;
		} else {
			status = -EINVAL;
		}
		break;

	default:
		status = -EINVAL;
		break;
	}

	if (status == 0) {
		*in_reg_sq_cmd = reg_sq_cmd;
		*in_reg_gfx_index = reg_gfx_index;
	}

	return status;
}

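/*
 * Issue a wave control command through the DIQ: select the target waves via
 * GRBM_GFX_INDEX, write SQ_CMD with the VMID inserted by the CP, then restore
 * GRBM_GFX_INDEX to broadcast mode.
 */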
static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
					struct dbg_wave_control_info *wac_info)
{
	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_mem_obj *mem_obj;
	uint32_t *packet_buff_uint;
	struct pm4__set_config_reg *packets_vec;
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;

	reg_sq_cmd.u32All = 0;

	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
							&reg_gfx_index);
	if (status) {
		pr_err("Failed to set wave control registers\n");
		return status;
	}

	/* we do not control the VMID in DIQ, so reset it to a known value */
	reg_sq_cmd.bits.vm_id = 0;

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode      is: %u\n", wac_info->mode);
	pr_debug("\t\t operand   is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid      is: N/A\n");

	pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw       is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii        is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw      is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw       is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);

	if (status != 0) {
		pr_err("Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	packets_vec = (struct pm4__set_config_reg *) packet_buff_uint;
	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.reg_offset =
			GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;

	packets_vec[0].bitfields2.insert_vmid = 0;
	packets_vec[0].reg_data[0] = reg_gfx_index.u32All;

	packets_vec[1].header.count = 1;
	packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[1].header.type = PM4_TYPE_3;
	packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE;

	packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
	packets_vec[1].bitfields2.insert_vmid = 1;
	packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;

	/* Restore the GRBM_GFX_INDEX register */

	reg_gfx_index.u32All = 0;
	reg_gfx_index.bits.sh_broadcast_writes = 1;
	reg_gfx_index.bits.instance_broadcast_writes = 1;
	reg_gfx_index.bits.se_broadcast_writes = 1;

	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.reg_offset =
				GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;

	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[2].reg_data[0] = reg_gfx_index.u32All;

	status = dbgdev_diq_submit_ib(
			dbgdev,
			wac_info->process->pasid,
			mem_obj->gpu_addr,
			packet_buff_uint,
			ib_size);

	if (status)
		pr_err("Failed to submit IB to DIQ\n");

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}

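/*
 * Issue a wave control command without a DIQ: patch the VMID taken from the
 * process device data and execute the command through kfd2kgd.
 */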
static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
					struct dbg_wave_control_info *wac_info)
{
	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;

	reg_sq_cmd.u32All = 0;

	/* Take the VMID for that process the safe way, via its PDD */
	pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);

	if (!pdd) {
		pr_err("Failed to get pdd for wave control no DIQ\n");
		return -EFAULT;
	}
	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
							&reg_gfx_index);
	if (status) {
		pr_err("Failed to set wave control registers\n");
		return status;
	}

	/* For non-DIQ we need to patch the VMID: */

	reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode      is: %u\n", wac_info->mode);
	pr_debug("\t\t operand   is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid      is: %u\n", pdd->qpd.vmid);

	pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw       is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii        is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw      is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw       is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
							reg_gfx_index.u32All,
							reg_sq_cmd.u32All);
}

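/*
 * Kill all wavefronts of a process: scan the ATC VMID-PASID mappings to find
 * the VMID the process is mapped to, then broadcast a KILL command to all
 * waves with that VMID.
 */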
int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
{
	int status = 0;
	unsigned int vmid;
	uint16_t queried_pasid;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;
	struct dbg_wave_control_info wac_info;
	int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
	int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;

	reg_sq_cmd.u32All = 0;

	wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
	wac_info.operand = HSA_DBG_WAVEOP_KILL;

	pr_debug("Killing all process wavefronts\n");

	/*
	 * Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
	 * ATC_VMID15_PASID_MAPPING to check which VMID the current
	 * process is mapped to.
	 */

	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
		status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
				(dev->kgd, vmid, &queried_pasid);

		if (status && queried_pasid == p->pasid) {
			pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
					vmid, p->pasid);
			break;
		}
	}

	if (vmid > last_vmid_to_scan) {
		pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
		return -EFAULT;
	}

	/* Take the VMID for that process the safe way, via its PDD */
	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd)
		return -EFAULT;

	status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
			&reg_gfx_index);
	if (status != 0)
		return -EINVAL;

	/* For non-DIQ we need to patch the VMID: */
	reg_sq_cmd.bits.vm_id = vmid;

	dev->kfd2kgd->wave_control_execute(dev->kgd,
					reg_gfx_index.u32All,
					reg_sq_cmd.u32All);

	return 0;
}

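/* Bind the debug device to either the DIQ or the non-DIQ back end. */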
void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
			enum DBGDEV_TYPE type)
{
	pdbgdev->dev = pdev;
	pdbgdev->kq = NULL;
	pdbgdev->type = type;
	pdbgdev->pqm = NULL;

	switch (type) {
	case DBGDEV_TYPE_NODIQ:
		pdbgdev->dbgdev_register = dbgdev_register_nodiq;
		pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
		pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
		break;
	case DBGDEV_TYPE_DIQ:
	default:
		pdbgdev->dbgdev_register = dbgdev_register_diq;
		pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
		pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq;
		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
		break;
	}
}