/*	$NetBSD: kfd_mqd_manager_v10.c,v 1.2 2021/12/18 23:44:59 riastradh Exp $	*/

/*
 * Copyright 2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kfd_mqd_manager_v10.c,v 1.2 2021/12/18 23:44:59 riastradh Exp $");

#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include "kfd_priv.h"
#include "kfd_mqd_manager.h"
#include "v10_structs.h"
#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "amdgpu_amdkfd.h"

static inline struct v10_compute_mqd *get_mqd(void *mqd)
{
	return (struct v10_compute_mqd *)mqd;
}

static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v10_sdma_mqd *)mqd;
}

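/*
 * Program the per-SE CU masks in the MQD.  The generic helper distributes
 * the user-supplied CU mask symmetrically across the shader engines; each
 * resulting 32-bit mask selects which CUs waves from this queue may occupy.
 */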
static void update_cu_mask(struct mqd_manager *mm, void *mqd,
			   struct queue_properties *q)
{
	struct v10_compute_mqd *m;
	uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */

	if (q->cu_mask_count == 0)
		return;

	mqd_symmetrically_map_cu_mask(mm,
		q->cu_mask, q->cu_mask_count, se_mask);

	m = get_mqd(mqd);
	m->compute_static_thread_mgmt_se0 = se_mask[0];
	m->compute_static_thread_mgmt_se1 = se_mask[1];
	m->compute_static_thread_mgmt_se2 = se_mask[2];
	m->compute_static_thread_mgmt_se3 = se_mask[3];

	pr_debug("update cu mask to %#x %#x %#x %#x\n",
		m->compute_static_thread_mgmt_se0,
		m->compute_static_thread_mgmt_se1,
		m->compute_static_thread_mgmt_se2,
		m->compute_static_thread_mgmt_se3);
}

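/* Map the KFD queue priority onto the HQD pipe and queue priority fields. */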
static void set_priority(struct v10_compute_mqd *m, struct queue_properties *q)
{
	m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
	m->cp_hqd_queue_priority = q->priority;
}

static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
		struct queue_properties *q)
{
	struct kfd_mem_obj *mqd_mem_obj;

	if (kfd_gtt_sa_allocate(kfd, sizeof(struct v10_compute_mqd),
			&mqd_mem_obj))
		return NULL;

	return mqd_mem_obj;
}

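/*
 * Initialize a freshly allocated compute MQD: zero it, then program the
 * static defaults (all CUs enabled, MQD base address, scheduling quantum)
 * and, when CWSR is enabled, the context save/restore area addresses.
 */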
static void init_mqd(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *q)
{
	uint64_t addr;
	struct v10_compute_mqd *m;

	m = (struct v10_compute_mqd *) mqd_mem_obj->cpu_ptr;
	addr = mqd_mem_obj->gpu_addr;

	memset(m, 0, sizeof(struct v10_compute_mqd));

	m->header = 0xC0310800;
	m->compute_pipelinestat_enable = 1;
	m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;

	m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
			0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;

	m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;

	m->cp_mqd_base_addr_lo        = lower_32_bits(addr);
	m->cp_mqd_base_addr_hi        = upper_32_bits(addr);

	m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
			1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
			10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;

	if (q->format == KFD_QUEUE_FORMAT_AQL) {
		m->cp_hqd_aql_control =
			1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
	}

	if (mm->dev->cwsr_enabled) {
		m->cp_hqd_persistent_state |=
			(1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
		m->cp_hqd_ctx_save_base_addr_lo =
			lower_32_bits(q->ctx_save_restore_area_address);
		m->cp_hqd_ctx_save_base_addr_hi =
			upper_32_bits(q->ctx_save_restore_area_address);
		m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
		m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
		m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
		m->cp_hqd_wg_state_offset = q->ctl_stack_size;
	}

	*mqd = m;
	if (gart_addr)
		*gart_addr = addr;
	mm->update_mqd(mm, m, q);
}

static int load_mqd(struct mqd_manager *mm, void *mqd,
			uint32_t pipe_id, uint32_t queue_id,
			struct queue_properties *p, struct mm_struct *mms)
{
	int r = 0;
	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);

	r = mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
					  (uint32_t __user *)p->write_ptr,
					  wptr_shift, 0, mms);
	return r;
}

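/*
 * The HIQ is a kernel-owned queue; it is loaded through the KIQ and
 * addressed by doorbell offset rather than by a user-mode write pointer.
 */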
static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
			    uint32_t pipe_id, uint32_t queue_id,
			    struct queue_properties *p, struct mm_struct *mms)
{
	return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id,
					      queue_id, p->doorbell_off);
}

static void update_mqd(struct mqd_manager *mm, void *mqd,
		      struct queue_properties *q)
{
	struct v10_compute_mqd *m;

	m = get_mqd(mqd);

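	/*
	 * The queue-size field of CP_HQD_PQ_CONTROL encodes
	 * log2(ring size in dwords) - 1, hence the double "- 1" below
	 * (ffs() is 1-based): e.g. a 4 KiB ring (1024 dwords) encodes as 9.
	 */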
	m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
	m->cp_hqd_pq_control |=
			ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
	pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);

	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);

	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
	m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
	m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
	m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);

	m->cp_hqd_pq_doorbell_control =
		q->doorbell_off <<
			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
			m->cp_hqd_pq_doorbell_control);

	m->cp_hqd_ib_control = 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT;

	/*
	 * HW does not clamp this field correctly. Maximum EOP queue size
	 * is constrained by per-SE EOP done signal count, which is 8-bit.
	 * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
	 * more than (EOP entry count - 1) so a queue size of 0x800 dwords
	 * is safe, giving a maximum field value of 0xA.
	 */
	m->cp_hqd_eop_control = min(0xA,
		ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1);
	m->cp_hqd_eop_base_addr_lo =
			lower_32_bits(q->eop_ring_buffer_address >> 8);
	m->cp_hqd_eop_base_addr_hi =
			upper_32_bits(q->eop_ring_buffer_address >> 8);

	m->cp_hqd_iq_timer = 0;

	m->cp_hqd_vmid = q->vmid;

	if (q->format == KFD_QUEUE_FORMAT_AQL) {
		/* GC 10 removed WPP_CLAMP from PQ Control */
		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
				2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
				1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT;
		m->cp_hqd_pq_doorbell_control |=
			1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
	}
	if (mm->dev->cwsr_enabled)
		m->cp_hqd_ctx_save_control = 0;

	update_cu_mask(mm, mqd, q);
	set_priority(m, q);

	q->is_active = QUEUE_IS_ACTIVE(*q);
}

static int destroy_mqd(struct mqd_manager *mm, void *mqd,
		       enum kfd_preempt_type type,
		       unsigned int timeout, uint32_t pipe_id,
		       uint32_t queue_id)
{
	return mm->dev->kfd2kgd->hqd_destroy
		(mm->dev->kgd, mqd, type, timeout,
		 pipe_id, queue_id);
}

static void free_mqd(struct mqd_manager *mm, void *mqd,
			struct kfd_mem_obj *mqd_mem_obj)
{
	kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
}

static bool is_occupied(struct mqd_manager *mm, void *mqd,
			uint64_t queue_address, uint32_t pipe_id,
			uint32_t queue_id)
{
	return mm->dev->kfd2kgd->hqd_is_occupied(
		mm->dev->kgd, queue_address,
		pipe_id, queue_id);
}

static int get_wave_state(struct mqd_manager *mm, void *mqd,
			  void __user *ctl_stack,
			  u32 *ctl_stack_used_size,
			  u32 *save_area_used_size)
{
	struct v10_compute_mqd *m;

	m = get_mqd(mqd);

	/* The control stack is written backwards, while the workgroup
	 * context data is written forwards.  Both start from
	 * m->cp_hqd_cntl_stack_size.  The current positions are at
	 * m->cp_hqd_cntl_stack_offset and m->cp_hqd_wg_state_offset,
	 * respectively.
	 */
	*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
		m->cp_hqd_cntl_stack_offset;
	*save_area_used_size = m->cp_hqd_wg_state_offset -
		m->cp_hqd_cntl_stack_size;

	/* The control stack is not copied to user mode for GFXv10 because
	 * it is part of the context save area, which is already
	 * accessible to user mode.
	 */

	return 0;
}

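/*
 * The HIQ shares the compute MQD layout; on top of the regular init we
 * mark it as a privileged, kernel-managed queue in CP_HQD_PQ_CONTROL.
 */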
static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *q)
{
	struct v10_compute_mqd *m;

	init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);

	m = get_mqd(*mqd);

	m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
			1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
}

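/*
 * SDMA MQDs carry no static defaults; init just zeroes the structure and
 * defers all register programming to update_mqd_sdma() below.
 */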
static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
		struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
		struct queue_properties *q)
{
	struct v10_sdma_mqd *m;

	m = (struct v10_sdma_mqd *) mqd_mem_obj->cpu_ptr;

	memset(m, 0, sizeof(struct v10_sdma_mqd));

	*mqd = m;
	if (gart_addr)
		*gart_addr = mqd_mem_obj->gpu_addr;

	mm->update_mqd(mm, m, q);
}

static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
		uint32_t pipe_id, uint32_t queue_id,
		struct queue_properties *p, struct mm_struct *mms)
{
	return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
					       (uint32_t __user *)p->write_ptr,
					       mms);
}

#define SDMA_RLC_DUMMY_DEFAULT 0xf

static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
		struct queue_properties *q)
{
	struct v10_sdma_mqd *m;

	m = get_sdma_mqd(mqd);
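	/* RB_SIZE encodes log2(ring size in dwords); ffs() is 1-based. */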
	m->sdmax_rlcx_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1)
		<< SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
		q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
		1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
		6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;

	m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
	m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
	m->sdmax_rlcx_doorbell_offset =
		q->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;

	m->sdma_engine_id = q->sdma_engine_id;
	m->sdma_queue_id = q->sdma_queue_id;
	m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;

	q->is_active = QUEUE_IS_ACTIVE(*q);
}

/*
 * The preempt type here is ignored because there is only one way
 * to preempt an SDMA queue.
 */
static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
		enum kfd_preempt_type type,
		unsigned int timeout, uint32_t pipe_id,
		uint32_t queue_id)
{
	return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
}

static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
		uint64_t queue_address, uint32_t pipe_id,
		uint32_t queue_id)
{
	return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
}

#if defined(CONFIG_DEBUG_FS)

static int debugfs_show_mqd(struct seq_file *m, void *data)
{
	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
		     data, sizeof(struct v10_compute_mqd), false);
	return 0;
}

static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
{
	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
		     data, sizeof(struct v10_sdma_mqd), false);
	return 0;
}

#endif

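/*
 * Build the per-queue-type ops table.  An illustrative (not verbatim)
 * call sequence for a manager returned here, roughly as the device queue
 * manager would use it:
 *
 *	mqd_mgr = mqd_manager_init_v10(KFD_MQD_TYPE_CP, dev);
 *	mem = mqd_mgr->allocate_mqd(dev, &props);
 *	mqd_mgr->init_mqd(mqd_mgr, &mqd, mem, &gart_addr, &props);
 *	mqd_mgr->load_mqd(mqd_mgr, mqd, pipe, queue, &props, mms);
 *	...
 *	mqd_mgr->destroy_mqd(mqd_mgr, mqd, type, timeout, pipe, queue);
 *	mqd_mgr->free_mqd(mqd_mgr, mqd, mem);
 */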
struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
		struct kfd_dev *dev)
{
	struct mqd_manager *mqd;

	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
	if (!mqd)
		return NULL;

	mqd->dev = dev;

	switch (type) {
	case KFD_MQD_TYPE_CP:
		pr_debug("%s@%i\n", __func__, __LINE__);
		mqd->allocate_mqd = allocate_mqd;
		mqd->init_mqd = init_mqd;
		mqd->free_mqd = free_mqd;
		mqd->load_mqd = load_mqd;
		mqd->update_mqd = update_mqd;
		mqd->destroy_mqd = destroy_mqd;
		mqd->is_occupied = is_occupied;
		mqd->mqd_size = sizeof(struct v10_compute_mqd);
		mqd->get_wave_state = get_wave_state;
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		pr_debug("%s@%i\n", __func__, __LINE__);
		break;
	case KFD_MQD_TYPE_HIQ:
		pr_debug("%s@%i\n", __func__, __LINE__);
		mqd->allocate_mqd = allocate_hiq_mqd;
		mqd->init_mqd = init_mqd_hiq;
		mqd->free_mqd = free_mqd_hiq_sdma;
		mqd->load_mqd = hiq_load_mqd_kiq;
		mqd->update_mqd = update_mqd;
		mqd->destroy_mqd = destroy_mqd;
		mqd->is_occupied = is_occupied;
		mqd->mqd_size = sizeof(struct v10_compute_mqd);
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		pr_debug("%s@%i\n", __func__, __LINE__);
		break;
	case KFD_MQD_TYPE_DIQ:
		mqd->allocate_mqd = allocate_mqd;
		mqd->init_mqd = init_mqd_hiq;
		mqd->free_mqd = free_mqd;
		mqd->load_mqd = load_mqd;
		mqd->update_mqd = update_mqd;
		mqd->destroy_mqd = destroy_mqd;
		mqd->is_occupied = is_occupied;
		mqd->mqd_size = sizeof(struct v10_compute_mqd);
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		break;
	case KFD_MQD_TYPE_SDMA:
		pr_debug("%s@%i\n", __func__, __LINE__);
		mqd->allocate_mqd = allocate_sdma_mqd;
		mqd->init_mqd = init_mqd_sdma;
		mqd->free_mqd = free_mqd_hiq_sdma;
		mqd->load_mqd = load_mqd_sdma;
		mqd->update_mqd = update_mqd_sdma;
		mqd->destroy_mqd = destroy_mqd_sdma;
		mqd->is_occupied = is_occupied_sdma;
		mqd->mqd_size = sizeof(struct v10_sdma_mqd);
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif
		pr_debug("%s@%i\n", __func__, __LINE__);
		break;
	default:
		kfree(mqd);
		return NULL;
	}

	return mqd;
}