/*	$NetBSD: kfd_mqd_manager_v10.c,v 1.2 2021/12/18 23:44:59 riastradh Exp $	*/

/*
 * Copyright 2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kfd_mqd_manager_v10.c,v 1.2 2021/12/18 23:44:59 riastradh Exp $");

#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include "kfd_priv.h"
#include "kfd_mqd_manager.h"
#include "v10_structs.h"
#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "amdgpu_amdkfd.h"

static inline struct v10_compute_mqd *get_mqd(void *mqd)
{
	return (struct v10_compute_mqd *)mqd;
}

static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v10_sdma_mqd *)mqd;
}

static void update_cu_mask(struct mqd_manager *mm, void *mqd,
			   struct queue_properties *q)
{
	struct v10_compute_mqd *m;
	uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */

	if (q->cu_mask_count == 0)
		return;

	mqd_symmetrically_map_cu_mask(mm,
		q->cu_mask, q->cu_mask_count, se_mask);

	m = get_mqd(mqd);
	m->compute_static_thread_mgmt_se0 = se_mask[0];
	m->compute_static_thread_mgmt_se1 = se_mask[1];
	m->compute_static_thread_mgmt_se2 = se_mask[2];
	m->compute_static_thread_mgmt_se3 = se_mask[3];

	pr_debug("update cu mask to %#x %#x %#x %#x\n",
		m->compute_static_thread_mgmt_se0,
		m->compute_static_thread_mgmt_se1,
		m->compute_static_thread_mgmt_se2,
		m->compute_static_thread_mgmt_se3);
}

static void set_priority(struct v10_compute_mqd *m, struct queue_properties *q)
{
	m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
	m->cp_hqd_queue_priority = q->priority;
}

static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
		struct queue_properties *q)
{
	struct kfd_mem_obj *mqd_mem_obj;

	if (kfd_gtt_sa_allocate(kfd, sizeof(struct v10_compute_mqd),
			&mqd_mem_obj))
		return NULL;

	return mqd_mem_obj;
}

static void init_mqd(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *q)
{
	uint64_t addr;
	struct v10_compute_mqd *m;

	m = (struct v10_compute_mqd *) mqd_mem_obj->cpu_ptr;
	addr = mqd_mem_obj->gpu_addr;

	memset(m, 0, sizeof(struct v10_compute_mqd));

	m->header = 0xC0310800;
	m->compute_pipelinestat_enable = 1;
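	/*
	 * Enable all CUs on all shader engines by default; update_cu_mask()
	 * narrows these masks later (via update_mqd()) if the queue
	 * specifies a CU mask.
	 */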
	m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;

	m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
			0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;

	m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;

	m->cp_mqd_base_addr_lo = lower_32_bits(addr);
	m->cp_mqd_base_addr_hi = upper_32_bits(addr);

	m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
			1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
			10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;

	if (q->format == KFD_QUEUE_FORMAT_AQL) {
		m->cp_hqd_aql_control =
			1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
	}

	if (mm->dev->cwsr_enabled) {
		m->cp_hqd_persistent_state |=
			(1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
		m->cp_hqd_ctx_save_base_addr_lo =
			lower_32_bits(q->ctx_save_restore_area_address);
		m->cp_hqd_ctx_save_base_addr_hi =
			upper_32_bits(q->ctx_save_restore_area_address);
		m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
		m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
		m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
		m->cp_hqd_wg_state_offset = q->ctl_stack_size;
	}

	*mqd = m;
	if (gart_addr)
		*gart_addr = addr;
	mm->update_mqd(mm, m, q);
}

static int load_mqd(struct mqd_manager *mm, void *mqd,
			uint32_t pipe_id, uint32_t queue_id,
			struct queue_properties *p, struct mm_struct *mms)
{
	int r = 0;
	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);

	r = mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
					(uint32_t __user *)p->write_ptr,
					wptr_shift, 0, mms);
	return r;
}

static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
			uint32_t pipe_id, uint32_t queue_id,
			struct queue_properties *p, struct mm_struct *mms)
{
	return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id,
					queue_id, p->doorbell_off);
}

static void update_mqd(struct mqd_manager *mm, void *mqd,
			struct queue_properties *q)
{
	struct v10_compute_mqd *m;

	m = get_mqd(mqd);

	m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
	m->cp_hqd_pq_control |=
			ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
	pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);

	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);

	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
	m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
	m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
	m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);

	m->cp_hqd_pq_doorbell_control =
		q->doorbell_off <<
			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
			m->cp_hqd_pq_doorbell_control);

	m->cp_hqd_ib_control = 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT;
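	/*
	 * Program the per-queue EOP ring buffer: the size field uses the
	 * same log2-of-dwords encoding as the PQ size above, and the base
	 * address is stored in units of 256 bytes.
	 */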
	/*
	 * HW does not clamp this field correctly. Maximum EOP queue size
	 * is constrained by per-SE EOP done signal count, which is 8-bit.
	 * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
	 * more than (EOP entry count - 1) so a queue size of 0x800 dwords
	 * is safe, giving a maximum field value of 0xA.
	 */
	m->cp_hqd_eop_control = min(0xA,
		ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1);
	m->cp_hqd_eop_base_addr_lo =
			lower_32_bits(q->eop_ring_buffer_address >> 8);
	m->cp_hqd_eop_base_addr_hi =
			upper_32_bits(q->eop_ring_buffer_address >> 8);

	m->cp_hqd_iq_timer = 0;

	m->cp_hqd_vmid = q->vmid;

	if (q->format == KFD_QUEUE_FORMAT_AQL) {
		/* GC 10 removed WPP_CLAMP from PQ Control */
		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
				2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
				1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT;
		m->cp_hqd_pq_doorbell_control |=
			1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
	}
	if (mm->dev->cwsr_enabled)
		m->cp_hqd_ctx_save_control = 0;

	update_cu_mask(mm, mqd, q);
	set_priority(m, q);

	q->is_active = QUEUE_IS_ACTIVE(*q);
}

static int destroy_mqd(struct mqd_manager *mm, void *mqd,
			enum kfd_preempt_type type,
			unsigned int timeout, uint32_t pipe_id,
			uint32_t queue_id)
{
	return mm->dev->kfd2kgd->hqd_destroy
		(mm->dev->kgd, mqd, type, timeout,
		 pipe_id, queue_id);
}

static void free_mqd(struct mqd_manager *mm, void *mqd,
			struct kfd_mem_obj *mqd_mem_obj)
{
	kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
}

static bool is_occupied(struct mqd_manager *mm, void *mqd,
			uint64_t queue_address, uint32_t pipe_id,
			uint32_t queue_id)
{
	return mm->dev->kfd2kgd->hqd_is_occupied(
		mm->dev->kgd, queue_address,
		pipe_id, queue_id);
}
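/*
 * Report how much of the control stack and the context save area the
 * queue has consumed, based on the offsets currently recorded in the MQD.
 */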
static int get_wave_state(struct mqd_manager *mm, void *mqd,
			void __user *ctl_stack,
			u32 *ctl_stack_used_size,
			u32 *save_area_used_size)
{
	struct v10_compute_mqd *m;

	m = get_mqd(mqd);

	/* Control stack is written backwards, while workgroup context data
	 * is written forwards. Both start from m->cp_hqd_cntl_stack_size.
	 * Current position is at m->cp_hqd_cntl_stack_offset and
	 * m->cp_hqd_wg_state_offset, respectively.
	 */
	*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
		m->cp_hqd_cntl_stack_offset;
	*save_area_used_size = m->cp_hqd_wg_state_offset -
		m->cp_hqd_cntl_stack_size;

	/* Control stack is not copied to user mode for GFXv10 because
	 * it's part of the context save area that is already
	 * accessible to user mode.
	 */

	return 0;
}

static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *q)
{
	struct v10_compute_mqd *m;

	init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);

	m = get_mqd(*mqd);

	m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
			1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
}

static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *q)
{
	struct v10_sdma_mqd *m;

	m = (struct v10_sdma_mqd *) mqd_mem_obj->cpu_ptr;

	memset(m, 0, sizeof(struct v10_sdma_mqd));

	*mqd = m;
	if (gart_addr)
		*gart_addr = mqd_mem_obj->gpu_addr;

	mm->update_mqd(mm, m, q);
}

static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
			uint32_t pipe_id, uint32_t queue_id,
			struct queue_properties *p, struct mm_struct *mms)
{
	return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
			(uint32_t __user *)p->write_ptr,
			mms);
}

#define SDMA_RLC_DUMMY_DEFAULT 0xf

static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
			struct queue_properties *q)
{
	struct v10_sdma_mqd *m;

	m = get_sdma_mqd(mqd);
	m->sdmax_rlcx_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1)
		<< SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
		q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
		1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
		6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;

	m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
	m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
	m->sdmax_rlcx_doorbell_offset =
		q->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;

	m->sdma_engine_id = q->sdma_engine_id;
	m->sdma_queue_id = q->sdma_queue_id;
	m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;

	q->is_active = QUEUE_IS_ACTIVE(*q);
}

/*
 * The preempt type here is ignored because there is only one way
 * to preempt an SDMA queue.
 */
static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
			enum kfd_preempt_type type,
			unsigned int timeout, uint32_t pipe_id,
			uint32_t queue_id)
{
	return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
}

static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
			uint64_t queue_address, uint32_t pipe_id,
			uint32_t queue_id)
{
	return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
}
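/* debugfs support: dump the raw compute/SDMA MQD contents as a hex blob. */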
#if defined(CONFIG_DEBUG_FS)

static int debugfs_show_mqd(struct seq_file *m, void *data)
{
	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
		     data, sizeof(struct v10_compute_mqd), false);
	return 0;
}

static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
{
	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
		     data, sizeof(struct v10_sdma_mqd), false);
	return 0;
}

#endif

struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
		struct kfd_dev *dev)
{
	struct mqd_manager *mqd;

	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
	if (!mqd)
		return NULL;

	mqd->dev = dev;

	switch (type) {
	case KFD_MQD_TYPE_CP:
		pr_debug("%s@%i\n", __func__, __LINE__);
		mqd->allocate_mqd = allocate_mqd;
		mqd->init_mqd = init_mqd;
		mqd->free_mqd = free_mqd;
		mqd->load_mqd = load_mqd;
		mqd->update_mqd = update_mqd;
		mqd->destroy_mqd = destroy_mqd;
		mqd->is_occupied = is_occupied;
		mqd->mqd_size = sizeof(struct v10_compute_mqd);
		mqd->get_wave_state = get_wave_state;
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		pr_debug("%s@%i\n", __func__, __LINE__);
		break;
	case KFD_MQD_TYPE_HIQ:
		pr_debug("%s@%i\n", __func__, __LINE__);
		mqd->allocate_mqd = allocate_hiq_mqd;
		mqd->init_mqd = init_mqd_hiq;
		mqd->free_mqd = free_mqd_hiq_sdma;
		mqd->load_mqd = hiq_load_mqd_kiq;
		mqd->update_mqd = update_mqd;
		mqd->destroy_mqd = destroy_mqd;
		mqd->is_occupied = is_occupied;
		mqd->mqd_size = sizeof(struct v10_compute_mqd);
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		pr_debug("%s@%i\n", __func__, __LINE__);
		break;
	case KFD_MQD_TYPE_DIQ:
		mqd->allocate_mqd = allocate_mqd;
		mqd->init_mqd = init_mqd_hiq;
		mqd->free_mqd = free_mqd;
		mqd->load_mqd = load_mqd;
		mqd->update_mqd = update_mqd;
		mqd->destroy_mqd = destroy_mqd;
		mqd->is_occupied = is_occupied;
		mqd->mqd_size = sizeof(struct v10_compute_mqd);
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		break;
	case KFD_MQD_TYPE_SDMA:
		pr_debug("%s@%i\n", __func__, __LINE__);
		mqd->allocate_mqd = allocate_sdma_mqd;
		mqd->init_mqd = init_mqd_sdma;
		mqd->free_mqd = free_mqd_hiq_sdma;
		mqd->load_mqd = load_mqd_sdma;
		mqd->update_mqd = update_mqd_sdma;
		mqd->destroy_mqd = destroy_mqd_sdma;
		mqd->is_occupied = is_occupied_sdma;
		mqd->mqd_size = sizeof(struct v10_sdma_mqd);
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif
		pr_debug("%s@%i\n", __func__, __LINE__);
		break;
	default:
		kfree(mqd);
		return NULL;
	}

	return mqd;
}