1 /* $NetBSD: kfd_kernel_queue.c,v 1.3 2021/12/18 23:44:59 riastradh Exp $ */ 2 3 /* 4 * Copyright 2014 Advanced Micro Devices, Inc. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 * OTHER DEALINGS IN THE SOFTWARE. 23 * 24 */ 25 26 #include <sys/cdefs.h> 27 __KERNEL_RCSID(0, "$NetBSD: kfd_kernel_queue.c,v 1.3 2021/12/18 23:44:59 riastradh Exp $"); 28 29 #include <linux/types.h> 30 #include <linux/mutex.h> 31 #include <linux/slab.h> 32 #include <linux/printk.h> 33 #include <linux/sched.h> 34 #include "kfd_kernel_queue.h" 35 #include "kfd_priv.h" 36 #include "kfd_device_queue_manager.h" 37 #include "kfd_pm4_headers.h" 38 #include "kfd_pm4_opcodes.h" 39 40 #define PM4_COUNT_ZERO (((1 << 15) - 1) << 16) 41 42 /* Initialize a kernel queue, including allocations of GART memory 43 * needed for the queue. 44 */ 45 static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev, 46 enum kfd_queue_type type, unsigned int queue_size) 47 { 48 struct queue_properties prop; 49 int retval; 50 union PM4_MES_TYPE_3_HEADER nop; 51 52 if (WARN_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ)) 53 return false; 54 55 pr_debug("Initializing queue type %d size %d\n", KFD_QUEUE_TYPE_HIQ, 56 queue_size); 57 58 memset(&prop, 0, sizeof(prop)); 59 memset(&nop, 0, sizeof(nop)); 60 61 nop.opcode = IT_NOP; 62 nop.type = PM4_TYPE_3; 63 nop.u32all |= PM4_COUNT_ZERO; 64 65 kq->dev = dev; 66 kq->nop_packet = nop.u32all; 67 switch (type) { 68 case KFD_QUEUE_TYPE_DIQ: 69 kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_DIQ]; 70 break; 71 case KFD_QUEUE_TYPE_HIQ: 72 kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; 73 break; 74 default: 75 pr_err("Invalid queue type %d\n", type); 76 return false; 77 } 78 79 if (!kq->mqd_mgr) 80 return false; 81 82 prop.doorbell_ptr = kfd_get_kernel_doorbell(dev, &prop.doorbell_off); 83 84 if (!prop.doorbell_ptr) { 85 pr_err("Failed to initialize doorbell"); 86 goto err_get_kernel_doorbell; 87 } 88 89 retval = kfd_gtt_sa_allocate(dev, queue_size, &kq->pq); 90 if (retval != 0) { 91 pr_err("Failed to init pq queues size %d\n", queue_size); 92 goto err_pq_allocate_vidmem; 93 } 94 95 kq->pq_kernel_addr = kq->pq->cpu_ptr; 96 kq->pq_gpu_addr = kq->pq->gpu_addr; 97 98 /* For CIK family asics, kq->eop_mem is not needed */ 99 if (dev->device_info->asic_family > CHIP_MULLINS) { 100 retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem); 101 if (retval != 0) 102 goto err_eop_allocate_vidmem; 103 104 kq->eop_gpu_addr = kq->eop_mem->gpu_addr; 105 kq->eop_kernel_addr = kq->eop_mem->cpu_ptr; 106 107 memset(kq->eop_kernel_addr, 0, PAGE_SIZE); 108 } 109 110 retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel), 111 &kq->rptr_mem); 112 113 if (retval != 0) 114 goto err_rptr_allocate_vidmem; 115 116 kq->rptr_kernel = kq->rptr_mem->cpu_ptr; 117 kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr; 118 119 retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size, 120 &kq->wptr_mem); 121 122 if (retval != 0) 123 goto err_wptr_allocate_vidmem; 124 125 kq->wptr_kernel = kq->wptr_mem->cpu_ptr; 126 kq->wptr_gpu_addr = kq->wptr_mem->gpu_addr; 127 128 memset(kq->pq_kernel_addr, 0, queue_size); 129 memset(kq->rptr_kernel, 0, sizeof(*kq->rptr_kernel)); 130 memset(kq->wptr_kernel, 0, sizeof(*kq->wptr_kernel)); 131 132 prop.queue_size = queue_size; 133 prop.is_interop = false; 134 prop.priority = 1; 135 prop.queue_percent = 100; 136 prop.type = type; 137 prop.vmid = 0; 138 prop.queue_address = kq->pq_gpu_addr; 139 prop.read_ptr = (uint32_t *) kq->rptr_gpu_addr; 140 prop.write_ptr = (uint32_t *) kq->wptr_gpu_addr; 141 prop.eop_ring_buffer_address = kq->eop_gpu_addr; 142 prop.eop_ring_buffer_size = PAGE_SIZE; 143 prop.cu_mask = NULL; 144 145 if (init_queue(&kq->queue, &prop) != 0) 146 goto err_init_queue; 147 148 kq->queue->device = dev; 149 kq->queue->process = kfd_get_process(current); 150 151 kq->queue->mqd_mem_obj = kq->mqd_mgr->allocate_mqd(kq->mqd_mgr->dev, 152 &kq->queue->properties); 153 if (!kq->queue->mqd_mem_obj) 154 goto err_allocate_mqd; 155 kq->mqd_mgr->init_mqd(kq->mqd_mgr, &kq->queue->mqd, 156 kq->queue->mqd_mem_obj, 157 &kq->queue->gart_mqd_addr, 158 &kq->queue->properties); 159 /* assign HIQ to HQD */ 160 if (type == KFD_QUEUE_TYPE_HIQ) { 161 pr_debug("Assigning hiq to hqd\n"); 162 kq->queue->pipe = KFD_CIK_HIQ_PIPE; 163 kq->queue->queue = KFD_CIK_HIQ_QUEUE; 164 kq->mqd_mgr->load_mqd(kq->mqd_mgr, kq->queue->mqd, 165 kq->queue->pipe, kq->queue->queue, 166 &kq->queue->properties, NULL); 167 } else { 168 /* allocate fence for DIQ */ 169 170 retval = kfd_gtt_sa_allocate(dev, sizeof(uint32_t), 171 &kq->fence_mem_obj); 172 173 if (retval != 0) 174 goto err_alloc_fence; 175 176 kq->fence_kernel_address = kq->fence_mem_obj->cpu_ptr; 177 kq->fence_gpu_addr = kq->fence_mem_obj->gpu_addr; 178 } 179 180 print_queue(kq->queue); 181 182 return true; 183 err_alloc_fence: 184 kq->mqd_mgr->free_mqd(kq->mqd_mgr, kq->queue->mqd, kq->queue->mqd_mem_obj); 185 err_allocate_mqd: 186 uninit_queue(kq->queue); 187 err_init_queue: 188 kfd_gtt_sa_free(dev, kq->wptr_mem); 189 err_wptr_allocate_vidmem: 190 kfd_gtt_sa_free(dev, kq->rptr_mem); 191 err_rptr_allocate_vidmem: 192 kfd_gtt_sa_free(dev, kq->eop_mem); 193 err_eop_allocate_vidmem: 194 kfd_gtt_sa_free(dev, kq->pq); 195 err_pq_allocate_vidmem: 196 kfd_release_kernel_doorbell(dev, prop.doorbell_ptr); 197 err_get_kernel_doorbell: 198 return false; 199 200 } 201 202 /* Uninitialize a kernel queue and free all its memory usages. */ 203 static void kq_uninitialize(struct kernel_queue *kq, bool hanging) 204 { 205 if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ && !hanging) 206 kq->mqd_mgr->destroy_mqd(kq->mqd_mgr, 207 kq->queue->mqd, 208 KFD_PREEMPT_TYPE_WAVEFRONT_RESET, 209 KFD_UNMAP_LATENCY_MS, 210 kq->queue->pipe, 211 kq->queue->queue); 212 else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ) 213 kfd_gtt_sa_free(kq->dev, kq->fence_mem_obj); 214 215 kq->mqd_mgr->free_mqd(kq->mqd_mgr, kq->queue->mqd, 216 kq->queue->mqd_mem_obj); 217 218 kfd_gtt_sa_free(kq->dev, kq->rptr_mem); 219 kfd_gtt_sa_free(kq->dev, kq->wptr_mem); 220 221 /* For CIK family asics, kq->eop_mem is Null, kfd_gtt_sa_free() 222 * is able to handle NULL properly. 223 */ 224 kfd_gtt_sa_free(kq->dev, kq->eop_mem); 225 226 kfd_gtt_sa_free(kq->dev, kq->pq); 227 kfd_release_kernel_doorbell(kq->dev, 228 kq->queue->properties.doorbell_ptr); 229 uninit_queue(kq->queue); 230 } 231 232 int kq_acquire_packet_buffer(struct kernel_queue *kq, 233 size_t packet_size_in_dwords, unsigned int **buffer_ptr) 234 { 235 size_t available_size; 236 size_t queue_size_dwords; 237 uint32_t wptr, rptr; 238 uint64_t wptr64; 239 unsigned int *queue_address; 240 241 /* When rptr == wptr, the buffer is empty. 242 * When rptr == wptr + 1, the buffer is full. 243 * It is always rptr that advances to the position of wptr, rather than 244 * the opposite. So we can only use up to queue_size_dwords - 1 dwords. 245 */ 246 rptr = *kq->rptr_kernel; 247 wptr = kq->pending_wptr; 248 wptr64 = kq->pending_wptr64; 249 queue_address = (unsigned int *)kq->pq_kernel_addr; 250 queue_size_dwords = kq->queue->properties.queue_size / 4; 251 252 pr_debug("rptr: %d\n", rptr); 253 pr_debug("wptr: %d\n", wptr); 254 pr_debug("queue_address 0x%p\n", queue_address); 255 256 available_size = (rptr + queue_size_dwords - 1 - wptr) % 257 queue_size_dwords; 258 259 if (packet_size_in_dwords > available_size) { 260 /* 261 * make sure calling functions know 262 * acquire_packet_buffer() failed 263 */ 264 goto err_no_space; 265 } 266 267 if (wptr + packet_size_in_dwords >= queue_size_dwords) { 268 /* make sure after rolling back to position 0, there is 269 * still enough space. 270 */ 271 if (packet_size_in_dwords >= rptr) 272 goto err_no_space; 273 274 /* fill nops, roll back and start at position 0 */ 275 while (wptr > 0) { 276 queue_address[wptr] = kq->nop_packet; 277 wptr = (wptr + 1) % queue_size_dwords; 278 wptr64++; 279 } 280 } 281 282 *buffer_ptr = &queue_address[wptr]; 283 kq->pending_wptr = wptr + packet_size_in_dwords; 284 kq->pending_wptr64 = wptr64 + packet_size_in_dwords; 285 286 return 0; 287 288 err_no_space: 289 *buffer_ptr = NULL; 290 return -ENOMEM; 291 } 292 293 void kq_submit_packet(struct kernel_queue *kq) 294 { 295 #ifdef DEBUG 296 int i; 297 298 for (i = *kq->wptr_kernel; i < kq->pending_wptr; i++) { 299 pr_debug("0x%2X ", kq->pq_kernel_addr[i]); 300 if (i % 15 == 0) 301 pr_debug("\n"); 302 } 303 pr_debug("\n"); 304 #endif 305 if (kq->dev->device_info->doorbell_size == 8) { 306 *kq->wptr64_kernel = kq->pending_wptr64; 307 write_kernel_doorbell64(kq->queue->properties.doorbell_ptr, 308 kq->pending_wptr64); 309 } else { 310 *kq->wptr_kernel = kq->pending_wptr; 311 write_kernel_doorbell(kq->queue->properties.doorbell_ptr, 312 kq->pending_wptr); 313 } 314 } 315 316 void kq_rollback_packet(struct kernel_queue *kq) 317 { 318 if (kq->dev->device_info->doorbell_size == 8) { 319 kq->pending_wptr64 = *kq->wptr64_kernel; 320 kq->pending_wptr = *kq->wptr_kernel % 321 (kq->queue->properties.queue_size / 4); 322 } else { 323 kq->pending_wptr = *kq->wptr_kernel; 324 } 325 } 326 327 struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, 328 enum kfd_queue_type type) 329 { 330 struct kernel_queue *kq; 331 332 kq = kzalloc(sizeof(*kq), GFP_KERNEL); 333 if (!kq) 334 return NULL; 335 336 if (kq_initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE)) 337 return kq; 338 339 pr_err("Failed to init kernel queue\n"); 340 341 kfree(kq); 342 return NULL; 343 } 344 345 void kernel_queue_uninit(struct kernel_queue *kq, bool hanging) 346 { 347 kq_uninitialize(kq, hanging); 348 kfree(kq); 349 } 350 351 /* FIXME: Can this test be removed? */ 352 static __attribute__((unused)) void test_kq(struct kfd_dev *dev) 353 { 354 struct kernel_queue *kq; 355 uint32_t *buffer, i; 356 int retval; 357 358 pr_err("Starting kernel queue test\n"); 359 360 kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ); 361 if (unlikely(!kq)) { 362 pr_err(" Failed to initialize HIQ\n"); 363 pr_err("Kernel queue test failed\n"); 364 return; 365 } 366 367 retval = kq_acquire_packet_buffer(kq, 5, &buffer); 368 if (unlikely(retval != 0)) { 369 pr_err(" Failed to acquire packet buffer\n"); 370 pr_err("Kernel queue test failed\n"); 371 return; 372 } 373 for (i = 0; i < 5; i++) 374 buffer[i] = kq->nop_packet; 375 kq_submit_packet(kq); 376 377 pr_err("Ending kernel queue test\n"); 378 } 379 380 381