/*	$NetBSD: kfd_process.c,v 1.3 2021/12/18 23:44:59 riastradh Exp $	*/

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kfd_process.c,v 1.3 2021/12/18 23:44:59 riastradh Exp $");

#include <linux/mutex.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/compat.h>
#include <linux/mman.h>
#include <linux/file.h>
#include "amdgpu_amdkfd.h"
#include "amdgpu.h"

struct mm_struct;

#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "kfd_iommu.h"

/*
 * List of struct kfd_process (field kfd_process).
 * Unique/indexed by mm_struct*
 */
DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_MUTEX(kfd_processes_mutex);

DEFINE_SRCU(kfd_processes_srcu);

/* For process termination handling */
static struct workqueue_struct *kfd_process_wq;

/* Ordered, single-threaded workqueue for restoring evicted
 * processes. Restoring multiple processes concurrently under memory
 * pressure can lead to processes blocking each other from validating
 * their BOs and result in a live-lock situation where processes
 * remain evicted indefinitely.
 */
static struct workqueue_struct *kfd_restore_wq;

static struct kfd_process *find_process(const struct task_struct *thread);
static void kfd_process_ref_release(struct kref *ref);
static struct kfd_process *create_process(const struct task_struct *thread);
static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);

static void evict_process_worker(struct work_struct *work);
static void restore_process_worker(struct work_struct *work);

struct kfd_procfs_tree {
	struct kobject *kobj;
};

static struct kfd_procfs_tree procfs;

static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
			       char *buffer)
{
	int val = 0;

	if (strcmp(attr->name, "pasid") == 0) {
		struct kfd_process *p = container_of(attr, struct kfd_process,
				attr_pasid);
		val = p->pasid;
	} else {
		pr_err("Invalid attribute");
		return -EINVAL;
	}

	return snprintf(buffer, PAGE_SIZE, "%d\n", val);
}

static void kfd_procfs_kobj_release(struct kobject *kobj)
{
	kfree(kobj);
}

static const struct sysfs_ops kfd_procfs_ops = {
	.show = kfd_procfs_show,
};

static struct kobj_type procfs_type = {
	.release = kfd_procfs_kobj_release,
	.sysfs_ops = &kfd_procfs_ops,
};

void kfd_procfs_init(void)
{
	int ret = 0;

	procfs.kobj = kfd_alloc_struct(procfs.kobj);
	if (!procfs.kobj)
		return;

	ret = kobject_init_and_add(procfs.kobj, &procfs_type,
				   &kfd_device->kobj, "proc");
	if (ret) {
		pr_warn("Could not create procfs proc folder");
		/* If we fail to create the procfs, clean up */
		kfd_procfs_shutdown();
	}
}

void kfd_procfs_shutdown(void)
{
	if (procfs.kobj) {
		kobject_del(procfs.kobj);
		kobject_put(procfs.kobj);
		procfs.kobj = NULL;
	}
}
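
/*
 * Note (assumed layout, not taken from this file): the tree above hangs
 * off the kfd device's sysfs node, so each process gets an entry along
 * the lines of .../kfd/proc/<pid>/pasid, and reading that file is
 * serviced by kfd_procfs_show() above.
 */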

int kfd_process_create_wq(void)
{
	if (!kfd_process_wq)
		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
	if (!kfd_restore_wq)
		kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);

	if (!kfd_process_wq || !kfd_restore_wq) {
		kfd_process_destroy_wq();
		return -ENOMEM;
	}

	return 0;
}

void kfd_process_destroy_wq(void)
{
	if (kfd_process_wq) {
		destroy_workqueue(kfd_process_wq);
		kfd_process_wq = NULL;
	}
	if (kfd_restore_wq) {
		destroy_workqueue(kfd_restore_wq);
		kfd_restore_wq = NULL;
	}
}

static void kfd_process_free_gpuvm(struct kgd_mem *mem,
			struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;

	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm);
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem);
}

/* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
 *	This function should be only called right after the process
 *	is created and when kfd_processes_mutex is still being held
 *	to avoid concurrency. Because of that exclusiveness, we do
 *	not need to take p->mutex.
 */
static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
				   uint64_t gpu_va, uint32_t size,
				   uint32_t flags, void **kptr)
{
	struct kfd_dev *kdev = pdd->dev;
	struct kgd_mem *mem = NULL;
	int handle;
	int err;

	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
						pdd->vm, &mem, NULL, flags);
	if (err)
		goto err_alloc_mem;

	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
	if (err)
		goto err_map_mem;

	err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, mem, true);
	if (err) {
		pr_debug("Sync memory failed, wait interrupted by user signal\n");
		goto sync_memory_failed;
	}

	/* Create an obj handle so kfd_process_device_remove_obj_handle
	 * will take care of the bo removal when the process finishes.
	 * We do not need to take p->mutex, because the process is just
	 * created and the ioctls have not had the chance to run.
	 */
	handle = kfd_process_device_create_obj_handle(pdd, mem);

	if (handle < 0) {
		err = handle;
		goto free_gpuvm;
	}

	if (kptr) {
		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd,
				(struct kgd_mem *)mem, kptr, NULL);
		if (err) {
			pr_debug("Map GTT BO to kernel failed\n");
			goto free_obj_handle;
		}
	}

	return err;

free_obj_handle:
	kfd_process_device_remove_obj_handle(pdd, handle);
free_gpuvm:
sync_memory_failed:
	kfd_process_free_gpuvm(mem, pdd);
	return err;

err_map_mem:
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem);
err_alloc_mem:
	*kptr = NULL;
	return err;
}

/* kfd_process_device_reserve_ib_mem - Reserve memory inside the
 *	process for IB usage. The memory reserved is for KFD to submit
 *	IB to AMDGPU from kernel. If the memory is reserved
 *	successfully, ib_kaddr will have the CPU/kernel
 *	address. Check ib_kaddr before accessing the memory.
 */
static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
{
	struct qcm_process_device *qpd = &pdd->qpd;
	uint32_t flags = ALLOC_MEM_FLAGS_GTT |
			 ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
			 ALLOC_MEM_FLAGS_WRITABLE |
			 ALLOC_MEM_FLAGS_EXECUTABLE;
	void *kaddr;
	int ret;

	if (qpd->ib_kaddr || !qpd->ib_base)
		return 0;

	/* ib_base is only set for dGPU */
	ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
				      &kaddr);
	if (ret)
		return ret;

	qpd->ib_kaddr = kaddr;

	return 0;
}

struct kfd_process *kfd_create_process(struct file *filep)
{
	struct kfd_process *process;
	struct task_struct *thread = current;
	int ret;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	/*
	 * take kfd processes mutex before starting of process creation
	 * so there won't be a case where two threads of the same process
	 * create two kfd_process structures
	 */
	mutex_lock(&kfd_processes_mutex);

	/* A prior open of /dev/kfd could have already created the process. */
	process = find_process(thread);
	if (process) {
		pr_debug("Process already found\n");
	} else {
		process = create_process(thread);
		if (IS_ERR(process))
			goto out;

		ret = kfd_process_init_cwsr_apu(process, filep);
		if (ret) {
			process = ERR_PTR(ret);
			goto out;
		}

		if (!procfs.kobj)
			goto out;

		process->kobj = kfd_alloc_struct(process->kobj);
		if (!process->kobj) {
			pr_warn("Creating procfs kobject failed");
			goto out;
		}
		ret = kobject_init_and_add(process->kobj, &procfs_type,
					   procfs.kobj, "%d",
					   (int)process->lead_thread->pid);
		if (ret) {
			pr_warn("Creating procfs pid directory failed");
			goto out;
		}

		process->attr_pasid.name = "pasid";
		process->attr_pasid.mode = KFD_SYSFS_FILE_MODE;
		sysfs_attr_init(&process->attr_pasid);
		ret = sysfs_create_file(process->kobj, &process->attr_pasid);
		if (ret)
			pr_warn("Creating pasid for pid %d failed",
					(int)process->lead_thread->pid);
	}
out:
	if (!IS_ERR(process))
		kref_get(&process->ref);
	mutex_unlock(&kfd_processes_mutex);

	return process;
}
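
/*
 * Illustrative caller sketch (assumed, modeled on the /dev/kfd open
 * path; not code in this file). The reference taken under "out:" above
 * must be dropped with kfd_unref_process() when the caller is done:
 *
 *	struct kfd_process *p = kfd_create_process(filep);
 *	if (IS_ERR(p))
 *		return PTR_ERR(p);
 *	...
 *	kfd_unref_process(p);
 */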

struct kfd_process *kfd_get_process(const struct task_struct *thread)
{
	struct kfd_process *process;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	process = find_process(thread);
	if (!process)
		return ERR_PTR(-EINVAL);

	return process;
}

static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *process;

	hash_for_each_possible_rcu(kfd_processes_table, process,
					kfd_processes, (uintptr_t)mm)
		if (process->mm == mm)
			return process;

	return NULL;
}

static struct kfd_process *find_process(const struct task_struct *thread)
{
	struct kfd_process *p;
	int idx;

	idx = srcu_read_lock(&kfd_processes_srcu);
	p = find_process_by_mm(thread->mm);
	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}

void kfd_unref_process(struct kfd_process *p)
{
	kref_put(&p->ref, kfd_process_ref_release);
}

static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
{
	struct kfd_process *p = pdd->process;
	void *mem;
	int id;

	/*
	 * Remove all handles from idr and release appropriate
	 * local memory object
	 */
	idr_for_each_entry(&pdd->alloc_idr, mem, id) {
		struct kfd_process_device *peer_pdd;

		list_for_each_entry(peer_pdd, &p->per_device_data,
				    per_device_list) {
			if (!peer_pdd->vm)
				continue;
			amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
				peer_pdd->dev->kgd, mem, peer_pdd->vm);
		}

		amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem);
		kfd_process_device_remove_obj_handle(pdd, id);
	}
}

static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
{
	struct kfd_process_device *pdd;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		kfd_process_device_free_bos(pdd);
}

static void kfd_process_destroy_pdds(struct kfd_process *p)
{
	struct kfd_process_device *pdd, *temp;

	list_for_each_entry_safe(pdd, temp, &p->per_device_data,
				 per_device_list) {
		pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
				pdd->dev->id, p->pasid);

		if (pdd->drm_file) {
			amdgpu_amdkfd_gpuvm_release_process_vm(
					pdd->dev->kgd, pdd->vm);
			fput(pdd->drm_file);
		}
		else if (pdd->vm)
			amdgpu_amdkfd_gpuvm_destroy_process_vm(
				pdd->dev->kgd, pdd->vm);

		list_del(&pdd->per_device_list);

		if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
			free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
				get_order(KFD_CWSR_TBA_TMA_SIZE));

		kfree(pdd->qpd.doorbell_bitmap);
		idr_destroy(&pdd->alloc_idr);

		kfree(pdd);
	}
}

/* No process locking is needed in this function, because the process
 * is not findable any more. We must assume that no other thread is
 * using it any more, otherwise we couldn't safely free the process
 * structure in the end.
 */
static void kfd_process_wq_release(struct work_struct *work)
{
	struct kfd_process *p = container_of(work, struct kfd_process,
					     release_work);

	/* Remove the procfs files */
	if (p->kobj) {
		sysfs_remove_file(p->kobj, &p->attr_pasid);
		kobject_del(p->kobj);
		kobject_put(p->kobj);
		p->kobj = NULL;
	}

	kfd_iommu_unbind_process(p);

	kfd_process_free_outstanding_kfd_bos(p);

	kfd_process_destroy_pdds(p);
	dma_fence_put(p->ef);

	kfd_event_free_process(p);

	kfd_pasid_free(p->pasid);
	kfd_free_process_doorbells(p);

	mutex_destroy(&p->mutex);

	put_task_struct(p->lead_thread);

	kfree(p);
}

static void kfd_process_ref_release(struct kref *ref)
{
	struct kfd_process *p = container_of(ref, struct kfd_process, ref);

	INIT_WORK(&p->release_work, kfd_process_wq_release);
	queue_work(kfd_process_wq, &p->release_work);
}

static void kfd_process_free_notifier(struct mmu_notifier *mn)
{
	kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
}

static void kfd_process_notifier_release(struct mmu_notifier *mn,
					struct mm_struct *mm)
{
	struct kfd_process *p;
	struct kfd_process_device *pdd = NULL;

	/*
	 * The kfd_process structure cannot be freed because the
	 * mmu_notifier srcu is read-locked
	 */
	p = container_of(mn, struct kfd_process, mmu_notifier);
	if (WARN_ON(p->mm != mm))
		return;

	mutex_lock(&kfd_processes_mutex);
	hash_del_rcu(&p->kfd_processes);
	mutex_unlock(&kfd_processes_mutex);
	synchronize_srcu(&kfd_processes_srcu);

	cancel_delayed_work_sync(&p->eviction_work);
	cancel_delayed_work_sync(&p->restore_work);

	mutex_lock(&p->mutex);

	/* Iterate over all process device data structures and if the
	 * pdd is in debug mode, we should first force unregistration,
	 * then we will be able to destroy the queues
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		struct kfd_dev *dev = pdd->dev;

		mutex_lock(kfd_get_dbgmgr_mutex());
		if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
			if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
				kfd_dbgmgr_destroy(dev->dbgmgr);
				dev->dbgmgr = NULL;
			}
		}
		mutex_unlock(kfd_get_dbgmgr_mutex());
	}

	kfd_process_dequeue_from_all_devices(p);
	pqm_uninit(&p->pqm);

	/* Indicate to other users that MM is no longer valid */
	p->mm = NULL;

	mutex_unlock(&p->mutex);

	mmu_notifier_put(&p->mmu_notifier);
}

static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
	.release = kfd_process_notifier_release,
	.free_notifier = kfd_process_free_notifier,
};
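
/*
 * Teardown ordering, for reference: when the mm goes away,
 * kfd_process_notifier_release() unhashes the process, cancels the
 * eviction/restore work and destroys its queues; once the notifier's
 * SRCU grace period has elapsed, kfd_process_free_notifier() drops the
 * process reference, and the final kref_put() queues
 * kfd_process_wq_release() on kfd_process_wq to free everything else.
 */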

static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
{
	unsigned long offset;
	struct kfd_process_device *pdd;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		struct kfd_dev *dev = pdd->dev;
		struct qcm_process_device *qpd = &pdd->qpd;

		if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
			continue;

		offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id);
		qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
			KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
			MAP_SHARED, offset);

		if (IS_ERR_VALUE(qpd->tba_addr)) {
			int err = qpd->tba_addr;

			pr_err("Failure to set tba address. error %d.\n", err);
			qpd->tba_addr = 0;
			qpd->cwsr_kaddr = NULL;
			return err;
		}

		memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

		qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
		pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
			qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
	}

	return 0;
}

static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;
	struct qcm_process_device *qpd = &pdd->qpd;
	uint32_t flags = ALLOC_MEM_FLAGS_GTT |
		ALLOC_MEM_FLAGS_NO_SUBSTITUTE | ALLOC_MEM_FLAGS_EXECUTABLE;
	void *kaddr;
	int ret;

	if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
		return 0;

	/* cwsr_base is only set for dGPU */
	ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
				      KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
	if (ret)
		return ret;

	qpd->cwsr_kaddr = kaddr;
	qpd->tba_addr = qpd->cwsr_base;

	memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

	qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
	pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
		 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);

	return 0;
}
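
/*
 * CWSR region layout sketch (assumed from the offsets used above; not
 * authoritative):
 *
 *	tba_addr -> +--------------------------+
 *	            | trap handler (cwsr_isa)  |
 *	tma_addr -> +--------------------------+ tba_addr + KFD_CWSR_TMA_OFFSET
 *	            | trap memory area         |
 *	            +--------------------------+ tba_addr + KFD_CWSR_TBA_TMA_SIZE
 *
 * On APUs the region is mmapped from /dev/kfd (kfd_process_init_cwsr_apu);
 * on dGPUs it is allocated in GTT at qpd->cwsr_base
 * (kfd_process_device_init_cwsr_dgpu).
 */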

/*
 * On return the kfd_process is fully operational and will be freed when the
 * mm is released
 */
static struct kfd_process *create_process(const struct task_struct *thread)
{
	struct kfd_process *process;
	int err = -ENOMEM;

	process = kzalloc(sizeof(*process), GFP_KERNEL);
	if (!process)
		goto err_alloc_process;

	kref_init(&process->ref);
	mutex_init(&process->mutex);
	process->mm = thread->mm;
	process->lead_thread = thread->group_leader;
	INIT_LIST_HEAD(&process->per_device_data);
	INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
	INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
	process->last_restore_timestamp = get_jiffies_64();
	kfd_event_init_process(process);
	process->is_32bit_user_mode = in_compat_syscall();

	process->pasid = kfd_pasid_alloc();
	if (process->pasid == 0)
		goto err_alloc_pasid;

	if (kfd_alloc_process_doorbells(process) < 0)
		goto err_alloc_doorbells;

	err = pqm_init(&process->pqm, process);
	if (err != 0)
		goto err_process_pqm_init;

	/* init process apertures*/
	err = kfd_init_apertures(process);
	if (err != 0)
		goto err_init_apertures;

	/* Must be last, have to use release destruction after this */
	process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
	err = mmu_notifier_register(&process->mmu_notifier, process->mm);
	if (err)
		goto err_register_notifier;

	get_task_struct(process->lead_thread);
	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
			(uintptr_t)process->mm);

	return process;

err_register_notifier:
	kfd_process_free_outstanding_kfd_bos(process);
	kfd_process_destroy_pdds(process);
err_init_apertures:
	pqm_uninit(&process->pqm);
err_process_pqm_init:
	kfd_free_process_doorbells(process);
err_alloc_doorbells:
	kfd_pasid_free(process->pasid);
err_alloc_pasid:
	mutex_destroy(&process->mutex);
	kfree(process);
err_alloc_process:
	return ERR_PTR(err);
}

static int init_doorbell_bitmap(struct qcm_process_device *qpd,
			struct kfd_dev *dev)
{
	unsigned int i;
	int range_start = dev->shared_resources.non_cp_doorbells_start;
	int range_end = dev->shared_resources.non_cp_doorbells_end;

	if (!KFD_IS_SOC15(dev->device_info->asic_family))
		return 0;

	qpd->doorbell_bitmap =
		kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
				     BITS_PER_BYTE), GFP_KERNEL);
	if (!qpd->doorbell_bitmap)
		return -ENOMEM;

	/* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
	pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
	pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
			range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
			range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);

	for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
		if (i >= range_start && i <= range_end) {
			set_bit(i, qpd->doorbell_bitmap);
			set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
				qpd->doorbell_bitmap);
		}
	}

	return 0;
}
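
/*
 * Resulting bitmap, for reference: a set bit marks a doorbell the
 * process may not allocate. Each reserved slot i in
 * [non_cp_doorbells_start, non_cp_doorbells_end] is masked out twice,
 * once directly and once at i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET for
 * the mirrored second half of the doorbell range.
 */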

struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd = NULL;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		if (pdd->dev == dev)
			return pdd;

	return NULL;
}

struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd = NULL;

	pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
	if (!pdd)
		return NULL;

	if (init_doorbell_bitmap(&pdd->qpd, dev)) {
		pr_err("Failed to init doorbell for process\n");
		kfree(pdd);
		return NULL;
	}

	pdd->dev = dev;
	INIT_LIST_HEAD(&pdd->qpd.queues_list);
	INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
	pdd->qpd.dqm = dev->dqm;
	pdd->qpd.pqm = &p->pqm;
	pdd->qpd.evicted = 0;
	pdd->process = p;
	pdd->bound = PDD_UNBOUND;
	pdd->already_dequeued = false;
	list_add(&pdd->per_device_list, &p->per_device_data);

	/* Init idr used for memory handle translation */
	idr_init(&pdd->alloc_idr);

	return pdd;
}

/**
 * kfd_process_device_init_vm - Initialize a VM for a process-device
 *
 * @pdd: The process-device
 * @drm_file: Optional pointer to a DRM file descriptor
 *
 * If @drm_file is specified, it will be used to acquire the VM from
 * that file descriptor. If successful, the @pdd takes ownership of
 * the file descriptor.
 *
 * If @drm_file is NULL, a new VM is created.
 *
 * Returns 0 on success, -errno on failure.
 */
int kfd_process_device_init_vm(struct kfd_process_device *pdd,
			       struct file *drm_file)
{
	struct kfd_process *p;
	struct kfd_dev *dev;
	int ret;

	if (pdd->vm)
		return drm_file ? -EBUSY : 0;

	p = pdd->process;
	dev = pdd->dev;

	if (drm_file)
		ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
			dev->kgd, drm_file, p->pasid,
			&pdd->vm, &p->kgd_process_info, &p->ef);
	else
		ret = amdgpu_amdkfd_gpuvm_create_process_vm(dev->kgd, p->pasid,
			&pdd->vm, &p->kgd_process_info, &p->ef);
	if (ret) {
		pr_err("Failed to create process VM object\n");
		return ret;
	}

	amdgpu_vm_set_task_info(pdd->vm);

	ret = kfd_process_device_reserve_ib_mem(pdd);
	if (ret)
		goto err_reserve_ib_mem;
	ret = kfd_process_device_init_cwsr_dgpu(pdd);
	if (ret)
		goto err_init_cwsr;

	pdd->drm_file = drm_file;

	return 0;

err_init_cwsr:
err_reserve_ib_mem:
	kfd_process_device_free_bos(pdd);
	if (!drm_file)
		amdgpu_amdkfd_gpuvm_destroy_process_vm(dev->kgd, pdd->vm);
	pdd->vm = NULL;

	return ret;
}

/*
 * Direct the IOMMU to bind the process (specifically the pasid->mm)
 * to the device.
 * Unbinding occurs when the process dies or the device is removed.
 *
 * Assumes that the process lock is held.
 */
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int err;

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return ERR_PTR(-ENOMEM);
	}

	err = kfd_iommu_bind_process_to_device(pdd);
	if (err)
		return ERR_PTR(err);

	err = kfd_process_device_init_vm(pdd, NULL);
	if (err)
		return ERR_PTR(err);

	return pdd;
}

struct kfd_process_device *kfd_get_first_process_device_data(
						struct kfd_process *p)
{
	return list_first_entry(&p->per_device_data,
				struct kfd_process_device,
				per_device_list);
}

struct kfd_process_device *kfd_get_next_process_device_data(
						struct kfd_process *p,
						struct kfd_process_device *pdd)
{
	if (list_is_last(&pdd->per_device_list, &p->per_device_data))
		return NULL;
	return list_next_entry(pdd, per_device_list);
}

bool kfd_has_process_device_data(struct kfd_process *p)
{
	return !(list_empty(&p->per_device_data));
}

/* Create specific handle mapped to mem from process local memory idr
 * Assumes that the process lock is held.
 */
int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
					void *mem)
{
	return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL);
}

/* Translate specific handle from process local memory idr
 * Assumes that the process lock is held.
 */
void *kfd_process_device_translate_handle(struct kfd_process_device *pdd,
					int handle)
{
	if (handle < 0)
		return NULL;

	return idr_find(&pdd->alloc_idr, handle);
}

/* Remove specific handle from process local memory idr
 * Assumes that the process lock is held.
 */
void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
					int handle)
{
	if (handle >= 0)
		idr_remove(&pdd->alloc_idr, handle);
}

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
{
	struct kfd_process *p, *ret_p = NULL;
	unsigned int temp;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		if (p->pasid == pasid) {
			kref_get(&p->ref);
			ret_p = p;
			break;
		}
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return ret_p;
}

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *p;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	p = find_process_by_mm(mm);
	if (p)
		kref_get(&p->ref);

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}
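
/*
 * Illustrative usage sketch (assumed; not a caller in this file).
 * Both lookup functions return with an elevated refcount, so every
 * successful lookup must be paired with kfd_unref_process():
 *
 *	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
 *	if (p) {
 *		... use p, taking p->mutex as needed ...
 *		kfd_unref_process(p);
 *	}
 */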

/* process_evict_queues - Evict all user queues of a process
 *
 * Eviction is reference-counted per process-device. This means multiple
 * evictions from different sources can be nested safely.
 */
int kfd_process_evict_queues(struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int r = 0;
	unsigned int n_evicted = 0;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
							    &pdd->qpd);
		if (r) {
			pr_err("Failed to evict process queues\n");
			goto fail;
		}
		n_evicted++;
	}

	return r;

fail:
	/* To keep state consistent, roll back partial eviction by
	 * restoring queues
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		if (n_evicted == 0)
			break;
		if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
							      &pdd->qpd))
			pr_err("Failed to restore queues\n");

		n_evicted--;
	}

	return r;
}

/* process_restore_queues - Restore all user queues of a process */
int kfd_process_restore_queues(struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int r, ret = 0;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
							      &pdd->qpd);
		if (r) {
			pr_err("Failed to restore process queues\n");
			if (!ret)
				ret = r;
		}
	}

	return ret;
}
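
/*
 * Nesting sketch, following the reference-counting comment above
 * (assumed semantics of the per-device eviction count):
 *
 *	kfd_process_evict_queues(p);	// count 0 -> 1, queues stop
 *	kfd_process_evict_queues(p);	// count 1 -> 2, still stopped
 *	kfd_process_restore_queues(p);	// count 2 -> 1, still stopped
 *	kfd_process_restore_queues(p);	// count 1 -> 0, queues run again
 */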

static void evict_process_worker(struct work_struct *work)
{
	int ret;
	struct kfd_process *p;
	struct delayed_work *dwork;

	dwork = to_delayed_work(work);

	/* Process termination destroys this worker thread. So during the
	 * lifetime of this thread, kfd_process p will be valid
	 */
	p = container_of(dwork, struct kfd_process, eviction_work);
	WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
		  "Eviction fence mismatch\n");

	/* Narrow window of overlap between restore and evict work
	 * item is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
	 * unreserves KFD BOs, it is possible to be evicted again. But
	 * restore has a few more steps to finish. So let's wait for any
	 * previous restore work to complete.
	 */
	flush_delayed_work(&p->restore_work);

	pr_debug("Started evicting pasid 0x%x\n", p->pasid);
	ret = kfd_process_evict_queues(p);
	if (!ret) {
		dma_fence_signal(p->ef);
		dma_fence_put(p->ef);
		p->ef = NULL;
		queue_delayed_work(kfd_restore_wq, &p->restore_work,
				msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));

		pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
	} else
		pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
}
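
/*
 * For reference, the full cycle implemented by the two workers: a
 * successful evict_process_worker() signals and drops the eviction
 * fence, then schedules restore_process_worker() after
 * PROCESS_RESTORE_TIME_MS; a failed restore re-queues itself after
 * PROCESS_BACK_OFF_TIME_MS on the ordered kfd_restore_wq.
 */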

static void restore_process_worker(struct work_struct *work)
{
	struct delayed_work *dwork;
	struct kfd_process *p;
	int ret = 0;

	dwork = to_delayed_work(work);

	/* Process termination destroys this worker thread. So during the
	 * lifetime of this thread, kfd_process p will be valid
	 */
	p = container_of(dwork, struct kfd_process, restore_work);
	pr_debug("Started restoring pasid 0x%x\n", p->pasid);

	/* Setting last_restore_timestamp before successful restoration.
	 * Otherwise this would have to be set by KGD (restore_process_bos)
	 * before KFD BOs are unreserved. If not, the process can be evicted
	 * again before the timestamp is set.
	 * If restore fails, the timestamp will be set again in the next
	 * attempt. This would mean that the minimum GPU quanta would be
	 * PROCESS_ACTIVE_TIME_MS - (time to execute the following two
	 * functions)
	 */

	p->last_restore_timestamp = get_jiffies_64();
	ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
						     &p->ef);
	if (ret) {
		pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
			 p->pasid, PROCESS_BACK_OFF_TIME_MS);
		ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
				msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
		WARN(!ret, "reschedule restore work failed\n");
		return;
	}

	ret = kfd_process_restore_queues(p);
	if (!ret)
		pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
	else
		pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
}

void kfd_suspend_all_processes(void)
{
	struct kfd_process *p;
	unsigned int temp;
	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		cancel_delayed_work_sync(&p->eviction_work);
		cancel_delayed_work_sync(&p->restore_work);

		if (kfd_process_evict_queues(p))
			pr_err("Failed to suspend process 0x%x\n", p->pasid);
		dma_fence_signal(p->ef);
		dma_fence_put(p->ef);
		p->ef = NULL;
	}
	srcu_read_unlock(&kfd_processes_srcu, idx);
}

int kfd_resume_all_processes(void)
{
	struct kfd_process *p;
	unsigned int temp;
	int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
			pr_err("Restore process %d failed during resume\n",
			       p->pasid);
			ret = -EFAULT;
		}
	}
	srcu_read_unlock(&kfd_processes_srcu, idx);
	return ret;
}

int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
			  struct vm_area_struct *vma)
{
	struct kfd_process_device *pdd;
	struct qcm_process_device *qpd;

	if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
		pr_err("Incorrect CWSR mapping size.\n");
		return -EINVAL;
	}

	pdd = kfd_get_process_device_data(dev, process);
	if (!pdd)
		return -EINVAL;
	qpd = &pdd->qpd;

	qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
			get_order(KFD_CWSR_TBA_TMA_SIZE));
	if (!qpd->cwsr_kaddr) {
		pr_err("Error allocating per process CWSR buffer.\n");
		return -ENOMEM;
	}

	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
		| VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
	/* Mapping pages to user process */
	return remap_pfn_range(vma, vma->vm_start,
			       PFN_DOWN(__pa(qpd->cwsr_kaddr)),
			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
}

void kfd_flush_tlb(struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;

	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
		/* Nothing to flush until a VMID is assigned, which
		 * only happens when the first queue is created.
		 */
		if (pdd->qpd.vmid)
			amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
							 pdd->qpd.vmid);
	} else {
		amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
						  pdd->process->pasid);
	}
}

#if defined(CONFIG_DEBUG_FS)

int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
{
	struct kfd_process *p;
	unsigned int temp;
	int r = 0;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		seq_printf(m, "Process %d PASID 0x%x:\n",
			   p->lead_thread->tgid, p->pasid);

		mutex_lock(&p->mutex);
		r = pqm_debugfs_mqds(m, &p->pqm);
		mutex_unlock(&p->mutex);

		if (r)
			break;
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return r;
}

#endif