/*	$NetBSD: amdgpu_sync.c,v 1.3 2021/12/18 23:44:58 riastradh Exp $	*/

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <christian.koenig@amd.com>
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_sync.c,v 1.3 2021/12/18 23:44:58 riastradh Exp $");

#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"

struct amdgpu_sync_entry {
        struct hlist_node       node;
        struct dma_fence        *fence;
        bool                    explicit;
};

static struct kmem_cache *amdgpu_sync_slab;

/**
 * amdgpu_sync_create - zero init sync object
 *
 * @sync: sync object to initialize
 *
 * Just clear the sync object for now.
 */
void amdgpu_sync_create(struct amdgpu_sync *sync)
{
        hash_init(sync->fences);
        sync->last_vm_update = NULL;
}
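
/*
 * Minimal usage sketch (not taken from this file; the surrounding
 * submission code and the fence variable are assumptions for
 * illustration): a sync object is created, fed with fences, waited on,
 * and then freed.
 *
 *      struct amdgpu_sync sync;
 *      int r;
 *
 *      amdgpu_sync_create(&sync);
 *      r = amdgpu_sync_fence(&sync, fence, false);
 *      if (r == 0)
 *              r = amdgpu_sync_wait(&sync, true);
 *      amdgpu_sync_free(&sync);
 */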

/**
 * amdgpu_sync_same_dev - test if a fence belongs to us
 *
 * @adev: amdgpu device to use for the test
 * @f: fence to test
 *
 * Test if the fence was issued by us.
 */
static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
                                 struct dma_fence *f)
{
        struct drm_sched_fence *s_fence = to_drm_sched_fence(f);

        if (s_fence) {
                struct amdgpu_ring *ring;

                ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
                return ring->adev == adev;
        }

        return false;
}

/**
 * amdgpu_sync_get_owner - extract the owner of a fence
 *
 * @f: fence to get the owner from
 *
 * Extract who originally created the fence.
 */
static void *amdgpu_sync_get_owner(struct dma_fence *f)
{
        struct drm_sched_fence *s_fence;
        struct amdgpu_amdkfd_fence *kfd_fence;

        if (!f)
                return AMDGPU_FENCE_OWNER_UNDEFINED;

        s_fence = to_drm_sched_fence(f);
        if (s_fence)
                return s_fence->owner;

        kfd_fence = to_amdgpu_amdkfd_fence(f);
        if (kfd_fence)
                return AMDGPU_FENCE_OWNER_KFD;

        return AMDGPU_FENCE_OWNER_UNDEFINED;
}

/**
 * amdgpu_sync_keep_later - keep the later fence
 *
 * @keep: existing fence to test
 * @fence: new fence
 *
 * Either keep the existing fence or the new one, depending on which one is
 * later.
 */
static void amdgpu_sync_keep_later(struct dma_fence **keep,
                                   struct dma_fence *fence)
{
        if (*keep && dma_fence_is_later(*keep, fence))
                return;

        dma_fence_put(*keep);
        *keep = dma_fence_get(fence);
}
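
/*
 * Illustration only (the local names below are hypothetical): when two
 * fences share a context, amdgpu_sync_keep_later() keeps whichever fence
 * is later on that timeline and drops the reference to the other, so a
 * stored slot never moves backwards.
 *
 *      struct dma_fence *keep = dma_fence_get(older_fence);
 *
 *      amdgpu_sync_keep_later(&keep, newer_fence);
 *      ... keep now references newer_fence; older_fence has been put ...
 */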

/**
 * amdgpu_sync_add_later - add the fence to the hash
 *
 * @sync: sync object to add the fence to
 * @f: fence to add
 * @explicit: whether this is an explicit dependency
 *
 * Tries to add the fence to an existing hash entry. Returns true when an entry
 * was found, false otherwise.
 */
static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f,
                                  bool explicit)
{
        struct amdgpu_sync_entry *e;

        hash_for_each_possible(sync->fences, e, node, f->context) {
                if (unlikely(e->fence->context != f->context))
                        continue;

                amdgpu_sync_keep_later(&e->fence, f);

                /* Preserve the explicit flag so we don't lose the pipeline
                 * sync.
                 */
                e->explicit |= explicit;

                return true;
        }
        return false;
}

/**
 * amdgpu_sync_fence - remember to sync to this fence
 *
 * @sync: sync object to add fence to
 * @f: fence to sync to
 * @explicit: if this is an explicit dependency
 *
 * Add the fence to the sync object.
 */
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
                      bool explicit)
{
        struct amdgpu_sync_entry *e;

        if (!f)
                return 0;

        if (amdgpu_sync_add_later(sync, f, explicit))
                return 0;

        e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
        if (!e)
                return -ENOMEM;

        e->explicit = explicit;

        hash_add(sync->fences, &e->node, f->context);
        e->fence = dma_fence_get(f);
        return 0;
}
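
/*
 * Sketch of how the hash behaves (hypothetical fences, illustration only):
 * entries are bucketed by fence context, so adding two fences from the same
 * context collapses them into one entry holding the later fence, while
 * fences from different contexts each get their own entry.
 *
 *      amdgpu_sync_fence(&sync, fence_a1, false);      ... context A
 *      amdgpu_sync_fence(&sync, fence_a2, false);      ... same context,
 *                                                          keeps the later one
 *      amdgpu_sync_fence(&sync, fence_b1, true);       ... context B, new entry
 */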

/**
 * amdgpu_sync_vm_fence - remember to sync to this VM fence
 *
 * @sync: sync object to add fence to
 * @fence: the VM fence to add
 *
 * Add the fence to the sync object and remember it as VM update.
 */
int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
{
        if (!fence)
                return 0;

        amdgpu_sync_keep_later(&sync->last_vm_update, fence);
        return amdgpu_sync_fence(sync, fence, false);
}

/**
 * amdgpu_sync_resv - sync to a reservation object
 *
 * @adev: amdgpu device
 * @sync: sync object to add fences from reservation object to
 * @resv: reservation object with embedded fence
 * @owner: originator of the request, used to decide which fences can be skipped
 * @explicit_sync: true if we should only sync to the exclusive fence
 *
 * Sync to the fences of the reservation object.
 */
int amdgpu_sync_resv(struct amdgpu_device *adev,
                     struct amdgpu_sync *sync,
                     struct dma_resv *resv,
                     void *owner, bool explicit_sync)
{
        struct dma_resv_list *flist;
        struct dma_fence *f;
        void *fence_owner;
        unsigned i;
        int r = 0;

        if (resv == NULL)
                return -EINVAL;

        /* always sync to the exclusive fence */
        f = dma_resv_get_excl(resv);
        r = amdgpu_sync_fence(sync, f, false);

        flist = dma_resv_get_list(resv);
        if (!flist || r)
                return r;

        for (i = 0; i < flist->shared_count; ++i) {
                f = rcu_dereference_protected(flist->shared[i],
                                              dma_resv_held(resv));
                /* We only want to trigger KFD eviction fences on
                 * evict or move jobs. Skip KFD fences otherwise.
                 */
                fence_owner = amdgpu_sync_get_owner(f);
                if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
                    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
                        continue;

                if (amdgpu_sync_same_dev(adev, f)) {
                        /* VM updates only sync with moves but not with user
                         * command submissions or KFD eviction fences
                         */
                        if (owner == AMDGPU_FENCE_OWNER_VM &&
                            fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED)
                                continue;

                        /* Ignore fences from the same owner and explicit ones
                         * as long as the owner isn't undefined.
                         */
                        if (owner != AMDGPU_FENCE_OWNER_UNDEFINED &&
                            (fence_owner == owner || explicit_sync))
                                continue;
                }

                r = amdgpu_sync_fence(sync, f, false);
                if (r)
                        break;
        }
        return r;
}
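
/*
 * Typical caller pattern, sketched with hypothetical variables (the real
 * call sites live elsewhere in the driver): pull the exclusive fence and
 * the relevant shared fences of a buffer's reservation object (resv) into
 * the sync object before scheduling work that touches that buffer.
 *
 *      r = amdgpu_sync_resv(adev, &sync, resv,
 *                           AMDGPU_FENCE_OWNER_UNDEFINED, false);
 *      if (r)
 *              ... bail out, the sync object is still consistent ...
 */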

/**
 * amdgpu_sync_peek_fence - get the next fence not signaled yet
 *
 * @sync: the sync object
 * @ring: optional ring to use for test
 *
 * Returns the next fence not signaled yet without removing it from the sync
 * object.
 */
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
                                         struct amdgpu_ring *ring)
{
        struct amdgpu_sync_entry *e;
        struct hlist_node *tmp;
        int i;

        hash_for_each_safe(sync->fences, i, tmp, e, node) {
                struct dma_fence *f = e->fence;
                struct drm_sched_fence *s_fence = to_drm_sched_fence(f);

                if (dma_fence_is_signaled(f)) {
                        hash_del(&e->node);
                        dma_fence_put(f);
                        kmem_cache_free(amdgpu_sync_slab, e);
                        continue;
                }
                if (ring && s_fence) {
                        /* For fences from the same ring it is sufficient
                         * when they are scheduled.
                         */
                        if (s_fence->sched == &ring->sched) {
                                if (dma_fence_is_signaled(&s_fence->scheduled))
                                        continue;

                                return &s_fence->scheduled;
                        }
                }

                return f;
        }

        return NULL;
}

/**
 * amdgpu_sync_get_fence - get the next fence from the sync object
 *
 * @sync: sync object to use
 * @explicit: true if the next fence is explicit
 *
 * Gets and removes the next unsignaled fence from the sync object.
 */
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit)
{
        struct amdgpu_sync_entry *e;
        struct hlist_node *tmp;
        struct dma_fence *f;
        int i;

        hash_for_each_safe(sync->fences, i, tmp, e, node) {
                f = e->fence;
                if (explicit)
                        *explicit = e->explicit;

                hash_del(&e->node);
                kmem_cache_free(amdgpu_sync_slab, e);

                if (!dma_fence_is_signaled(f))
                        return f;

                dma_fence_put(f);
        }
        return NULL;
}
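
/*
 * Drain sketch (illustration only; the loop below is hypothetical): a
 * consumer can pull unsignaled fences out one at a time until the sync
 * object is empty, waiting on each and dropping the reference that
 * amdgpu_sync_get_fence() handed over.
 *
 *      struct dma_fence *fence;
 *      bool explicit;
 *      int r;
 *
 *      while ((fence = amdgpu_sync_get_fence(&sync, &explicit)) != NULL) {
 *              r = dma_fence_wait(fence, false);
 *              dma_fence_put(fence);
 *              if (r)
 *                      break;
 *      }
 */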

/**
 * amdgpu_sync_clone - clone a sync object
 *
 * @source: sync object to clone
 * @clone: pointer to destination sync object
 *
 * Adds references to all unsignaled fences in @source to @clone. Also
 * removes signaled fences from @source while at it.
 */
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
{
        struct amdgpu_sync_entry *e;
        struct hlist_node *tmp;
        struct dma_fence *f;
        int i, r;

        hash_for_each_safe(source->fences, i, tmp, e, node) {
                f = e->fence;
                if (!dma_fence_is_signaled(f)) {
                        r = amdgpu_sync_fence(clone, f, e->explicit);
                        if (r)
                                return r;
                } else {
                        hash_del(&e->node);
                        dma_fence_put(f);
                        kmem_cache_free(amdgpu_sync_slab, e);
                }
        }

        dma_fence_put(clone->last_vm_update);
        clone->last_vm_update = dma_fence_get(source->last_vm_update);

        return 0;
}

/**
 * amdgpu_sync_wait - wait for all fences in the sync object
 *
 * @sync: sync object to wait on
 * @intr: true if the wait should be interruptible
 *
 * Waits for every fence in the sync object to signal, removing each entry
 * once its fence has signaled.
 */
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
{
        struct amdgpu_sync_entry *e;
        struct hlist_node *tmp;
        int i, r;

        hash_for_each_safe(sync->fences, i, tmp, e, node) {
                r = dma_fence_wait(e->fence, intr);
                if (r)
                        return r;

                hash_del(&e->node);
                dma_fence_put(e->fence);
                kmem_cache_free(amdgpu_sync_slab, e);
        }

        return 0;
}

/**
 * amdgpu_sync_free - free the sync object
 *
 * @sync: sync object to use
 *
 * Free the sync object.
 */
void amdgpu_sync_free(struct amdgpu_sync *sync)
{
        struct amdgpu_sync_entry *e;
        struct hlist_node *tmp;
        unsigned i;

        hash_for_each_safe(sync->fences, i, tmp, e, node) {
                hash_del(&e->node);
                dma_fence_put(e->fence);
                kmem_cache_free(amdgpu_sync_slab, e);
        }

        dma_fence_put(sync->last_vm_update);
}

/**
 * amdgpu_sync_init - init sync object subsystem
 *
 * Allocate the slab allocator.
 */
int amdgpu_sync_init(void)
{
        amdgpu_sync_slab = kmem_cache_create(
                "amdgpu_sync", sizeof(struct amdgpu_sync_entry), 0,
                SLAB_HWCACHE_ALIGN, NULL);
        if (!amdgpu_sync_slab)
                return -ENOMEM;

        return 0;
}

/**
 * amdgpu_sync_fini - fini sync object subsystem
 *
 * Free the slab allocator.
 */
void amdgpu_sync_fini(void)
{
        kmem_cache_destroy(amdgpu_sync_slab);
}
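
/*
 * Lifecycle note (a sketch of the expected call order, not something this
 * file enforces): amdgpu_sync_init() is expected to run once during driver
 * module initialization, before any sync object is created, and
 * amdgpu_sync_fini() once during module teardown, after all sync objects
 * have been freed.
 *
 *      if (amdgpu_sync_init())
 *              ... fail driver initialization ...
 *      ...
 *      amdgpu_sync_fini();
 */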