/*	$NetBSD: amdgpu_sa.c,v 1.5 2022/10/08 19:06:30 riastradh Exp $	*/

/*
 * Copyright 2011 Red Hat Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
/* Algorithm:
 *
 * We store the last allocated bo in "hole"; we always try to allocate
 * after the last allocated bo.  The principle is that in a linear GPU ring
 * progression, what is after "last" is the oldest bo we allocated and thus
 * the first one that should no longer be in use by the GPU.
 *
 * If that's not the case we skip over the bo after "last" to the closest
 * done bo, if such a one exists.  If none exists and we are not asked to
 * block, we report failure to allocate.
 *
 * If we are asked to block, we wait on the oldest fence of each ring.  We
 * just wait for any of those fences to complete.
 */
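/*
 * Editor's note: a minimal usage sketch, for orientation only and not part
 * of the driver.  A caller carves a small buffer out of a manager, submits
 * work that uses it, and hands the resulting fence back so the space can be
 * reclaimed once the GPU is done with it:
 *
 *	struct amdgpu_sa_bo *sa_bo;
 *	int r = amdgpu_sa_bo_new(sa_manager, &sa_bo, size, align);
 *	if (r == 0) {
 *		// ...fill the sub-allocation, submit it, obtain a fence...
 *		amdgpu_sa_bo_free(adev, &sa_bo, fence);
 *	}
 *
 * Here sa_manager, size, align, adev and fence stand for whatever the
 * caller has at hand; align must not exceed sa_manager->align, nor size
 * sa_manager->size.
 */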
45 1.1 riastrad */ 46 1.4 riastrad 47 1.1 riastrad #include <sys/cdefs.h> 48 1.5 riastrad __KERNEL_RCSID(0, "$NetBSD: amdgpu_sa.c,v 1.5 2022/10/08 19:06:30 riastradh Exp $"); 49 1.1 riastrad 50 1.1 riastrad #include "amdgpu.h" 51 1.1 riastrad 52 1.1 riastrad static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo); 53 1.1 riastrad static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager); 54 1.1 riastrad 55 1.1 riastrad int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev, 56 1.1 riastrad struct amdgpu_sa_manager *sa_manager, 57 1.1 riastrad unsigned size, u32 align, u32 domain) 58 1.1 riastrad { 59 1.1 riastrad int i, r; 60 1.1 riastrad 61 1.3 riastrad spin_lock_init(&sa_manager->wq_lock); 62 1.3 riastrad DRM_INIT_WAITQUEUE(&sa_manager->wq, "amdsabom"); 63 1.1 riastrad sa_manager->bo = NULL; 64 1.1 riastrad sa_manager->size = size; 65 1.1 riastrad sa_manager->domain = domain; 66 1.1 riastrad sa_manager->align = align; 67 1.1 riastrad sa_manager->hole = &sa_manager->olist; 68 1.1 riastrad INIT_LIST_HEAD(&sa_manager->olist); 69 1.4 riastrad for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) 70 1.1 riastrad INIT_LIST_HEAD(&sa_manager->flist[i]); 71 1.1 riastrad 72 1.4 riastrad r = amdgpu_bo_create_kernel(adev, size, align, domain, &sa_manager->bo, 73 1.4 riastrad &sa_manager->gpu_addr, &sa_manager->cpu_ptr); 74 1.1 riastrad if (r) { 75 1.1 riastrad dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r); 76 1.1 riastrad return r; 77 1.1 riastrad } 78 1.1 riastrad 79 1.4 riastrad memset(sa_manager->cpu_ptr, 0, sa_manager->size); 80 1.1 riastrad return r; 81 1.1 riastrad } 82 1.1 riastrad 83 1.1 riastrad void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev, 84 1.4 riastrad struct amdgpu_sa_manager *sa_manager) 85 1.1 riastrad { 86 1.1 riastrad struct amdgpu_sa_bo *sa_bo, *tmp; 87 1.1 riastrad 88 1.4 riastrad if (sa_manager->bo == NULL) { 89 1.4 riastrad dev_err(adev->dev, "no bo for sa manager\n"); 90 1.4 riastrad return; 91 1.4 riastrad } 92 1.4 riastrad 93 1.1 riastrad if (!list_empty(&sa_manager->olist)) { 94 1.1 riastrad sa_manager->hole = &sa_manager->olist, 95 1.1 riastrad amdgpu_sa_bo_try_free(sa_manager); 96 1.1 riastrad if (!list_empty(&sa_manager->olist)) { 97 1.1 riastrad dev_err(adev->dev, "sa_manager is not empty, clearing anyway\n"); 98 1.1 riastrad } 99 1.1 riastrad } 100 1.1 riastrad list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) { 101 1.1 riastrad amdgpu_sa_bo_remove_locked(sa_bo); 102 1.1 riastrad } 103 1.4 riastrad 104 1.4 riastrad amdgpu_bo_free_kernel(&sa_manager->bo, &sa_manager->gpu_addr, &sa_manager->cpu_ptr); 105 1.1 riastrad sa_manager->size = 0; 106 1.3 riastrad DRM_DESTROY_WAITQUEUE(&sa_manager->wq); 107 1.3 riastrad spin_lock_destroy(&sa_manager->wq_lock); 108 1.1 riastrad } 109 1.1 riastrad 110 1.1 riastrad static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo) 111 1.1 riastrad { 112 1.1 riastrad struct amdgpu_sa_manager *sa_manager = sa_bo->manager; 113 1.1 riastrad if (sa_manager->hole == &sa_bo->olist) { 114 1.1 riastrad sa_manager->hole = sa_bo->olist.prev; 115 1.1 riastrad } 116 1.1 riastrad list_del_init(&sa_bo->olist); 117 1.1 riastrad list_del_init(&sa_bo->flist); 118 1.4 riastrad dma_fence_put(sa_bo->fence); 119 1.1 riastrad kfree(sa_bo); 120 1.1 riastrad } 121 1.1 riastrad 122 1.1 riastrad static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager) 123 1.1 riastrad { 124 1.1 riastrad struct amdgpu_sa_bo *sa_bo, *tmp; 125 1.1 riastrad 126 1.1 riastrad if 
static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager)
{
	struct amdgpu_sa_bo *sa_bo, *tmp;

	if (sa_manager->hole->next == &sa_manager->olist)
		return;

	sa_bo = list_entry(sa_manager->hole->next, struct amdgpu_sa_bo, olist);
	list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) {
		if (sa_bo->fence == NULL ||
		    !dma_fence_is_signaled(sa_bo->fence)) {
			return;
		}
		amdgpu_sa_bo_remove_locked(sa_bo);
	}
}

static inline unsigned amdgpu_sa_bo_hole_soffset(struct amdgpu_sa_manager *sa_manager)
{
	struct list_head *hole = sa_manager->hole;

	if (hole != &sa_manager->olist) {
		return list_entry(hole, struct amdgpu_sa_bo, olist)->eoffset;
	}
	return 0;
}

static inline unsigned amdgpu_sa_bo_hole_eoffset(struct amdgpu_sa_manager *sa_manager)
{
	struct list_head *hole = sa_manager->hole;

	if (hole->next != &sa_manager->olist) {
		return list_entry(hole->next, struct amdgpu_sa_bo, olist)->soffset;
	}
	return sa_manager->size;
}

static bool amdgpu_sa_bo_try_alloc(struct amdgpu_sa_manager *sa_manager,
				   struct amdgpu_sa_bo *sa_bo,
				   unsigned size, unsigned align)
{
	unsigned soffset, eoffset, wasted;

	soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
	eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
	wasted = (align - (soffset % align)) % align;

	if ((eoffset - soffset) >= (size + wasted)) {
		soffset += wasted;

		sa_bo->manager = sa_manager;
		sa_bo->soffset = soffset;
		sa_bo->eoffset = soffset + size;
		list_add(&sa_bo->olist, sa_manager->hole);
		INIT_LIST_HEAD(&sa_bo->flist);
		sa_manager->hole = &sa_bo->olist;
		return true;
	}
	return false;
}

/**
 * amdgpu_sa_event - Check if we can stop waiting
 *
 * @sa_manager: pointer to the sa_manager
 * @size: number of bytes we want to allocate
 * @align: alignment we need to match
 *
 * Check if either there is a fence we can wait for or
 * enough free memory to satisfy the allocation directly.
 */
static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager,
			    unsigned size, unsigned align)
{
	unsigned soffset, eoffset, wasted;
	int i;

	for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
		if (!list_empty(&sa_manager->flist[i]))
			return true;

	soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
	eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
	wasted = (align - (soffset % align)) % align;

	if ((eoffset - soffset) >= (size + wasted)) {
		return true;
	}

	return false;
}
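/*
 * Try to advance the hole past allocations the GPU has finished with.  If
 * the hole already points at the end of the buffer, wrap it back to the
 * beginning.  Otherwise scan the heads of the fence lists for the signaled
 * sa_bo closest after the hole (distances wrap modulo the buffer size),
 * remove it and move the hole there.  Unsignaled head fences are returned
 * in fences[] so the caller can wait on them, and tries[] limits how many
 * times each list may be consumed this way.  Returns true when the hole
 * moved and the caller should retry the allocation.
 */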
static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
				   struct dma_fence **fences,
				   unsigned *tries)
{
	struct amdgpu_sa_bo *best_bo = NULL;
	unsigned i, soffset, best, tmp;

	/* if hole points to the end of the buffer */
	if (sa_manager->hole->next == &sa_manager->olist) {
		/* try again with its beginning */
		sa_manager->hole = &sa_manager->olist;
		return true;
	}

	soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
	/* to handle wrap around we add sa_manager->size */
	best = sa_manager->size * 2;
	/* go over all fence lists and try to find the closest sa_bo
	 * of the current last
	 */
	for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
		struct amdgpu_sa_bo *sa_bo;

		fences[i] = NULL;

		if (list_empty(&sa_manager->flist[i]))
			continue;

		sa_bo = list_first_entry(&sa_manager->flist[i],
					 struct amdgpu_sa_bo, flist);

		if (!dma_fence_is_signaled(sa_bo->fence)) {
			fences[i] = sa_bo->fence;
			continue;
		}

		/* limit the number of tries each ring gets */
		if (tries[i] > 2) {
			continue;
		}

		tmp = sa_bo->soffset;
		if (tmp < soffset) {
			/* wrap around, pretend it's after */
			tmp += sa_manager->size;
		}
		tmp -= soffset;
		if (tmp < best) {
			/* this sa bo is the closest one */
			best = tmp;
			best_bo = sa_bo;
		}
	}

	if (best_bo) {
		uint32_t idx = best_bo->fence->context;

		idx %= AMDGPU_SA_NUM_FENCE_LISTS;
		++tries[idx];
		sa_manager->hole = best_bo->olist.prev;

		/* we knew that this one is signaled,
		   so it's safe to remove it */
		amdgpu_sa_bo_remove_locked(best_bo);
		return true;
	}
	return false;
}
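/*
 * Allocate size bytes from the manager.  Under wq_lock the loop first
 * reclaims what has already signaled and tries the hole; failing that,
 * amdgpu_sa_bo_next_hole skips over finished allocations and the attempt
 * is retried.  Once nothing more can be reclaimed, either wait for one of
 * the collected fences to signal or, if there is none, sleep on the
 * waitqueue until amdgpu_sa_bo_free reports progress.  Returns 0 with
 * *sa_bo set on success, or a negative error code.
 */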
int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
		     struct amdgpu_sa_bo **sa_bo,
		     unsigned size, unsigned align)
{
	struct dma_fence *fences[AMDGPU_SA_NUM_FENCE_LISTS];
	unsigned tries[AMDGPU_SA_NUM_FENCE_LISTS];
	unsigned count;
	int i, r;
	signed long t;

	if (WARN_ON_ONCE(align > sa_manager->align))
		return -EINVAL;

	if (WARN_ON_ONCE(size > sa_manager->size))
		return -EINVAL;

	*sa_bo = kmalloc(sizeof(struct amdgpu_sa_bo), GFP_KERNEL);
	if (!(*sa_bo))
		return -ENOMEM;
	(*sa_bo)->manager = sa_manager;
	(*sa_bo)->fence = NULL;
	INIT_LIST_HEAD(&(*sa_bo)->olist);
	INIT_LIST_HEAD(&(*sa_bo)->flist);

	spin_lock(&sa_manager->wq_lock);
	do {
		for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
			tries[i] = 0;

		do {
			amdgpu_sa_bo_try_free(sa_manager);

			if (amdgpu_sa_bo_try_alloc(sa_manager, *sa_bo,
						   size, align)) {
				spin_unlock(&sa_manager->wq_lock);
				return 0;
			}

			/* see if we can skip over some allocations */
		} while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));

		for (i = 0, count = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
			if (fences[i])
				fences[count++] = dma_fence_get(fences[i]);

		if (count) {
			spin_unlock(&sa_manager->wq_lock);
			t = dma_fence_wait_any_timeout(fences, count, false,
						       MAX_SCHEDULE_TIMEOUT,
						       NULL);
			for (i = 0; i < count; ++i)
				dma_fence_put(fences[i]);

			r = (t > 0) ? 0 : t;
			spin_lock(&sa_manager->wq_lock);
		} else {
			/* if we have nothing to wait for, block */
			DRM_SPIN_WAIT_UNTIL(r, &sa_manager->wq,
			    &sa_manager->wq_lock,
			    amdgpu_sa_event(sa_manager, size, align));
		}

	} while (!r);

	spin_unlock(&sa_manager->wq_lock);
	kfree(*sa_bo);
	*sa_bo = NULL;
	return r;
}

void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
		       struct dma_fence *fence)
{
	struct amdgpu_sa_manager *sa_manager;

	if (sa_bo == NULL || *sa_bo == NULL) {
		return;
	}

	sa_manager = (*sa_bo)->manager;
	spin_lock(&sa_manager->wq_lock);
	if (fence && !dma_fence_is_signaled(fence)) {
		uint32_t idx;

		(*sa_bo)->fence = dma_fence_get(fence);
		idx = fence->context % AMDGPU_SA_NUM_FENCE_LISTS;
		list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]);
	} else {
		amdgpu_sa_bo_remove_locked(*sa_bo);
	}
	DRM_SPIN_WAKEUP_ALL(&sa_manager->wq, &sa_manager->wq_lock);
	spin_unlock(&sa_manager->wq_lock);
	*sa_bo = NULL;
}

#if defined(CONFIG_DEBUG_FS)

void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
				  struct seq_file *m)
{
	struct amdgpu_sa_bo *i;

	spin_lock(&sa_manager->wq_lock);
	list_for_each_entry(i, &sa_manager->olist, olist) {
		uint64_t soffset = i->soffset + sa_manager->gpu_addr;
		uint64_t eoffset = i->eoffset + sa_manager->gpu_addr;

		if (&i->olist == sa_manager->hole) {
			seq_printf(m, ">");
		} else {
			seq_printf(m, " ");
		}
		seq_printf(m, "[0x%010llx 0x%010llx] size %8lld",
			   soffset, eoffset, eoffset - soffset);

		if (i->fence)
			seq_printf(m, " protected by 0x%016llx on context %llu",
				   i->fence->seqno, i->fence->context);

		seq_printf(m, "\n");
	}
	spin_unlock(&sa_manager->wq_lock);
}
#endif