1/* 2 * Copyright 2019 Collabora, Ltd. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors (Collabora): 24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> 25 */ 26#include <errno.h> 27#include <stdio.h> 28#include <fcntl.h> 29#include <xf86drm.h> 30#include <pthread.h> 31#include "drm-uapi/panfrost_drm.h" 32 33#include "pan_bo.h" 34#include "pan_device.h" 35#include "pan_util.h" 36#include "wrap.h" 37 38#include "os/os_mman.h" 39 40#include "util/u_inlines.h" 41#include "util/u_math.h" 42 43/* This file implements a userspace BO cache. Allocating and freeing 44 * GPU-visible buffers is very expensive, and even the extra kernel roundtrips 45 * adds more work than we would like at this point. So caching BOs in userspace 46 * solves both of these problems and does not require kernel updates. 47 * 48 * Cached BOs are sorted into a bucket based on rounding their size down to the 49 * nearest power-of-two. Each bucket contains a linked list of free panfrost_bo 50 * objects. Putting a BO into the cache is accomplished by adding it to the 51 * corresponding bucket. Getting a BO from the cache consists of finding the 52 * appropriate bucket and sorting. A cache eviction is a kernel-level free of a 53 * BO and removing it from the bucket. We special case evicting all BOs from 54 * the cache, since that's what helpful in practice and avoids extra logic 55 * around the linked list. 56 */ 57 58static struct panfrost_bo * 59panfrost_bo_alloc(struct panfrost_device *dev, size_t size, 60 uint32_t flags, const char *label) 61{ 62 struct drm_panfrost_create_bo create_bo = { .size = size }; 63 struct panfrost_bo *bo; 64 int ret; 65 66 if (dev->kernel_version->version_major > 1 || 67 dev->kernel_version->version_minor >= 1) { 68 if (flags & PAN_BO_GROWABLE) 69 create_bo.flags |= PANFROST_BO_HEAP; 70 if (!(flags & PAN_BO_EXECUTE)) 71 create_bo.flags |= PANFROST_BO_NOEXEC; 72 } 73 74 ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_CREATE_BO, &create_bo); 75 if (ret) { 76 fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: %m\n"); 77 return NULL; 78 } 79 80 bo = pan_lookup_bo(dev, create_bo.handle); 81 assert(!memcmp(bo, &((struct panfrost_bo){}), sizeof(*bo))); 82 83 bo->size = create_bo.size; 84 bo->ptr.gpu = create_bo.offset; 85 bo->gem_handle = create_bo.handle; 86 bo->flags = flags; 87 bo->dev = dev; 88 bo->label = label; 89 return bo; 90} 91 92static void 93panfrost_bo_free(struct panfrost_bo *bo) 94{ 95 struct drm_gem_close gem_close = { .handle = bo->gem_handle }; 96 int ret; 97 98 ret = drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_CLOSE, &gem_close); 99 if (ret) { 100 fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %m\n"); 101 assert(0); 102 } 103 104 /* BO will be freed with the sparse array, but zero to indicate free */ 105 memset(bo, 0, sizeof(*bo)); 106} 107 108/* Returns true if the BO is ready, false otherwise. 109 * access_type is encoding the type of access one wants to ensure is done. 110 * Waiting is always done for writers, but if wait_readers is set then readers 111 * are also waited for. 112 */ 113bool 114panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns, bool wait_readers) 115{ 116 struct drm_panfrost_wait_bo req = { 117 .handle = bo->gem_handle, 118 .timeout_ns = timeout_ns, 119 }; 120 int ret; 121 122 /* If the BO has been exported or imported we can't rely on the cached 123 * state, we need to call the WAIT_BO ioctl. 124 */ 125 if (!(bo->flags & PAN_BO_SHARED)) { 126 /* If ->gpu_access is 0, the BO is idle, no need to wait. */ 127 if (!bo->gpu_access) 128 return true; 129 130 /* If the caller only wants to wait for writers and no 131 * writes are pending, we don't have to wait. 132 */ 133 if (!wait_readers && !(bo->gpu_access & PAN_BO_ACCESS_WRITE)) 134 return true; 135 } 136 137 /* The ioctl returns >= 0 value when the BO we are waiting for is ready 138 * -1 otherwise. 139 */ 140 ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req); 141 if (ret != -1) { 142 /* Set gpu_access to 0 so that the next call to bo_wait() 143 * doesn't have to call the WAIT_BO ioctl. 144 */ 145 bo->gpu_access = 0; 146 return true; 147 } 148 149 /* If errno is not ETIMEDOUT or EBUSY that means the handle we passed 150 * is invalid, which shouldn't happen here. 151 */ 152 assert(errno == ETIMEDOUT || errno == EBUSY); 153 return false; 154} 155 156/* Helper to calculate the bucket index of a BO */ 157 158static unsigned 159pan_bucket_index(unsigned size) 160{ 161 /* Round down to POT to compute a bucket index */ 162 163 unsigned bucket_index = util_logbase2(size); 164 165 /* Clamp the bucket index; all huge allocations will be 166 * sorted into the largest bucket */ 167 168 bucket_index = MIN2(bucket_index, MAX_BO_CACHE_BUCKET); 169 170 /* The minimum bucket size must equal the minimum allocation 171 * size; the maximum we clamped */ 172 173 assert(bucket_index >= MIN_BO_CACHE_BUCKET); 174 assert(bucket_index <= MAX_BO_CACHE_BUCKET); 175 176 /* Reindex from 0 */ 177 return (bucket_index - MIN_BO_CACHE_BUCKET); 178} 179 180static struct list_head * 181pan_bucket(struct panfrost_device *dev, unsigned size) 182{ 183 return &dev->bo_cache.buckets[pan_bucket_index(size)]; 184} 185 186/* Tries to fetch a BO of sufficient size with the appropriate flags from the 187 * BO cache. If it succeeds, it returns that BO and removes the BO from the 188 * cache. If it fails, it returns NULL signaling the caller to allocate a new 189 * BO. */ 190 191static struct panfrost_bo * 192panfrost_bo_cache_fetch(struct panfrost_device *dev, 193 size_t size, uint32_t flags, const char *label, 194 bool dontwait) 195{ 196 pthread_mutex_lock(&dev->bo_cache.lock); 197 struct list_head *bucket = pan_bucket(dev, size); 198 struct panfrost_bo *bo = NULL; 199 200 /* Iterate the bucket looking for something suitable */ 201 list_for_each_entry_safe(struct panfrost_bo, entry, bucket, 202 bucket_link) { 203 if (entry->size < size || entry->flags != flags) 204 continue; 205 206 /* If the oldest BO in the cache is busy, likely so is 207 * everything newer, so bail. */ 208 if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX, 209 PAN_BO_ACCESS_RW)) 210 break; 211 212 struct drm_panfrost_madvise madv = { 213 .handle = entry->gem_handle, 214 .madv = PANFROST_MADV_WILLNEED, 215 }; 216 int ret; 217 218 /* This one works, splice it out of the cache */ 219 list_del(&entry->bucket_link); 220 list_del(&entry->lru_link); 221 222 ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv); 223 if (!ret && !madv.retained) { 224 panfrost_bo_free(entry); 225 continue; 226 } 227 /* Let's go! */ 228 bo = entry; 229 bo->label = label; 230 break; 231 } 232 pthread_mutex_unlock(&dev->bo_cache.lock); 233 234 return bo; 235} 236 237static void 238panfrost_bo_cache_evict_stale_bos(struct panfrost_device *dev) 239{ 240 struct timespec time; 241 242 clock_gettime(CLOCK_MONOTONIC, &time); 243 list_for_each_entry_safe(struct panfrost_bo, entry, 244 &dev->bo_cache.lru, lru_link) { 245 /* We want all entries that have been used more than 1 sec 246 * ago to be dropped, others can be kept. 247 * Note the <= 2 check and not <= 1. It's here to account for 248 * the fact that we're only testing ->tv_sec, not ->tv_nsec. 249 * That means we might keep entries that are between 1 and 2 250 * seconds old, but we don't really care, as long as unused BOs 251 * are dropped at some point. 252 */ 253 if (time.tv_sec - entry->last_used <= 2) 254 break; 255 256 list_del(&entry->bucket_link); 257 list_del(&entry->lru_link); 258 panfrost_bo_free(entry); 259 } 260} 261 262/* Tries to add a BO to the cache. Returns if it was 263 * successful */ 264 265static bool 266panfrost_bo_cache_put(struct panfrost_bo *bo) 267{ 268 struct panfrost_device *dev = bo->dev; 269 270 if (bo->flags & PAN_BO_SHARED || dev->debug & PAN_DBG_NO_CACHE) 271 return false; 272 273 /* Must be first */ 274 pthread_mutex_lock(&dev->bo_cache.lock); 275 276 struct list_head *bucket = pan_bucket(dev, MAX2(bo->size, 4096)); 277 struct drm_panfrost_madvise madv; 278 struct timespec time; 279 280 madv.handle = bo->gem_handle; 281 madv.madv = PANFROST_MADV_DONTNEED; 282 madv.retained = 0; 283 284 drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv); 285 286 /* Add us to the bucket */ 287 list_addtail(&bo->bucket_link, bucket); 288 289 /* Add us to the LRU list and update the last_used field. */ 290 list_addtail(&bo->lru_link, &dev->bo_cache.lru); 291 clock_gettime(CLOCK_MONOTONIC, &time); 292 bo->last_used = time.tv_sec; 293 294 /* Let's do some cleanup in the BO cache while we hold the 295 * lock. 296 */ 297 panfrost_bo_cache_evict_stale_bos(dev); 298 299 /* Update the label to help debug BO cache memory usage issues */ 300 bo->label = "Unused (BO cache)"; 301 302 /* Must be last */ 303 pthread_mutex_unlock(&dev->bo_cache.lock); 304 return true; 305} 306 307/* Evicts all BOs from the cache. Called during context 308 * destroy or during low-memory situations (to free up 309 * memory that may be unused by us just sitting in our 310 * cache, but still reserved from the perspective of the 311 * OS) */ 312 313void 314panfrost_bo_cache_evict_all( 315 struct panfrost_device *dev) 316{ 317 pthread_mutex_lock(&dev->bo_cache.lock); 318 for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i) { 319 struct list_head *bucket = &dev->bo_cache.buckets[i]; 320 321 list_for_each_entry_safe(struct panfrost_bo, entry, bucket, 322 bucket_link) { 323 list_del(&entry->bucket_link); 324 list_del(&entry->lru_link); 325 panfrost_bo_free(entry); 326 } 327 } 328 pthread_mutex_unlock(&dev->bo_cache.lock); 329} 330 331void 332panfrost_bo_mmap(struct panfrost_bo *bo) 333{ 334 struct drm_panfrost_mmap_bo mmap_bo = { .handle = bo->gem_handle }; 335 int ret; 336 337 if (bo->ptr.cpu) 338 return; 339 340 ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo); 341 if (ret) { 342 fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n"); 343 assert(0); 344 } 345 346 bo->ptr.cpu = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, 347 bo->dev->fd, mmap_bo.offset); 348 if (bo->ptr.cpu == MAP_FAILED) { 349 bo->ptr.cpu = NULL; 350 fprintf(stderr, 351 "mmap failed: result=%p size=0x%llx fd=%i offset=0x%llx %m\n", 352 bo->ptr.cpu, (long long)bo->size, bo->dev->fd, 353 (long long)mmap_bo.offset); 354 } 355} 356 357static void 358panfrost_bo_munmap(struct panfrost_bo *bo) 359{ 360 if (!bo->ptr.cpu) 361 return; 362 363 if (os_munmap((void *) (uintptr_t)bo->ptr.cpu, bo->size)) { 364 perror("munmap"); 365 abort(); 366 } 367 368 bo->ptr.cpu = NULL; 369} 370 371struct panfrost_bo * 372panfrost_bo_create(struct panfrost_device *dev, size_t size, 373 uint32_t flags, const char *label) 374{ 375 struct panfrost_bo *bo; 376 377 /* Kernel will fail (confusingly) with EPERM otherwise */ 378 assert(size > 0); 379 380 /* To maximize BO cache usage, don't allocate tiny BOs */ 381 size = ALIGN_POT(size, 4096); 382 383 /* GROWABLE BOs cannot be mmapped */ 384 if (flags & PAN_BO_GROWABLE) 385 assert(flags & PAN_BO_INVISIBLE); 386 387 /* Before creating a BO, we first want to check the cache but without 388 * waiting for BO readiness (BOs in the cache can still be referenced 389 * by jobs that are not finished yet). 390 * If the cached allocation fails we fall back on fresh BO allocation, 391 * and if that fails too, we try one more time to allocate from the 392 * cache, but this time we accept to wait. 393 */ 394 bo = panfrost_bo_cache_fetch(dev, size, flags, label, true); 395 if (!bo) 396 bo = panfrost_bo_alloc(dev, size, flags, label); 397 if (!bo) 398 bo = panfrost_bo_cache_fetch(dev, size, flags, label, false); 399 400 if (!bo) 401 fprintf(stderr, "BO creation failed\n"); 402 403 assert(bo); 404 405 /* Only mmap now if we know we need to. For CPU-invisible buffers, we 406 * never map since we don't care about their contents; they're purely 407 * for GPU-internal use. But we do trace them anyway. */ 408 409 if (!(flags & (PAN_BO_INVISIBLE | PAN_BO_DELAY_MMAP))) 410 panfrost_bo_mmap(bo); 411 412 p_atomic_set(&bo->refcnt, 1); 413 414 if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) { 415 if (flags & PAN_BO_INVISIBLE) 416 pandecode_inject_mmap(bo->ptr.gpu, NULL, bo->size, NULL); 417 else if (!(flags & PAN_BO_DELAY_MMAP)) 418 pandecode_inject_mmap(bo->ptr.gpu, bo->ptr.cpu, bo->size, NULL); 419 } 420 421 return bo; 422} 423 424void 425panfrost_bo_reference(struct panfrost_bo *bo) 426{ 427 if (bo) { 428 ASSERTED int count = p_atomic_inc_return(&bo->refcnt); 429 assert(count != 1); 430 } 431} 432 433void 434panfrost_bo_unreference(struct panfrost_bo *bo) 435{ 436 if (!bo) 437 return; 438 439 /* Don't return to cache if there are still references */ 440 if (p_atomic_dec_return(&bo->refcnt)) 441 return; 442 443 struct panfrost_device *dev = bo->dev; 444 445 pthread_mutex_lock(&dev->bo_map_lock); 446 447 /* Someone might have imported this BO while we were waiting for the 448 * lock, let's make sure it's still not referenced before freeing it. 449 */ 450 if (p_atomic_read(&bo->refcnt) == 0) { 451 /* When the reference count goes to zero, we need to cleanup */ 452 panfrost_bo_munmap(bo); 453 454 if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) 455 pandecode_inject_free(bo->ptr.gpu, bo->size); 456 457 /* Rather than freeing the BO now, we'll cache the BO for later 458 * allocations if we're allowed to. 459 */ 460 if (!panfrost_bo_cache_put(bo)) 461 panfrost_bo_free(bo); 462 463 } 464 pthread_mutex_unlock(&dev->bo_map_lock); 465} 466 467struct panfrost_bo * 468panfrost_bo_import(struct panfrost_device *dev, int fd) 469{ 470 struct panfrost_bo *bo; 471 struct drm_panfrost_get_bo_offset get_bo_offset = {0,}; 472 ASSERTED int ret; 473 unsigned gem_handle; 474 475 ret = drmPrimeFDToHandle(dev->fd, fd, &gem_handle); 476 assert(!ret); 477 478 pthread_mutex_lock(&dev->bo_map_lock); 479 bo = pan_lookup_bo(dev, gem_handle); 480 481 if (!bo->dev) { 482 get_bo_offset.handle = gem_handle; 483 ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset); 484 assert(!ret); 485 486 bo->dev = dev; 487 bo->ptr.gpu = (mali_ptr) get_bo_offset.offset; 488 bo->size = lseek(fd, 0, SEEK_END); 489 /* Sometimes this can fail and return -1. size of -1 is not 490 * a nice thing for mmap to try mmap. Be more robust also 491 * for zero sized maps and fail nicely too 492 */ 493 if ((bo->size == 0) || (bo->size == (size_t)-1)) { 494 pthread_mutex_unlock(&dev->bo_map_lock); 495 return NULL; 496 } 497 bo->flags = PAN_BO_SHARED; 498 bo->gem_handle = gem_handle; 499 p_atomic_set(&bo->refcnt, 1); 500 // TODO map and unmap on demand? 501 panfrost_bo_mmap(bo); 502 } else { 503 /* bo->refcnt == 0 can happen if the BO 504 * was being released but panfrost_bo_import() acquired the 505 * lock before panfrost_bo_unreference(). In that case, refcnt 506 * is 0 and we can't use panfrost_bo_reference() directly, we 507 * have to re-initialize the refcnt(). 508 * Note that panfrost_bo_unreference() checks 509 * refcnt value just after acquiring the lock to 510 * make sure the object is not freed if panfrost_bo_import() 511 * acquired it in the meantime. 512 */ 513 if (p_atomic_read(&bo->refcnt) == 0) 514 p_atomic_set(&bo->refcnt, 1); 515 else 516 panfrost_bo_reference(bo); 517 assert(bo->ptr.cpu); 518 } 519 pthread_mutex_unlock(&dev->bo_map_lock); 520 521 return bo; 522} 523 524int 525panfrost_bo_export(struct panfrost_bo *bo) 526{ 527 struct drm_prime_handle args = { 528 .handle = bo->gem_handle, 529 .flags = DRM_CLOEXEC, 530 }; 531 532 int ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args); 533 if (ret == -1) 534 return -1; 535 536 bo->flags |= PAN_BO_SHARED; 537 return args.fd; 538} 539 540