/*	$NetBSD: i915_scheduler.c,v 1.8 2021/12/21 12:06:29 tnn Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2018 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: i915_scheduler.c,v 1.8 2021/12/21 12:06:29 tnn Exp $");

#include <linux/mutex.h>

#include "i915_drv.h"
#include "i915_globals.h"
#include "i915_request.h"
#include "i915_scheduler.h"

#include <linux/nbsd-namespace.h>

static struct i915_global_scheduler {
	struct i915_global base;
	struct kmem_cache *slab_dependencies;
	struct kmem_cache *slab_priorities;
} global;

#ifdef __NetBSD__
static spinlock_t schedule_lock;
spinlock_t *const i915_schedule_lock = &schedule_lock;
#else
static DEFINE_SPINLOCK(schedule_lock);
#endif

static const struct i915_request *
node_to_request(const struct i915_sched_node *node)
{
	return const_container_of(node, struct i915_request, sched);
}

static inline bool node_started(const struct i915_sched_node *node)
{
	return i915_request_started(node_to_request(node));
}

static inline bool node_signaled(const struct i915_sched_node *node)
{
	return i915_request_completed(node_to_request(node));
}

static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
	return rb_entry(rb, struct i915_priolist, node);
}

static void assert_priolists(struct intel_engine_execlists * const execlists)
{
	struct rb_node *rb;
	long last_prio, i;

	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
		   rb_first(&execlists->queue.rb_root));

	last_prio = (INT_MAX >> I915_USER_PRIORITY_SHIFT) + 1;
	for (rb = rb_first_cached(&execlists->queue);
	     rb;
	     rb = rb_next2(&execlists->queue.rb_root, rb)) {
		const struct i915_priolist *p = to_priolist(rb);

		GEM_BUG_ON(p->priority >= last_prio);
		last_prio = p->priority;

		GEM_BUG_ON(!p->used);
		for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
			if (list_empty(&p->requests[i]))
				continue;

			GEM_BUG_ON(!(p->used & BIT(i)));
		}
	}
}

#ifdef __NetBSD__

static int
compare_priolists(void *cookie, const void *va, const void *vb)
{
	const struct i915_priolist *a = va;
	const struct i915_priolist *b = vb;

	if (a->priority > b->priority)
		return -1;
	if (a->priority < b->priority)
		return +1;
	return 0;
}

static int
compare_priolist_key(void *cookie, const void *vp, const void *vk)
{
	const struct i915_priolist *p = vp;
	const int *priorityp = vk, priority = *priorityp;

	if (p->priority > priority)
		return -1;
	if (p->priority < priority)
		return +1;
	return 0;
}

static const rb_tree_ops_t i915_priolist_rb_ops = {
	.rbto_compare_nodes = compare_priolists,
	.rbto_compare_key = compare_priolist_key,
	.rbto_node_offset = offsetof(struct i915_priolist, node),
};

#endif

void
i915_sched_init(struct intel_engine_execlists *execlists)
{

#ifdef __NetBSD__
	rb_tree_init(&execlists->queue.rb_root.rbr_tree,
	    &i915_priolist_rb_ops);
#else
	execlists->queue = RB_ROOT_CACHED;
#endif
}

struct list_head *
i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;
	struct i915_priolist *p;
	struct rb_node **parent, *rb;
	bool first = true;
	int idx, i;

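	/*
	 * The execlists queue is an rbtree of i915_priolist nodes, one per
	 * user priority level, each carrying one request list per internal
	 * priority bit.  Roughly, the lookup below decomposes the packed
	 * priority as:
	 *
	 *	idx  = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1
	 *	prio = prio >> I915_USER_PRIORITY_SHIFT	(the tree key)
	 */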
	lockdep_assert_held(&engine->active.lock);
	assert_priolists(execlists);

	/* buckets sorted from highest [in slot 0] to lowest priority */
	idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1;
	prio >>= I915_USER_PRIORITY_SHIFT;
	if (unlikely(execlists->no_priolist))
		prio = I915_PRIORITY_NORMAL;

find_priolist:
#ifdef __NetBSD__
	/* XXX */
	__USE(first);
	__USE(parent);
	__USE(rb);
	p = rb_tree_find_node(&execlists->queue.rb_root.rbr_tree, &prio);
	if (p)
		goto out;
#else
	/* most positive priority is scheduled first, equal priorities fifo */
	rb = NULL;
	parent = &execlists->queue.rb_root.rb_node;
	while (*parent) {
		rb = *parent;
		p = to_priolist(rb);
		if (prio > p->priority) {
			parent = &rb->rb_left;
		} else if (prio < p->priority) {
			parent = &rb->rb_right;
			first = false;
		} else {
			goto out;
		}
	}
#endif

	if (prio == I915_PRIORITY_NORMAL) {
		p = &execlists->default_priolist;
	} else {
		p = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC);
		/* Convert an allocation failure to a priority bump */
		if (unlikely(!p)) {
			prio = I915_PRIORITY_NORMAL; /* recurses just once */

			/* To maintain ordering with all rendering, after an
			 * allocation failure we have to disable all scheduling.
			 * Requests will then be executed in fifo, and schedule
			 * will ensure that dependencies are emitted in fifo.
			 * There will still be some reordering with existing
			 * requests, so if userspace lied about their
			 * dependencies that reordering may be visible.
			 */
			execlists->no_priolist = true;
			goto find_priolist;
		}
	}

	p->priority = prio;
	for (i = 0; i < ARRAY_SIZE(p->requests); i++)
		INIT_LIST_HEAD(&p->requests[i]);
#ifdef __NetBSD__
	struct i915_priolist *collision __diagused;
	collision = rb_tree_insert_node(&execlists->queue.rb_root.rbr_tree,
	    p);
	KASSERT(collision == p);
#else
	rb_link_node(&p->node, rb, parent);
	rb_insert_color_cached(&p->node, &execlists->queue, first);
#endif
	p->used = 0;

out:
	p->used |= BIT(idx);
	return &p->requests[idx];
}

void __i915_priolist_free(struct i915_priolist *p)
{
	kmem_cache_free(global.slab_priorities, p);
}

struct sched_cache {
	struct list_head *priolist;
};

static struct intel_engine_cs *
sched_lock_engine(const struct i915_sched_node *node,
		  struct intel_engine_cs *locked,
		  struct sched_cache *cache)
{
	const struct i915_request *rq = node_to_request(node);
	struct intel_engine_cs *engine;

	GEM_BUG_ON(!locked);

	/*
	 * Virtual engines complicate acquiring the engine timeline lock,
	 * as their rq->engine pointer is not stable until under that
	 * engine lock. The simple ploy we use is to take the lock then
	 * check that the rq still belongs to the newly locked engine.
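	 *
	 * A rough sketch of the retry below: keep re-reading rq->engine and
	 * swapping locks until the engine we hold locked matches what rq
	 * reports.  The sched_cache is reset on every switch, as any cached
	 * priolist belongs to the previously locked engine.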
	 */
	while (locked != (engine = READ_ONCE(rq->engine))) {
		spin_unlock(&locked->active.lock);
		memset(cache, 0, sizeof(*cache));
		spin_lock(&engine->active.lock);
		locked = engine;
	}

	GEM_BUG_ON(locked != engine);
	return locked;
}

static inline int rq_prio(const struct i915_request *rq)
{
	return rq->sched.attr.priority | __NO_PREEMPTION;
}

static inline bool need_preempt(int prio, int active)
{
	/*
	 * Allow preemption of low -> normal -> high, but we do
	 * not allow low priority tasks to preempt other low priority
	 * tasks under the impression that latency for low priority
	 * tasks does not matter (as much as background throughput),
	 * so kiss.
	 */
	return prio >= max(I915_PRIORITY_NORMAL, active);
}

static void kick_submission(struct intel_engine_cs *engine,
			    const struct i915_request *rq,
			    int prio)
{
	const struct i915_request *inflight;

	/*
	 * We only need to kick the tasklet once for the high priority
	 * new context we add into the queue.
	 */
	if (prio <= engine->execlists.queue_priority_hint)
		return;

	rcu_read_lock();

	/* Nothing currently active? We're overdue for a submission! */
	inflight = execlists_active(&engine->execlists);
	if (!inflight)
		goto unlock;

	/*
	 * If we are already the currently executing context, don't
	 * bother evaluating if we should preempt ourselves.
	 */
	if (inflight->context == rq->context)
		goto unlock;

	engine->execlists.queue_priority_hint = prio;
	if (need_preempt(prio, rq_prio(inflight)))
		tasklet_hi_schedule(&engine->execlists.tasklet);

unlock:
	rcu_read_unlock();
}

static void __i915_schedule(struct i915_sched_node *node,
			    const struct i915_sched_attr *attr)
{
	struct intel_engine_cs *engine;
	struct i915_dependency *dep, *p;
	struct i915_dependency stack;
	const int prio = attr->priority;
	struct sched_cache cache;
	LIST_HEAD(dfs);

	/* Needed in order to use the temporary link inside i915_dependency */
	lockdep_assert_held(&schedule_lock);
	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);

	if (prio <= READ_ONCE(node->attr.priority))
		return;

	if (node_signaled(node))
		return;

	stack.signaler = node;
	list_add(&stack.dfs_link, &dfs);

	/*
	 * Recursively bump all dependent priorities to match the new request.
	 *
	 * A naive approach would be to use recursion:
	 * static void update_priorities(struct i915_sched_node *node, prio) {
	 *	list_for_each_entry(dep, &node->signalers_list, signal_link)
	 *		update_priorities(dep->signal, prio)
	 *	queue_request(node);
	 * }
	 * but that may have unlimited recursion depth and so runs a very
	 * real risk of overrunning the kernel stack. Instead, we build
	 * a flat list of all dependencies starting with the current request.
	 * As we walk the list of dependencies, we add all of its dependencies
	 * to the end of the list (this may include an already visited
	 * request) and continue to walk onwards onto the new dependencies. The
	 * end result is a topological list of requests in reverse order, the
	 * last element in the list is the request we must execute first.
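	 *
	 * As a small worked example (sketch only): if the boosted request R
	 * waits on B, and B waits on A, the walk below builds
	 * dfs = { R, B, A }, and the subsequent reverse traversal bumps A
	 * first, then B, then finally R itself.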
	 */
	list_for_each_entry(dep, &dfs, dfs_link) {
		struct i915_sched_node *node = dep->signaler;

		/* If we are already flying, we know we have no signalers */
		if (node_started(node))
			continue;

		/*
		 * Within an engine, there can be no cycle, but we may
		 * refer to the same dependency chain multiple times
		 * (redundant dependencies are not eliminated) and across
		 * engines.
		 */
		list_for_each_entry(p, &node->signalers_list, signal_link) {
			GEM_BUG_ON(p == dep); /* no cycles! */

			if (node_signaled(p->signaler))
				continue;

			if (prio > READ_ONCE(p->signaler->attr.priority))
				list_move_tail(&p->dfs_link, &dfs);
		}
	}

	/*
	 * If we didn't need to bump any existing priorities, and we haven't
	 * yet submitted this request (i.e. there is no potential race with
	 * execlists_submit_request()), we can set our own priority and skip
	 * acquiring the engine locks.
	 */
	if (node->attr.priority == I915_PRIORITY_INVALID) {
		GEM_BUG_ON(!list_empty(&node->link));
		node->attr = *attr;

		if (stack.dfs_link.next == stack.dfs_link.prev)
			return;

		__list_del_entry(&stack.dfs_link);
	}

	memset(&cache, 0, sizeof(cache));
	engine = node_to_request(node)->engine;
	spin_lock(&engine->active.lock);

	/* Fifo and depth-first replacement ensure our deps execute before us */
	engine = sched_lock_engine(node, engine, &cache);
	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
		INIT_LIST_HEAD(&dep->dfs_link);

		node = dep->signaler;
		engine = sched_lock_engine(node, engine, &cache);
		lockdep_assert_held(&engine->active.lock);

		/* Recheck after acquiring the engine->timeline.lock */
		if (prio <= node->attr.priority || node_signaled(node))
			continue;

		GEM_BUG_ON(node_to_request(node)->engine != engine);

		node->attr.priority = prio;

		/*
		 * Once the request is ready, it will be placed into the
		 * priority lists and then onto the HW runlist. Before the
		 * request is ready, it does not contribute to our preemption
		 * decisions and we can safely ignore it, as it, and any
		 * preemption required, will be dealt with upon submission.
		 * See engine->submit_request()
		 */
		if (list_empty(&node->link))
			continue;

		if (i915_request_in_priority_queue(node_to_request(node))) {
			if (!cache.priolist)
				cache.priolist =
					i915_sched_lookup_priolist(engine,
								   prio);
			list_move_tail(&node->link, cache.priolist);
		}

		/*
		 * Defer (tasklet) submission until after all of our
		 * updates.
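		 *
		 * kick_submission() itself only raises
		 * execlists.queue_priority_hint and, when preemption looks
		 * warranted, schedules the execlists tasklet; the actual
		 * resubmission is expected to happen later from that
		 * tasklet.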
		 */
		kick_submission(engine, node_to_request(node), prio);
	}

	spin_unlock(&engine->active.lock);
}

void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
{
	spin_lock_irq(&schedule_lock);
	__i915_schedule(&rq->sched, attr);
	spin_unlock_irq(&schedule_lock);
}

static void __bump_priority(struct i915_sched_node *node, unsigned int bump)
{
	struct i915_sched_attr attr = node->attr;

	attr.priority |= bump;
	__i915_schedule(node, &attr);
}

void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
{
	unsigned long flags;

	GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);
	if (READ_ONCE(rq->sched.attr.priority) & bump)
		return;

	spin_lock_irqsave(&schedule_lock, flags);
	__bump_priority(&rq->sched, bump);
	spin_unlock_irqrestore(&schedule_lock, flags);
}

void i915_sched_node_init(struct i915_sched_node *node)
{
	INIT_LIST_HEAD(&node->signalers_list);
	INIT_LIST_HEAD(&node->waiters_list);
	INIT_LIST_HEAD(&node->link);

	i915_sched_node_reinit(node);
}

void i915_sched_node_reinit(struct i915_sched_node *node)
{
	node->attr.priority = I915_PRIORITY_INVALID;
	node->semaphores = 0;
	node->flags = 0;

	GEM_BUG_ON(!list_empty(&node->signalers_list));
	GEM_BUG_ON(!list_empty(&node->waiters_list));
	GEM_BUG_ON(!list_empty(&node->link));
}

static struct i915_dependency *
i915_dependency_alloc(void)
{
	return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL);
}

static void
i915_dependency_free(struct i915_dependency *dep)
{
	kmem_cache_free(global.slab_dependencies, dep);
}

bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
				      struct i915_sched_node *signal,
				      struct i915_dependency *dep,
				      unsigned long flags)
{
	bool ret = false;

	spin_lock_irq(&schedule_lock);

	if (!node_signaled(signal)) {
		INIT_LIST_HEAD(&dep->dfs_link);
		dep->signaler = signal;
		dep->waiter = node;
		dep->flags = flags;

		/* Keep track of whether anyone on this chain has a semaphore */
		if (signal->flags & I915_SCHED_HAS_SEMAPHORE_CHAIN &&
		    !node_started(signal))
			node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;

		/* All set, now publish. Beware the lockless walkers. */
		list_add(&dep->signal_link, &node->signalers_list);
		list_add_rcu(&dep->wait_link, &signal->waiters_list);

		/*
		 * As we do not allow WAIT to preempt inflight requests,
		 * once we have executed a request, along with triggering
		 * any execution callbacks, we must preserve its ordering
		 * within the non-preemptible FIFO.
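		 *
		 * Note that __bump_priority() below ORs __NO_PREEMPTION into
		 * the signaler's priority via __i915_schedule(), so the flag
		 * should also propagate along the signaler's own dependency
		 * chain.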
523 */ 524 BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); 525 if (flags & I915_DEPENDENCY_EXTERNAL) 526 __bump_priority(signal, __NO_PREEMPTION); 527 528 ret = true; 529 } 530 531 spin_unlock_irq(&schedule_lock); 532 533 return ret; 534 } 535 536 int i915_sched_node_add_dependency(struct i915_sched_node *node, 537 struct i915_sched_node *signal) 538 { 539 struct i915_dependency *dep; 540 541 dep = i915_dependency_alloc(); 542 if (!dep) 543 return -ENOMEM; 544 545 if (!__i915_sched_node_add_dependency(node, signal, dep, 546 I915_DEPENDENCY_EXTERNAL | 547 I915_DEPENDENCY_ALLOC)) 548 i915_dependency_free(dep); 549 550 return 0; 551 } 552 553 void i915_sched_node_fini(struct i915_sched_node *node) 554 { 555 struct i915_dependency *dep, *tmp; 556 557 spin_lock_irq(&schedule_lock); 558 559 /* 560 * Everyone we depended upon (the fences we wait to be signaled) 561 * should retire before us and remove themselves from our list. 562 * However, retirement is run independently on each timeline and 563 * so we may be called out-of-order. 564 */ 565 list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) { 566 GEM_BUG_ON(!list_empty(&dep->dfs_link)); 567 568 list_del(&dep->wait_link); 569 if (dep->flags & I915_DEPENDENCY_ALLOC) 570 i915_dependency_free(dep); 571 } 572 INIT_LIST_HEAD(&node->signalers_list); 573 574 /* Remove ourselves from everyone who depends upon us */ 575 list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) { 576 GEM_BUG_ON(dep->signaler != node); 577 GEM_BUG_ON(!list_empty(&dep->dfs_link)); 578 579 list_del(&dep->signal_link); 580 if (dep->flags & I915_DEPENDENCY_ALLOC) 581 i915_dependency_free(dep); 582 } 583 INIT_LIST_HEAD(&node->waiters_list); 584 585 spin_unlock_irq(&schedule_lock); 586 } 587 588 static void i915_global_scheduler_shrink(void) 589 { 590 kmem_cache_shrink(global.slab_dependencies); 591 kmem_cache_shrink(global.slab_priorities); 592 } 593 594 static void i915_global_scheduler_exit(void) 595 { 596 kmem_cache_destroy(global.slab_dependencies); 597 kmem_cache_destroy(global.slab_priorities); 598 } 599 600 static struct i915_global_scheduler global = { { 601 .shrink = i915_global_scheduler_shrink, 602 .exit = i915_global_scheduler_exit, 603 } }; 604 605 int __init i915_global_scheduler_init(void) 606 { 607 global.slab_dependencies = KMEM_CACHE(i915_dependency, 608 SLAB_HWCACHE_ALIGN); 609 if (!global.slab_dependencies) 610 return -ENOMEM; 611 612 global.slab_priorities = KMEM_CACHE(i915_priolist, 613 SLAB_HWCACHE_ALIGN); 614 if (!global.slab_priorities) 615 goto err_priorities; 616 617 i915_global_register(&global.base); 618 return 0; 619 620 err_priorities: 621 kmem_cache_destroy(global.slab_priorities); 622 return -ENOMEM; 623 } 624