/*	$NetBSD: i915_scheduler.c,v 1.2 2021/12/18 23:45:28 riastradh Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright 2018 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: i915_scheduler.c,v 1.2 2021/12/18 23:45:28 riastradh Exp $");

#include <linux/mutex.h>

#include "i915_drv.h"
#include "i915_globals.h"
#include "i915_request.h"
#include "i915_scheduler.h"

static struct i915_global_scheduler {
        struct i915_global base;
        struct kmem_cache *slab_dependencies;
        struct kmem_cache *slab_priorities;
} global;

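/*
 * schedule_lock serialises all priority propagation through the
 * dependency graph (__i915_schedule() and the node add/fini helpers
 * below); it is always taken irq-safe.
 */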
static DEFINE_SPINLOCK(schedule_lock);

static const struct i915_request *
node_to_request(const struct i915_sched_node *node)
{
        return container_of(node, const struct i915_request, sched);
}

static inline bool node_started(const struct i915_sched_node *node)
{
        return i915_request_started(node_to_request(node));
}

static inline bool node_signaled(const struct i915_sched_node *node)
{
        return i915_request_completed(node_to_request(node));
}

static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
        return rb_entry(rb, struct i915_priolist, node);
}

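/*
 * Debug-only (CONFIG_DRM_I915_DEBUG_GEM) consistency checks on the
 * execlists priority queue: the cached leftmost rbtree node must match
 * the real first node, priorities must be strictly descending, and
 * every non-empty request bucket must have its bit set in p->used.
 */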
static void assert_priolists(struct intel_engine_execlists * const execlists)
{
        struct rb_node *rb;
        long last_prio, i;

        if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
                return;

        GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
                   rb_first(&execlists->queue.rb_root));

        last_prio = (INT_MAX >> I915_USER_PRIORITY_SHIFT) + 1;
        for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
                const struct i915_priolist *p = to_priolist(rb);

                GEM_BUG_ON(p->priority >= last_prio);
                last_prio = p->priority;

                GEM_BUG_ON(!p->used);
                for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
                        if (list_empty(&p->requests[i]))
                                continue;

                        GEM_BUG_ON(!(p->used & BIT(i)));
                }
        }
}

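/*
 * Find (or create) the i915_priolist bucket for @prio on @engine and
 * return the list_head onto which requests of that priority are
 * queued.  Caller must hold engine->active.lock.
 */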
struct list_head *
i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
{
        struct intel_engine_execlists * const execlists = &engine->execlists;
        struct i915_priolist *p;
        struct rb_node **parent, *rb;
        bool first = true;
        int idx, i;

        lockdep_assert_held(&engine->active.lock);
        assert_priolists(execlists);

        /* buckets sorted from highest [in slot 0] to lowest priority */
        idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1;
        prio >>= I915_USER_PRIORITY_SHIFT;
        if (unlikely(execlists->no_priolist))
                prio = I915_PRIORITY_NORMAL;

find_priolist:
        /* most positive priority is scheduled first, equal priorities fifo */
        rb = NULL;
        parent = &execlists->queue.rb_root.rb_node;
        while (*parent) {
                rb = *parent;
                p = to_priolist(rb);
                if (prio > p->priority) {
                        parent = &rb->rb_left;
                } else if (prio < p->priority) {
                        parent = &rb->rb_right;
                        first = false;
                } else {
                        goto out;
                }
        }

        if (prio == I915_PRIORITY_NORMAL) {
                p = &execlists->default_priolist;
        } else {
                p = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC);
                /* Convert an allocation failure to a priority bump */
                if (unlikely(!p)) {
                        prio = I915_PRIORITY_NORMAL; /* recurses just once */

                        /* To maintain ordering with all rendering, after an
                         * allocation failure we have to disable all scheduling.
                         * Requests will then be executed in fifo, and schedule
                         * will ensure that dependencies are emitted in fifo.
                         * There will still be some reordering with existing
                         * requests, so if userspace lied about their
                         * dependencies that reordering may be visible.
                         */
                        execlists->no_priolist = true;
                        goto find_priolist;
                }
        }

        p->priority = prio;
        for (i = 0; i < ARRAY_SIZE(p->requests); i++)
                INIT_LIST_HEAD(&p->requests[i]);
        rb_link_node(&p->node, rb, parent);
        rb_insert_color_cached(&p->node, &execlists->queue, first);
        p->used = 0;

out:
        p->used |= BIT(idx);
        return &p->requests[idx];
}

void __i915_priolist_free(struct i915_priolist *p)
{
        kmem_cache_free(global.slab_priorities, p);
}

struct sched_cache {
        struct list_head *priolist;
};

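/*
 * (Re)take the engine->active.lock for the engine that @node's request
 * currently belongs to.  A request on a virtual engine may change
 * engines until we hold the right lock, so loop until the lock we hold
 * matches rq->engine; any cached priolist is invalidated whenever we
 * have to switch locks.
 */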
static struct intel_engine_cs *
sched_lock_engine(const struct i915_sched_node *node,
                  struct intel_engine_cs *locked,
                  struct sched_cache *cache)
{
        const struct i915_request *rq = node_to_request(node);
        struct intel_engine_cs *engine;

        GEM_BUG_ON(!locked);

        /*
         * Virtual engines complicate acquiring the engine timeline lock,
         * as their rq->engine pointer is not stable until under that
         * engine lock. The simple ploy we use is to take the lock then
         * check that the rq still belongs to the newly locked engine.
         */
        while (locked != (engine = READ_ONCE(rq->engine))) {
                spin_unlock(&locked->active.lock);
                memset(cache, 0, sizeof(*cache));
                spin_lock(&engine->active.lock);
                locked = engine;
        }

        GEM_BUG_ON(locked != engine);
        return locked;
}

static inline int rq_prio(const struct i915_request *rq)
{
        return rq->sched.attr.priority | __NO_PREEMPTION;
}

static inline bool need_preempt(int prio, int active)
{
        /*
         * Allow preemption of low -> normal -> high, but we do
         * not allow low priority tasks to preempt other low priority
         * tasks, on the assumption that latency for low priority
         * tasks does not matter (as much as background throughput),
         * so keep it simple.
         */
        return prio >= max(I915_PRIORITY_NORMAL, active);
}

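/*
 * Having raised the priority of @rq, decide whether the execlists
 * tasklet needs kicking: only if the new priority beats the current
 * queue_priority_hint and could preempt whatever is in flight.
 */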
static void kick_submission(struct intel_engine_cs *engine,
                            const struct i915_request *rq,
                            int prio)
{
        const struct i915_request *inflight;

        /*
         * We only need to kick the tasklet once for the high priority
         * new context we add into the queue.
         */
        if (prio <= engine->execlists.queue_priority_hint)
                return;

        rcu_read_lock();

        /* Nothing currently active? We're overdue for a submission! */
        inflight = execlists_active(&engine->execlists);
        if (!inflight)
                goto unlock;

        /*
         * If we are already the currently executing context, don't
         * bother evaluating if we should preempt ourselves.
         */
        if (inflight->context == rq->context)
                goto unlock;

        engine->execlists.queue_priority_hint = prio;
        if (need_preempt(prio, rq_prio(inflight)))
                tasklet_hi_schedule(&engine->execlists.tasklet);

unlock:
        rcu_read_unlock();
}

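/*
 * Raise @node to @attr->priority and propagate that priority to every
 * request it transitively depends on (its signalers), so that a high
 * priority request is never left waiting behind lower priority work.
 * Caller must hold schedule_lock.
 */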
static void __i915_schedule(struct i915_sched_node *node,
                            const struct i915_sched_attr *attr)
{
        struct intel_engine_cs *engine;
        struct i915_dependency *dep, *p;
        struct i915_dependency stack;
        const int prio = attr->priority;
        struct sched_cache cache;
        LIST_HEAD(dfs);

        /* Needed in order to use the temporary link inside i915_dependency */
        lockdep_assert_held(&schedule_lock);
        GEM_BUG_ON(prio == I915_PRIORITY_INVALID);

        if (prio <= READ_ONCE(node->attr.priority))
                return;

        if (node_signaled(node))
                return;

        stack.signaler = node;
        list_add(&stack.dfs_link, &dfs);

        /*
         * Recursively bump all dependent priorities to match the new request.
         *
         * A naive approach would be to use recursion:
         * static void update_priorities(struct i915_sched_node *node, prio) {
         *         list_for_each_entry(dep, &node->signalers_list, signal_link)
         *                 update_priorities(dep->signaler, prio)
         *         queue_request(node);
         * }
         * but that may have unlimited recursion depth and so runs a very
         * real risk of overrunning the kernel stack. Instead, we build
         * a flat list of all dependencies starting with the current request.
         * As we walk the list of dependencies, we add all of its dependencies
         * to the end of the list (this may include an already visited
         * request) and continue to walk onwards onto the new dependencies. The
         * end result is a topological list of requests in reverse order; the
         * last element in the list is the request we must execute first.
         */
        list_for_each_entry(dep, &dfs, dfs_link) {
                struct i915_sched_node *node = dep->signaler;

                /* If we are already flying, we know we have no signalers */
                if (node_started(node))
                        continue;

                /*
                 * Within an engine, there can be no cycle, but we may
                 * refer to the same dependency chain multiple times
                 * (redundant dependencies are not eliminated) and across
                 * engines.
                 */
                list_for_each_entry(p, &node->signalers_list, signal_link) {
                        GEM_BUG_ON(p == dep); /* no cycles! */

                        if (node_signaled(p->signaler))
                                continue;

                        if (prio > READ_ONCE(p->signaler->attr.priority))
                                list_move_tail(&p->dfs_link, &dfs);
                }
        }

        /*
         * If we didn't need to bump any existing priorities, and we haven't
         * yet submitted this request (i.e. there is no potential race with
         * execlists_submit_request()), we can set our own priority and skip
         * acquiring the engine locks.
         */
        if (node->attr.priority == I915_PRIORITY_INVALID) {
                GEM_BUG_ON(!list_empty(&node->link));
                node->attr = *attr;

                if (stack.dfs_link.next == stack.dfs_link.prev)
                        return;

                __list_del_entry(&stack.dfs_link);
        }

        memset(&cache, 0, sizeof(cache));
        engine = node_to_request(node)->engine;
        spin_lock(&engine->active.lock);

        /* Fifo and depth-first replacement ensure our deps execute before us */
        engine = sched_lock_engine(node, engine, &cache);
        list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
                INIT_LIST_HEAD(&dep->dfs_link);

                node = dep->signaler;
                engine = sched_lock_engine(node, engine, &cache);
                lockdep_assert_held(&engine->active.lock);

                /* Recheck after acquiring the engine->active.lock */
                if (prio <= node->attr.priority || node_signaled(node))
                        continue;

                GEM_BUG_ON(node_to_request(node)->engine != engine);

                node->attr.priority = prio;

                /*
                 * Once the request is ready, it will be placed into the
                 * priority lists and then onto the HW runlist. Before the
                 * request is ready, it does not contribute to our preemption
                 * decisions and we can safely ignore it; the request, and any
                 * preemption it requires, will be dealt with upon submission.
                 * See engine->submit_request()
                 */
                if (list_empty(&node->link))
                        continue;

                if (i915_request_in_priority_queue(node_to_request(node))) {
                        if (!cache.priolist)
                                cache.priolist =
                                        i915_sched_lookup_priolist(engine,
                                                                   prio);
                        list_move_tail(&node->link, cache.priolist);
                }

                /* Defer (tasklet) submission until after all of our updates. */
                kick_submission(engine, node_to_request(node), prio);
        }

        spin_unlock(&engine->active.lock);
}

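/*
 * Locked entry point for priority updates: wraps __i915_schedule()
 * in schedule_lock with interrupts disabled.
 */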
void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
{
        spin_lock_irq(&schedule_lock);
        __i915_schedule(&rq->sched, attr);
        spin_unlock_irq(&schedule_lock);
}

static void __bump_priority(struct i915_sched_node *node, unsigned int bump)
{
        struct i915_sched_attr attr = node->attr;

        attr.priority |= bump;
        __i915_schedule(node, &attr);
}

void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
{
        unsigned long flags;

        GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);
        if (READ_ONCE(rq->sched.attr.priority) & bump)
                return;

        spin_lock_irqsave(&schedule_lock, flags);
        __bump_priority(&rq->sched, bump);
        spin_unlock_irqrestore(&schedule_lock, flags);
}

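/*
 * Initialise the scheduling bookkeeping embedded in a request: empty
 * signalers/waiters lists and an invalid priority, reset again via
 * i915_sched_node_reinit() whenever the request is reused.
 */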
void i915_sched_node_init(struct i915_sched_node *node)
{
        INIT_LIST_HEAD(&node->signalers_list);
        INIT_LIST_HEAD(&node->waiters_list);
        INIT_LIST_HEAD(&node->link);

        i915_sched_node_reinit(node);
}

void i915_sched_node_reinit(struct i915_sched_node *node)
{
        node->attr.priority = I915_PRIORITY_INVALID;
        node->semaphores = 0;
        node->flags = 0;

        GEM_BUG_ON(!list_empty(&node->signalers_list));
        GEM_BUG_ON(!list_empty(&node->waiters_list));
        GEM_BUG_ON(!list_empty(&node->link));
}

static struct i915_dependency *
i915_dependency_alloc(void)
{
        return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL);
}

static void
i915_dependency_free(struct i915_dependency *dep)
{
        kmem_cache_free(global.slab_dependencies, dep);
}

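/*
 * Link @node behind @signal using the caller-provided @dep, unless
 * @signal has already completed.  Returns true if the dependency was
 * recorded (the graph now owns @dep), false if @dep is unused and may
 * be freed by the caller.
 */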
bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
                                      struct i915_sched_node *signal,
                                      struct i915_dependency *dep,
                                      unsigned long flags)
{
        bool ret = false;

        spin_lock_irq(&schedule_lock);

        if (!node_signaled(signal)) {
                INIT_LIST_HEAD(&dep->dfs_link);
                dep->signaler = signal;
                dep->waiter = node;
                dep->flags = flags;

                /* Keep track of whether anyone on this chain has a semaphore */
                if (signal->flags & I915_SCHED_HAS_SEMAPHORE_CHAIN &&
                    !node_started(signal))
                        node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;

                /* All set, now publish. Beware the lockless walkers. */
                list_add(&dep->signal_link, &node->signalers_list);
                list_add_rcu(&dep->wait_link, &signal->waiters_list);

                /*
                 * As we do not allow WAIT to preempt inflight requests,
                 * once we have executed a request, along with triggering
                 * any execution callbacks, we must preserve its ordering
                 * within the non-preemptible FIFO.
                 */
                BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK);
                if (flags & I915_DEPENDENCY_EXTERNAL)
                        __bump_priority(signal, __NO_PREEMPTION);

                ret = true;
        }

        spin_unlock_irq(&schedule_lock);

        return ret;
}

int i915_sched_node_add_dependency(struct i915_sched_node *node,
                                   struct i915_sched_node *signal)
{
        struct i915_dependency *dep;

        dep = i915_dependency_alloc();
        if (!dep)
                return -ENOMEM;

        if (!__i915_sched_node_add_dependency(node, signal, dep,
                                              I915_DEPENDENCY_EXTERNAL |
                                              I915_DEPENDENCY_ALLOC))
                i915_dependency_free(dep);

        return 0;
}

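/*
 * Tear down the dependency graph around @node: drop the links to all
 * remaining signalers and detach every waiter still pointing at us,
 * freeing any i915_dependency we allocated along the way.
 */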
void i915_sched_node_fini(struct i915_sched_node *node)
{
        struct i915_dependency *dep, *tmp;

        spin_lock_irq(&schedule_lock);

        /*
         * Everyone we depended upon (the fences we wait to be signaled)
         * should retire before us and remove themselves from our list.
         * However, retirement is run independently on each timeline and
         * so we may be called out-of-order.
         */
        list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
                GEM_BUG_ON(!list_empty(&dep->dfs_link));

                list_del(&dep->wait_link);
                if (dep->flags & I915_DEPENDENCY_ALLOC)
                        i915_dependency_free(dep);
        }
        INIT_LIST_HEAD(&node->signalers_list);

        /* Remove ourselves from everyone who depends upon us */
        list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
                GEM_BUG_ON(dep->signaler != node);
                GEM_BUG_ON(!list_empty(&dep->dfs_link));

                list_del(&dep->signal_link);
                if (dep->flags & I915_DEPENDENCY_ALLOC)
                        i915_dependency_free(dep);
        }
        INIT_LIST_HEAD(&node->waiters_list);

        spin_unlock_irq(&schedule_lock);
}

static void i915_global_scheduler_shrink(void)
{
        kmem_cache_shrink(global.slab_dependencies);
        kmem_cache_shrink(global.slab_priorities);
}

static void i915_global_scheduler_exit(void)
{
        kmem_cache_destroy(global.slab_dependencies);
        kmem_cache_destroy(global.slab_priorities);
}

static struct i915_global_scheduler global = { {
        .shrink = i915_global_scheduler_shrink,
        .exit = i915_global_scheduler_exit,
} };

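/*
 * Module-init setup of the two slab caches used above (i915_dependency
 * links and i915_priolist buckets), registered with the i915_globals
 * shrink/exit machinery.
 */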
int __init i915_global_scheduler_init(void)
{
        global.slab_dependencies = KMEM_CACHE(i915_dependency,
                                              SLAB_HWCACHE_ALIGN);
        if (!global.slab_dependencies)
                return -ENOMEM;

        global.slab_priorities = KMEM_CACHE(i915_priolist,
                                            SLAB_HWCACHE_ALIGN);
        if (!global.slab_priorities)
                goto err_priorities;

        i915_global_register(&global.base);
        return 0;

err_priorities:
        /* slab_priorities failed to allocate; undo the dependencies cache */
        kmem_cache_destroy(global.slab_dependencies);
        return -ENOMEM;
}