/*	$NetBSD: scheduler.c,v 1.50 2020/02/15 18:12:15 ad Exp $	*/

/*
 * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: scheduler.c,v 1.50 2020/02/15 18:12:15 ad Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/queue.h>
#include <sys/select.h>
#include <sys/systm.h>

#include <rump-sys/kern.h>

#include <rump/rumpuser.h>

static struct rumpcpu {
	/* needed in fastpath */
	struct cpu_info *rcpu_ci;
	void *rcpu_prevlwp;

	/* needed in slowpath */
	struct rumpuser_mtx *rcpu_mtx;
	struct rumpuser_cv *rcpu_cv;
	int rcpu_wanted;

	/* offset 20 (P=4) or 36 (P=8) here */

	/*
	 * Some stats.  Not really that necessary, but we should
	 * have room.  Note that these overflow quite fast, so they
	 * need to be collected often.
	 */
	unsigned int rcpu_fastpath;
	unsigned int rcpu_slowpath;
	unsigned int rcpu_migrated;

	/* offset 32 (P=4) or 50 (P=8) */

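	/*
	 * Zero-sized, cache line aligned tail: pads each array slot
	 * out to a multiple of CACHE_LINE_SIZE so that one virtual
	 * CPU's state does not false-share with its neighbour's.
	 */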
	int rcpu_align[0] __aligned(CACHE_LINE_SIZE);
} rcpu_storage[MAXCPUS];

static inline struct rumpcpu *
cpuinfo_to_rumpcpu(struct cpu_info *ci)
{

	return &rcpu_storage[cpu_index(ci)];
}

struct cpu_info rump_bootcpu;

#define RCPULWP_BUSY	((void *)-1)
#define RCPULWP_WANTED	((void *)-2)
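
/*
 * rcpu_prevlwp doubles as the CPU ownership word.  It holds one of
 * three kinds of values: the lwp that last ran on (and released) the
 * CPU, RCPULWP_BUSY while the CPU is held, or RCPULWP_WANTED when a
 * waiter has announced itself.  See rump_schedule_cpu_interlock() and
 * rump_unschedule_cpu1() for the transitions.
 */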

static struct rumpuser_mtx *lwp0mtx;
static struct rumpuser_cv *lwp0cv;
static unsigned nextcpu;

kmutex_t unruntime_lock; /* unruntime lwp lock.  practically unused */

static bool lwp0isbusy = false;

/*
 * Keep some stats.
 *
 * Keeping track of them is not really critical for speed, unless
 * the stats happen to be on a different cache line (CACHE_LINE_SIZE is
 * really just a coarse estimate), so we default to the performant case
 * (i.e. no stats).
 */
#ifdef RUMPSCHED_STATS
#define SCHED_FASTPATH(rcpu) rcpu->rcpu_fastpath++;
#define SCHED_SLOWPATH(rcpu) rcpu->rcpu_slowpath++;
#define SCHED_MIGRATED(rcpu) rcpu->rcpu_migrated++;
#else
#define SCHED_FASTPATH(rcpu)
#define SCHED_SLOWPATH(rcpu)
#define SCHED_MIGRATED(rcpu)
#endif
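
/*
 * Illustrative sketch only, kept out of the build: a hypothetical
 * helper (the name is made up) showing how the counters above could
 * be harvested.  Since the counters are plain unsigned ints that
 * overflow quickly, a real consumer would poll them frequently.
 */
#if 0
static void
rumpsched_dumpstats(void)
{
	struct rumpcpu *rcpu;
	int i;

	for (i = 0; i < ncpu; i++) {
		rcpu = &rcpu_storage[i];
		printf("cpu%d: fastpath %u slowpath %u migrated %u\n", i,
		    rcpu->rcpu_fastpath, rcpu->rcpu_slowpath,
		    rcpu->rcpu_migrated);
	}
}
#endif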

struct cpu_info *
cpu_lookup(u_int index)
{

	return rcpu_storage[index].rcpu_ci;
}

static inline struct rumpcpu *
getnextcpu(void)
{
	unsigned newcpu;

	newcpu = atomic_inc_uint_nv(&nextcpu);
	if (__predict_false(newcpu > UINT_MAX/2))
		atomic_and_uint(&nextcpu, 0);
	newcpu = newcpu % ncpu;

	return &rcpu_storage[newcpu];
}
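
/*
 * Example: with ncpu == 2, successive getnextcpu() calls return
 * rcpu_storage[1], [0], [1], [0], ... -- a simple round-robin.  The
 * occasional reset of nextcpu above merely keeps the counter from
 * wrapping and skewing the modulo.
 */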

/* this could/should be mi_attach_cpu? */
void
rump_cpus_bootstrap(int *nump)
{
	int num = *nump;

	if (num > MAXCPUS) {
		aprint_verbose("CPU limit: %d wanted, %d (MAXCPUS) "
		    "available (adjusted)\n", num, MAXCPUS);
		num = MAXCPUS;
	}

	cpu_setmodel("rumpcore (virtual)");

	mi_cpu_init();

	/* attach first cpu for bootstrap */
	rump_cpu_attach(&rump_bootcpu);
	ncpu = 1;
	*nump = num;
}

void
rump_scheduler_init(int numcpu)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	int i;

	rumpuser_mutex_init(&lwp0mtx, RUMPUSER_MTX_SPIN);
	rumpuser_cv_init(&lwp0cv);
	for (i = 0; i < numcpu; i++) {
		if (i == 0) {
			ci = &rump_bootcpu;
		} else {
			ci = kmem_zalloc(sizeof(*ci), KM_SLEEP);
			ci->ci_index = i;
		}

		rcpu = &rcpu_storage[i];
		rcpu->rcpu_ci = ci;
		rcpu->rcpu_wanted = 0;
		rumpuser_cv_init(&rcpu->rcpu_cv);
		rumpuser_mutex_init(&rcpu->rcpu_mtx, RUMPUSER_MTX_SPIN);

		ci->ci_schedstate.spc_mutex =
		    mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
		ci->ci_schedstate.spc_flags = SPCF_RUNNING;
	}

	mutex_init(&unruntime_lock, MUTEX_DEFAULT, IPL_SCHED);
}

/*
 * condvar ops using scheduler lock as the rumpuser interlock.
 */
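/*
 * Note the pairing: rcpu_mtx, passed as the rumpuser interlock here,
 * is the same lock that the *_interlock scheduling variants below
 * take and release on behalf of the sleeper, which is what keeps the
 * sleep/wakeup sequence free of lost-wakeup races.
 */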
void
rump_schedlock_cv_wait(struct rumpuser_cv *cv)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = cpuinfo_to_rumpcpu(l->l_cpu);

	/* mutex will be taken and released in cpu schedule/unschedule */
	rumpuser_cv_wait(cv, rcpu->rcpu_mtx);
}

int
rump_schedlock_cv_timedwait(struct rumpuser_cv *cv, const struct timespec *ts)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = cpuinfo_to_rumpcpu(l->l_cpu);

	/* mutex will be taken and released in cpu schedule/unschedule */
	return rumpuser_cv_timedwait(cv, rcpu->rcpu_mtx,
	    ts->tv_sec, ts->tv_nsec);
}

static void
lwp0busy(void)
{

	/* busy lwp0 */
	KASSERT(curlwp == NULL || curlwp->l_stat != LSONPROC);
	rumpuser_mutex_enter_nowrap(lwp0mtx);
	while (lwp0isbusy)
		rumpuser_cv_wait_nowrap(lwp0cv, lwp0mtx);
	lwp0isbusy = true;
	rumpuser_mutex_exit(lwp0mtx);
}

static void
lwp0rele(void)
{

	rumpuser_mutex_enter_nowrap(lwp0mtx);
	KASSERT(lwp0isbusy == true);
	lwp0isbusy = false;
	rumpuser_cv_signal(lwp0cv);
	rumpuser_mutex_exit(lwp0mtx);
}

/*
 * rump_schedule: ensure that the calling host thread has a valid lwp context,
 * i.e. ensure that curlwp != NULL.  Also, ensure that there is
 * a 1:1 mapping between the lwp and a rump kernel cpu.
 */
void
rump_schedule()
{
	struct lwp *l;

	/*
	 * If there is no dedicated lwp, allocate a temp one and
	 * set it to be free'd upon unschedule().  Use lwp0 context
	 * for reserving the necessary resources.  Don't optimize
	 * for this case -- anyone who cares about performance will
	 * start a real thread.
	 */
	if (__predict_true((l = curlwp) != NULL)) {
		rump_schedule_cpu(l);
		LWP_CACHE_CREDS(l, l->l_proc);
	} else {
		lwp0busy();

		/* schedule cpu and use lwp0 */
		rump_schedule_cpu(&lwp0);
		rump_lwproc_curlwp_set(&lwp0);

		/* allocate thread, switch to it, and release lwp0 */
		l = rump__lwproc_alloclwp(initproc);
		rump_lwproc_switch(l);
		lwp0rele();

		/*
		 * mark new thread dead-on-unschedule.  this
		 * means that we'll be running with l_refcnt == 0.
		 * relax, it's fine.
		 */
		rump_lwproc_releaselwp();
	}
}
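
/*
 * Illustrative sketch only, kept out of the build: the intended
 * calling pattern for a host thread entering the rump kernel.  The
 * function name and the operation in the middle are placeholders.
 */
#if 0
void
example_kernel_entry(void)
{

	rump_schedule();	/* curlwp valid, rump CPU held from here */
	/* ... perform kernel operations ... */
	rump_unschedule();	/* rump CPU released */
}
#endif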

void
rump_schedule_cpu(struct lwp *l)
{

	rump_schedule_cpu_interlock(l, NULL);
}

/*
 * Schedule a CPU.  This optimizes for the case where we schedule
 * the same thread often, and we have nCPU >= nFrequently-Running-Thread
 * (where CPU is virtual rump cpu, not host CPU).
 */
void
rump_schedule_cpu_interlock(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	void *old;
	bool domigrate;
	bool bound = l->l_pflag & LP_BOUND;

	l->l_stat = LSRUN;

	/*
	 * First, try fastpath: if we were the previous user of the
	 * CPU, everything is in order cachewise and we can just
	 * proceed to use it.
	 *
	 * If we are a different thread (i.e. CAS fails), we must go
	 * through a memory barrier to ensure we get a truthful
	 * view of the world.
	 */
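	/*
	 * Concretely: the CAS below succeeds only if rcpu_prevlwp
	 * still holds this exact lwp, i.e. we were the last user and
	 * nobody has claimed the CPU since; the swap to RCPULWP_BUSY
	 * marks the CPU as held in the same atomic step.
	 */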

	KASSERT(l->l_target_cpu != NULL);
	rcpu = cpuinfo_to_rumpcpu(l->l_target_cpu);
	if (atomic_cas_ptr(&rcpu->rcpu_prevlwp, l, RCPULWP_BUSY) == l) {
		if (interlock == rcpu->rcpu_mtx)
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
		SCHED_FASTPATH(rcpu);
		/* jones, you're the man */
		goto fastlane;
	}

	/*
	 * Else, it's the slowpath for us.  First, determine if we
	 * can migrate.
	 */
	if (ncpu == 1)
		domigrate = false;
	else
		domigrate = true;

	/* Take lock.  This acts as a load barrier too. */
	if (interlock != rcpu->rcpu_mtx)
		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);

	for (;;) {
		SCHED_SLOWPATH(rcpu);
		old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, RCPULWP_WANTED);

		/* CPU is free? */
		if (old != RCPULWP_BUSY && old != RCPULWP_WANTED) {
			if (atomic_cas_ptr(&rcpu->rcpu_prevlwp,
			    RCPULWP_WANTED, RCPULWP_BUSY) == RCPULWP_WANTED) {
				break;
			}
		}

		/*
		 * Do we want to migrate once?
		 * This may need a slightly better algorithm, or we
		 * might cache pingpong eternally for non-frequent
		 * threads.
		 */
		if (domigrate && !bound) {
			domigrate = false;
			SCHED_MIGRATED(rcpu);
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
			rcpu = getnextcpu();
			rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
			continue;
		}

		/* Want CPU, wait until it's released and retry */
		rcpu->rcpu_wanted++;
		rumpuser_cv_wait_nowrap(rcpu->rcpu_cv, rcpu->rcpu_mtx);
		rcpu->rcpu_wanted--;
	}
	rumpuser_mutex_exit(rcpu->rcpu_mtx);

 fastlane:
	ci = rcpu->rcpu_ci;
	l->l_cpu = l->l_target_cpu = ci;
	l->l_mutex = rcpu->rcpu_ci->ci_schedstate.spc_mutex;
	l->l_ncsw++;
	l->l_stat = LSONPROC;

	/*
	 * No interrupts, so ci_curlwp == ci_onproc.
	 * Okay, we could make an attempt to not set ci_onproc
	 * in the case that an interrupt is scheduled immediately
	 * after a user proc, but leave that for later.
	 */
	ci->ci_curlwp = ci->ci_onproc = l;
}

void
rump_unschedule()
{
	struct lwp *l = curlwp;
#ifdef DIAGNOSTIC
	int nlock;

	KERNEL_UNLOCK_ALL(l, &nlock);
	KASSERT(nlock == 0);
#endif

	KASSERT(l->l_mutex == l->l_cpu->ci_schedstate.spc_mutex);
	rump_unschedule_cpu(l);
	l->l_mutex = &unruntime_lock;
	l->l_stat = LSSTOP;

	/*
	 * Check special conditions:
	 *  1) do we need to free the lwp which just unscheduled?
	 *     (locking order: lwp0, cpu)
	 *  2) do we want to clear curlwp for the current host thread
	 */
	if (__predict_false(l->l_flag & LW_WEXIT)) {
		lwp0busy();

		/* Now that we have lwp0, we can schedule a CPU again */
		rump_schedule_cpu(l);

		/* switch to lwp0.  this frees the old thread */
		KASSERT(l->l_flag & LW_WEXIT);
		rump_lwproc_switch(&lwp0);

		/* release lwp0 */
		rump_unschedule_cpu(&lwp0);
		lwp0.l_mutex = &unruntime_lock;
		lwp0.l_pflag &= ~LP_RUNNING;
		lwp0rele();
		rump_lwproc_curlwp_clear(&lwp0);

	} else if (__predict_false(l->l_flag & LW_RUMP_CLEAR)) {
		rump_lwproc_curlwp_clear(l);
		l->l_flag &= ~LW_RUMP_CLEAR;
	}
}

void
rump_unschedule_cpu(struct lwp *l)
{

	rump_unschedule_cpu_interlock(l, NULL);
}

void
rump_unschedule_cpu_interlock(struct lwp *l, void *interlock)
{

	if ((l->l_pflag & LP_INTR) == 0)
		rump_softint_run(l->l_cpu);
	rump_unschedule_cpu1(l, interlock);
}

void
rump_unschedule_cpu1(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	void *old;

	ci = l->l_cpu;
	ci->ci_curlwp = ci->ci_onproc = NULL;
	rcpu = cpuinfo_to_rumpcpu(ci);

	KASSERT(rcpu->rcpu_ci == ci);

	/*
	 * Make sure all stores are seen before the CPU release.  This
	 * is relevant only in the non-fastpath scheduling case, but
	 * we don't know here if that's going to happen, so need to
	 * expect the worst.
	 *
	 * If the scheduler interlock was requested by the caller, we
	 * need to obtain it before we release the CPU.  Otherwise, we
	 * risk a race condition where another thread is scheduled
	 * onto the rump kernel CPU before our current thread can
	 * grab the interlock.
	 */
	if (interlock == rcpu->rcpu_mtx)
		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
	else
		membar_exit();

	/* Release the CPU. */
	old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, l);

	/* No waiters?  No problems.  We're outta here. */
	if (old == RCPULWP_BUSY) {
		return;
	}

	KASSERT(old == RCPULWP_WANTED);

	/*
	 * Ok, things weren't so snappy.
	 *
	 * Snailpath: take lock and signal anyone waiting for this CPU.
	 */

	if (interlock != rcpu->rcpu_mtx)
		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
	if (rcpu->rcpu_wanted)
		rumpuser_cv_broadcast(rcpu->rcpu_cv);
	if (interlock != rcpu->rcpu_mtx)
		rumpuser_mutex_exit(rcpu->rcpu_mtx);
}

/* Give up and retake CPU (perhaps a different one) */
void
yield()
{
	struct lwp *l = curlwp;
	int nlocks;

	KERNEL_UNLOCK_ALL(l, &nlocks);
	rump_unschedule_cpu(l);
	rump_schedule_cpu(l);
	KERNEL_LOCK(nlocks, l);
}

void
preempt()
{

	yield();
}

bool
kpreempt(uintptr_t where)
{

	return false;
}

/*
 * There is no kernel thread preemption in rump currently.  But call
 * the implementing macros anyway in case they grow some side-effects
 * down the road.
 */
void
kpreempt_disable(void)
{

	KPREEMPT_DISABLE(curlwp);
}

void
kpreempt_enable(void)
{

	KPREEMPT_ENABLE(curlwp);
}

bool
kpreempt_disabled(void)
{
#if 0
	const lwp_t *l = curlwp;

	return l->l_nopreempt != 0 || l->l_stat == LSZOMB ||
	    (l->l_flag & LW_IDLE) != 0 || cpu_kpreempt_disabled();
#endif
	/* XXX: emulate cpu_kpreempt_disabled() */
	return true;
}

void
suspendsched(void)
{

	/*
	 * Could wait until everyone is out and block further entries,
	 * but skip that for now.
	 */
}

void
sched_nice(struct proc *p, int level)
{

	/* nothing to do for now */
}

void
setrunnable(struct lwp *l)
{

	sched_enqueue(l);
}

void
sched_enqueue(struct lwp *l)
{

	rump_thread_allow(l);
}

void
sched_resched_cpu(struct cpu_info *ci, pri_t pri, bool unlock)
{

}

void
sched_resched_lwp(struct lwp *l, bool unlock)
{

}

void
sched_dequeue(struct lwp *l)
{

	panic("sched_dequeue not implemented");
}