/*	$NetBSD: scheduler.c,v 1.31 2013/04/27 16:32:57 pooka Exp $	*/

/*
 * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: scheduler.c,v 1.31 2013/04/27 16:32:57 pooka Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/queue.h>
#include <sys/select.h>
#include <sys/systm.h>

#include <rump/rumpuser.h>

#include "rump_private.h"

static struct cpu_info rump_cpus[MAXCPUS];
static struct rumpcpu {
	/* needed in fastpath */
	struct cpu_info *rcpu_ci;
	void *rcpu_prevlwp;

	/* needed in slowpath */
	struct rumpuser_mtx *rcpu_mtx;
	struct rumpuser_cv *rcpu_cv;
	int rcpu_wanted;

	/* offset 20 (P=4) or 36 (P=8) here */

	/*
	 * Some stats. Not really that necessary, but we should
	 * have room. Note that these overflow quite fast, so need
	 * to be collected often.
	 */
	unsigned int rcpu_fastpath;
	unsigned int rcpu_slowpath;
	unsigned int rcpu_migrated;

	/* offset 32 (P=4) or 50 (P=8) */

	int rcpu_align[0] __aligned(CACHE_LINE_SIZE);
} rcpu_storage[MAXCPUS];

struct cpu_info *rump_cpu = &rump_cpus[0];
kcpuset_t *kcpuset_attached = NULL;
kcpuset_t *kcpuset_running = NULL;
int ncpu;

#define RCPULWP_BUSY	((void *)-1)
#define RCPULWP_WANTED	((void *)-2)

static struct rumpuser_mtx *lwp0mtx;
static struct rumpuser_cv *lwp0cv;
static unsigned nextcpu;

kmutex_t unruntime_lock; /* unruntime lwp lock. practically unused */

static bool lwp0isbusy = false;

/*
 * Keep some stats.
 *
 * Keeping track of them is not really critical for speed, unless the
 * stats happen to land on a different cache line (CACHE_LINE_SIZE is
 * really just a coarse estimate), so default to the performant case
 * (i.e. no stats).
 */
#ifdef RUMPSCHED_STATS
#define SCHED_FASTPATH(rcpu) rcpu->rcpu_fastpath++;
#define SCHED_SLOWPATH(rcpu) rcpu->rcpu_slowpath++;
#define SCHED_MIGRATED(rcpu) rcpu->rcpu_migrated++;
#else
#define SCHED_FASTPATH(rcpu)
#define SCHED_SLOWPATH(rcpu)
#define SCHED_MIGRATED(rcpu)
#endif
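
/*
 * Illustrative sketch (not part of the original file): when built with
 * -DRUMPSCHED_STATS, the per-CPU counters above can be sampled with a
 * hypothetical helper along these lines.  Since the counters overflow
 * quickly, a real consumer would sample often and accumulate deltas
 * into wider variables.
 *
 *	static void
 *	rumpsched_dump_stats(int numcpu)	// hypothetical helper
 *	{
 *		int i;
 *
 *		for (i = 0; i < numcpu; i++) {
 *			struct rumpcpu *rc = &rcpu_storage[i];
 *
 *			printf("cpu%d: fast %u slow %u migrated %u\n", i,
 *			    rc->rcpu_fastpath, rc->rcpu_slowpath,
 *			    rc->rcpu_migrated);
 *		}
 *	}
 */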

struct cpu_info *
cpu_lookup(u_int index)
{

	return &rump_cpus[index];
}

static inline struct rumpcpu *
getnextcpu(void)
{
	unsigned newcpu;

	newcpu = atomic_inc_uint_nv(&nextcpu);
	/* reset the counter well before it wraps to keep the modulo stable */
	if (__predict_false(newcpu > UINT_MAX/2))
		atomic_and_uint(&nextcpu, 0);
	newcpu = newcpu % ncpu;

	return &rcpu_storage[newcpu];
}

/* this could/should be mi_attach_cpu? */
void
rump_cpus_bootstrap(int *nump)
{
	struct cpu_info *ci;
	int num = *nump;
	int i;

	if (num > MAXCPUS) {
		aprint_verbose("CPU limit: %d wanted, %d (MAXCPUS) "
		    "available (adjusted)\n", num, MAXCPUS);
		num = MAXCPUS;
	}

	for (i = 0; i < num; i++) {
		ci = &rump_cpus[i];
		ci->ci_index = i;
	}

	kcpuset_create(&kcpuset_attached, true);
	kcpuset_create(&kcpuset_running, true);

	/* attach first cpu for bootstrap */
	rump_cpu_attach(&rump_cpus[0]);
	ncpu = 1;
	*nump = num;
}

void
rump_scheduler_init(int numcpu)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	int i;

	rumpuser_mutex_init(&lwp0mtx, RUMPUSER_MTX_SPIN);
	rumpuser_cv_init(&lwp0cv);
	for (i = 0; i < numcpu; i++) {
		rcpu = &rcpu_storage[i];
		ci = &rump_cpus[i];
		rcpu->rcpu_ci = ci;
		ci->ci_schedstate.spc_mutex =
		    mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
		ci->ci_schedstate.spc_flags = SPCF_RUNNING;
		rcpu->rcpu_wanted = 0;
		rumpuser_cv_init(&rcpu->rcpu_cv);
		rumpuser_mutex_init(&rcpu->rcpu_mtx, RUMPUSER_MTX_SPIN);
	}

	mutex_init(&unruntime_lock, MUTEX_DEFAULT, IPL_NONE);
}
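
/*
 * Illustrative sketch (not part of the original file): during rump
 * kernel bootstrap the two routines above are expected to run in order,
 * first clamping the CPU count and attaching the boot CPU, then setting
 * up scheduler state for every configured virtual CPU.  The source of
 * numcpu is an assumption here (e.g. a RUMP_NCPU-style tunable):
 *
 *	int numcpu = wanted_ncpu;		// assumed to come from config
 *
 *	rump_cpus_bootstrap(&numcpu);		// may adjust numcpu downwards
 *	...					// other early initialization
 *	rump_scheduler_init(numcpu);
 */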

/*
 * condvar ops using scheduler lock as the rumpuser interlock.
 */
void
rump_schedlock_cv_wait(struct rumpuser_cv *cv)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = &rcpu_storage[l->l_cpu-&rump_cpus[0]];

	/* mutex will be taken and released in cpu schedule/unschedule */
	rumpuser_cv_wait(cv, rcpu->rcpu_mtx);
}

int
rump_schedlock_cv_timedwait(struct rumpuser_cv *cv, const struct timespec *ts)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = &rcpu_storage[l->l_cpu-&rump_cpus[0]];

	/* mutex will be taken and released in cpu schedule/unschedule */
	return rumpuser_cv_timedwait(cv, rcpu->rcpu_mtx,
	    ts->tv_sec, ts->tv_nsec);
}
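
/*
 * Illustrative sketch (not part of the original file): a timed wait
 * against the scheduler lock, assuming the rumpuser convention of an
 * errno-style non-zero return value when the wait does not complete
 * normally (e.g. on timeout):
 *
 *	struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
 *
 *	if (rump_schedlock_cv_timedwait(cv, &ts) != 0) {
 *		// timed out or failed in the hypercall layer
 *	}
 */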

static void
lwp0busy(void)
{

	/* busy lwp0 */
	KASSERT(curlwp == NULL || curlwp->l_stat != LSONPROC);
	rumpuser_mutex_enter_nowrap(lwp0mtx);
	while (lwp0isbusy)
		rumpuser_cv_wait_nowrap(lwp0cv, lwp0mtx);
	lwp0isbusy = true;
	rumpuser_mutex_exit(lwp0mtx);
}

static void
lwp0rele(void)
{

	rumpuser_mutex_enter_nowrap(lwp0mtx);
	KASSERT(lwp0isbusy == true);
	lwp0isbusy = false;
	rumpuser_cv_signal(lwp0cv);
	rumpuser_mutex_exit(lwp0mtx);
}
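
/*
 * lwp0busy()/lwp0rele() above are a plain mutex + condvar busy flag.
 * The same idiom expressed with POSIX threads, for readers more used to
 * pthreads (illustrative only, not part of the original file):
 *
 *	static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
 *	static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
 *	static bool busy;
 *
 *	void
 *	acquire(void)
 *	{
 *		pthread_mutex_lock(&mtx);
 *		while (busy)
 *			pthread_cond_wait(&cv, &mtx);
 *		busy = true;
 *		pthread_mutex_unlock(&mtx);
 *	}
 *
 *	void
 *	release(void)
 *	{
 *		pthread_mutex_lock(&mtx);
 *		busy = false;
 *		pthread_cond_signal(&cv);
 *		pthread_mutex_unlock(&mtx);
 *	}
 */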

/*
 * rump_schedule: ensure that the calling host thread has a valid lwp context,
 * i.e. ensure that rumpuser_get_curlwp() != NULL.
 */
void
rump_schedule()
{
	struct lwp *l;

	/*
	 * If there is no dedicated lwp, allocate a temp one and
	 * set it to be free'd upon unschedule(). Use lwp0 context
	 * for reserving the necessary resources. Don't optimize
	 * for this case -- anyone who cares about performance will
	 * start a real thread.
	 */
	if (__predict_true((l = rumpuser_get_curlwp()) != NULL)) {
		rump_schedule_cpu(l);
		LWP_CACHE_CREDS(l, l->l_proc);
	} else {
		lwp0busy();

		/* schedule cpu and use lwp0 */
		rump_schedule_cpu(&lwp0);
		rumpuser_set_curlwp(&lwp0);

		/* allocate thread, switch to it, and release lwp0 */
		l = rump__lwproc_alloclwp(initproc);
		rump_lwproc_switch(l);
		lwp0rele();

		/*
		 * mark new thread dead-on-unschedule. this
		 * means that we'll be running with l_refcnt == 0.
		 * relax, it's fine.
		 */
		rump_lwproc_releaselwp();
	}
}
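
/*
 * Illustrative sketch (not part of the original file): a host thread
 * making a direct call into the rump kernel brackets it with the
 * schedule/unschedule pair, so that a curlwp and a virtual CPU are valid
 * for the duration of the call:
 *
 *	rump_schedule();
 *	some_rump_kernel_function();	// hypothetical in-kernel call
 *	rump_unschedule();
 *
 * A thread without a dedicated lwp takes the slow lwp0 path above on
 * every such call, which is why long-lived clients are expected to set
 * up a proper lwp instead.
 */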

void
rump_schedule_cpu(struct lwp *l)
{

	rump_schedule_cpu_interlock(l, NULL);
}

/*
 * Schedule a CPU. This optimizes for the case where we schedule
 * the same thread often, and we have nCPU >= nFrequently-Running-Thread
 * (where CPU is virtual rump cpu, not host CPU).
 */
void
rump_schedule_cpu_interlock(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	void *old;
	bool domigrate;
	bool bound = l->l_pflag & LP_BOUND;

	l->l_stat = LSRUN;

	/*
	 * First, try fastpath: if we were the previous user of the
	 * CPU, everything is in order cachewise and we can just
	 * proceed to use it.
	 *
	 * If we are a different thread (i.e. CAS fails), we must go
	 * through a memory barrier to ensure we get a truthful
	 * view of the world.
	 */

	KASSERT(l->l_target_cpu != NULL);
	rcpu = &rcpu_storage[l->l_target_cpu-&rump_cpus[0]];
	if (atomic_cas_ptr(&rcpu->rcpu_prevlwp, l, RCPULWP_BUSY) == l) {
		if (interlock == rcpu->rcpu_mtx)
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
		SCHED_FASTPATH(rcpu);
		/* jones, you're the man */
		goto fastlane;
	}

	/*
	 * Else, it's the slowpath for us. First, determine if we
	 * can migrate.
	 */
	if (ncpu == 1)
		domigrate = false;
	else
		domigrate = true;

	/* Take lock. This acts as a load barrier too. */
	if (interlock != rcpu->rcpu_mtx)
		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);

	for (;;) {
		SCHED_SLOWPATH(rcpu);
		old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, RCPULWP_WANTED);

		/* CPU is free? */
		if (old != RCPULWP_BUSY && old != RCPULWP_WANTED) {
			if (atomic_cas_ptr(&rcpu->rcpu_prevlwp,
			    RCPULWP_WANTED, RCPULWP_BUSY) == RCPULWP_WANTED) {
				break;
			}
		}

		/*
		 * Do we want to migrate once?
		 * This may need a slightly better algorithm, or we
		 * might cache pingpong eternally for non-frequent
		 * threads.
		 */
		if (domigrate && !bound) {
			domigrate = false;
			SCHED_MIGRATED(rcpu);
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
			rcpu = getnextcpu();
			rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
			continue;
		}

		/* Want CPU, wait until it's released and retry. */
		rcpu->rcpu_wanted++;
		rumpuser_cv_wait_nowrap(rcpu->rcpu_cv, rcpu->rcpu_mtx);
		rcpu->rcpu_wanted--;
	}
	rumpuser_mutex_exit(rcpu->rcpu_mtx);

 fastlane:
	l->l_cpu = l->l_target_cpu = rcpu->rcpu_ci;
	l->l_mutex = rcpu->rcpu_ci->ci_schedstate.spc_mutex;
	l->l_ncsw++;
	l->l_stat = LSONPROC;

	rcpu->rcpu_ci->ci_curlwp = l;
}
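
/*
 * Added commentary (not in the original file): summary of the
 * rcpu_prevlwp protocol used above and in rump_unschedule_cpu1() below.
 *
 *	rcpu_prevlwp == some lwp *	CPU is free; the pointer names the
 *					previous user, so that same lwp may
 *					reclaim it with a single CAS
 *					(fastpath, no lock needed).
 *	rcpu_prevlwp == RCPULWP_BUSY	CPU is running an lwp and nobody is
 *					known to be waiting for it.
 *	rcpu_prevlwp == RCPULWP_WANTED	CPU is busy and at least one thread
 *					is sleeping on rcpu_cv, so the
 *					releaser must take rcpu_mtx and
 *					broadcast.
 */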

void
rump_unschedule()
{
	struct lwp *l = rumpuser_get_curlwp();
#ifdef DIAGNOSTIC
	int nlock;

	KERNEL_UNLOCK_ALL(l, &nlock);
	KASSERT(nlock == 0);
#endif

	KASSERT(l->l_mutex == l->l_cpu->ci_schedstate.spc_mutex);
	rump_unschedule_cpu(l);
	l->l_mutex = &unruntime_lock;
	l->l_stat = LSSTOP;

	/*
	 * Check special conditions:
	 *  1) do we need to free the lwp which just unscheduled?
	 *     (locking order: lwp0, cpu)
	 *  2) do we want to clear curlwp for the current host thread
	 */
	if (__predict_false(l->l_flag & LW_WEXIT)) {
		lwp0busy();

		/* Now that we have lwp0, we can schedule a CPU again */
		rump_schedule_cpu(l);

		/* switch to lwp0. this frees the old thread */
		KASSERT(l->l_flag & LW_WEXIT);
		rump_lwproc_switch(&lwp0);

		/* release lwp0 */
		rump_unschedule_cpu(&lwp0);
		lwp0.l_mutex = &unruntime_lock;
		lwp0.l_pflag &= ~LP_RUNNING;
		lwp0rele();
		rumpuser_set_curlwp(NULL);

	} else if (__predict_false(l->l_flag & LW_RUMP_CLEAR)) {
		rumpuser_set_curlwp(NULL);
		l->l_flag &= ~LW_RUMP_CLEAR;
	}
}

void
rump_unschedule_cpu(struct lwp *l)
{

	rump_unschedule_cpu_interlock(l, NULL);
}

void
rump_unschedule_cpu_interlock(struct lwp *l, void *interlock)
{

	if ((l->l_pflag & LP_INTR) == 0)
		rump_softint_run(l->l_cpu);
	rump_unschedule_cpu1(l, interlock);
}

void
rump_unschedule_cpu1(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	void *old;

	ci = l->l_cpu;
	ci->ci_curlwp = NULL;
	rcpu = &rcpu_storage[ci-&rump_cpus[0]];

	KASSERT(rcpu->rcpu_ci == ci);

	/*
	 * Make sure all stores are seen before the CPU release. This
	 * is relevant only in the non-fastpath scheduling case, but
	 * we don't know here if that's going to happen, so need to
	 * expect the worst.
	 *
	 * If the scheduler interlock was requested by the caller, we
	 * need to obtain it before we release the CPU. Otherwise, we
	 * risk a race condition where another thread is scheduled
	 * onto the rump kernel CPU before our current thread can
	 * grab the interlock.
	 */
	if (interlock == rcpu->rcpu_mtx)
		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
	else
		membar_exit();

	/* Release the CPU. */
	old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, l);

	/* No waiters? No problems. We're outta here. */
	if (old == RCPULWP_BUSY) {
		return;
	}

	KASSERT(old == RCPULWP_WANTED);

	/*
	 * Ok, things weren't so snappy.
	 *
	 * Snailpath: take lock and signal anyone waiting for this CPU.
	 */

	if (interlock != rcpu->rcpu_mtx)
		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
	if (rcpu->rcpu_wanted)
		rumpuser_cv_broadcast(rcpu->rcpu_cv);
	if (interlock != rcpu->rcpu_mtx)
		rumpuser_mutex_exit(rcpu->rcpu_mtx);
}
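
/*
 * Added commentary (not in the original file): the membar_exit() above
 * orders this thread's stores before the atomic_swap_ptr() that
 * publishes the CPU as free, and the slowpath in
 * rump_schedule_cpu_interlock() takes rcpu_mtx before touching the CPU,
 * which serves as the matching acquire ("Take lock. This acts as a load
 * barrier too.").  The lockless fastpath can only succeed for the same
 * lwp that released the CPU, so it needs no extra barrier.
 */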

/* Give up and retake CPU (perhaps a different one) */
void
yield()
{
	struct lwp *l = curlwp;
	int nlocks;

	KERNEL_UNLOCK_ALL(l, &nlocks);
	rump_unschedule_cpu(l);
	rump_schedule_cpu(l);
	KERNEL_LOCK(nlocks, l);
}

void
preempt()
{

	yield();
}

bool
kpreempt(uintptr_t where)
{

	return false;
}

/*
 * There is no kernel thread preemption in rump currently. The
 * implementing macros are left below, commented out, in case they grow
 * side effects down the road that we need to pick up.
 */
void
kpreempt_disable(void)
{

	//KPREEMPT_DISABLE(curlwp);
}

void
kpreempt_enable(void)
{

	//KPREEMPT_ENABLE(curlwp);
}

void
suspendsched(void)
{

	/*
	 * Could wait until everyone is out and block further entries,
	 * but skip that for now.
	 */
}

void
sched_nice(struct proc *p, int level)
{

	/* nothing to do for now */
}
