/*	$NetBSD: scheduler.c,v 1.25 2011/01/28 16:58:28 pooka Exp $	*/

/*
 * Copyright (c) 2010 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: scheduler.c,v 1.25 2011/01/28 16:58:28 pooka Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/queue.h>
#include <sys/select.h>
#include <sys/systm.h>

#include <rump/rumpuser.h>

#include "rump_private.h"

static struct cpu_info rump_cpus[MAXCPUS];
static struct rumpcpu {
	/* needed in fastpath */
	struct cpu_info *rcpu_ci;
	void *rcpu_prevlwp;

	/* needed in slowpath */
	struct rumpuser_mtx *rcpu_mtx;
	struct rumpuser_cv *rcpu_cv;
	int rcpu_wanted;

	/* offset 20 (P=4) or 36 (P=8) here */

	/*
	 * Some stats.  Not really that necessary, but we should
	 * have room.  Note that these overflow quite fast, so need
	 * to be collected often.
	 */
	unsigned int rcpu_fastpath;
	unsigned int rcpu_slowpath;
	unsigned int rcpu_migrated;

	/* offset 32 (P=4) or 48 (P=8) */

	int rcpu_align[0] __aligned(CACHE_LINE_SIZE);
} rcpu_storage[MAXCPUS];
struct cpu_info *rump_cpu = &rump_cpus[0];
int ncpu;

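/*
 * rcpu_prevlwp doubles as the CPU ownership word: it holds either the
 * lwp which last ran on the CPU (CPU is free), RCPULWP_BUSY (CPU is in
 * use with nobody waiting) or RCPULWP_WANTED (CPU is in use and at
 * least one thread is waiting for it).
 */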
#define RCPULWP_BUSY	((void *)-1)
#define RCPULWP_WANTED	((void *)-2)

static struct rumpuser_mtx *lwp0mtx;
static struct rumpuser_cv *lwp0cv;
static unsigned nextcpu;

kmutex_t unruntime_lock; /* unruntime lwp lock.  practically unused */

static bool lwp0isbusy = false;

/*
 * Keep some stats.
 *
 * Keeping track of them is not really critical for speed, unless
 * stats happen to be on a different cache line (CACHE_LINE_SIZE is
 * really just a coarse estimate), so default for the performant case
 * (i.e. no stats).
 */
#ifdef RUMPSCHED_STATS
#define SCHED_FASTPATH(rcpu) rcpu->rcpu_fastpath++;
#define SCHED_SLOWPATH(rcpu) rcpu->rcpu_slowpath++;
#define SCHED_MIGRATED(rcpu) rcpu->rcpu_migrated++;
#else
#define SCHED_FASTPATH(rcpu)
#define SCHED_SLOWPATH(rcpu)
#define SCHED_MIGRATED(rcpu)
#endif

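/* Map a CPU index to its statically allocated cpu_info. */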
struct cpu_info *
cpu_lookup(u_int index)
{

	return &rump_cpus[index];
}

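/*
 * Pick the next CPU to try migrating to, round-robin over all
 * configured virtual CPUs.  The shared counter is reset periodically
 * so that it does not wrap around mid-modulo.
 */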
static inline struct rumpcpu *
getnextcpu(void)
{
	unsigned newcpu;

	newcpu = atomic_inc_uint_nv(&nextcpu);
	if (__predict_false(newcpu > UINT_MAX/2))
		atomic_and_uint(&nextcpu, 0);
	newcpu = newcpu % ncpu;

	return &rcpu_storage[newcpu];
}

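/*
 * Bootstrap the virtual CPU configuration: clamp the requested CPU
 * count to MAXCPUS, number the cpu_info structures, and attach the
 * first CPU so that bootstrap code has something to run on (ncpu
 * stays at 1 at this point).
 */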
/* this could/should be mi_attach_cpu? */
void
rump_cpus_bootstrap(int *nump)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	int num = *nump;
	int i;

	if (num > MAXCPUS) {
		aprint_verbose("CPU limit: %d wanted, %d (MAXCPUS) "
		    "available (adjusted)\n", num, MAXCPUS);
		num = MAXCPUS;
	}

	for (i = 0; i < num; i++) {
		rcpu = &rcpu_storage[i];
		ci = &rump_cpus[i];
		ci->ci_index = i;
	}

	/* attach first cpu for bootstrap */
	rump_cpu_attach(&rump_cpus[0]);
	ncpu = 1;
	*nump = num;
}

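/*
 * Bring the scheduler up for the given number of virtual CPUs:
 * initialize the lwp0 bootstrap lock/cv and give each CPU its
 * scheduler state mutex plus the slowpath hypervisor mutex/cv pair.
 */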
void
rump_scheduler_init(int numcpu)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	int i;

	rumpuser_mutex_init(&lwp0mtx);
	rumpuser_cv_init(&lwp0cv);
	for (i = 0; i < numcpu; i++) {
		rcpu = &rcpu_storage[i];
		ci = &rump_cpus[i];
		rcpu->rcpu_ci = ci;
		ci->ci_schedstate.spc_mutex =
		    mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
		ci->ci_schedstate.spc_flags = SPCF_RUNNING;
		rcpu->rcpu_wanted = 0;
		rumpuser_cv_init(&rcpu->rcpu_cv);
		rumpuser_mutex_init(&rcpu->rcpu_mtx);
	}

	mutex_init(&unruntime_lock, MUTEX_DEFAULT, IPL_NONE);
}

/*
 * condvar ops using scheduler lock as the rumpuser interlock.
 */
void
rump_schedlock_cv_wait(struct rumpuser_cv *cv)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = &rcpu_storage[l->l_cpu-&rump_cpus[0]];

	/* mutex will be taken and released in cpu schedule/unschedule */
	rumpuser_cv_wait(cv, rcpu->rcpu_mtx);
}

int
rump_schedlock_cv_timedwait(struct rumpuser_cv *cv, const struct timespec *ts)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = &rcpu_storage[l->l_cpu-&rump_cpus[0]];

	/* mutex will be taken and released in cpu schedule/unschedule */
	return rumpuser_cv_timedwait(cv, rcpu->rcpu_mtx,
	    ts->tv_sec, ts->tv_nsec);
}

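/*
 * lwp0 acts as a shared bootstrap context: a host thread without a
 * dedicated lwp borrows it while allocating the real one.  Use is
 * serialized with lwp0mtx/lwp0cv through lwp0busy()/lwp0rele().
 */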
static void
lwp0busy(void)
{

	/* busy lwp0 */
	KASSERT(curlwp == NULL || curlwp->l_stat != LSONPROC);
	rumpuser_mutex_enter_nowrap(lwp0mtx);
	while (lwp0isbusy)
		rumpuser_cv_wait_nowrap(lwp0cv, lwp0mtx);
	lwp0isbusy = true;
	rumpuser_mutex_exit(lwp0mtx);
}

static void
lwp0rele(void)
{

	rumpuser_mutex_enter_nowrap(lwp0mtx);
	KASSERT(lwp0isbusy == true);
	lwp0isbusy = false;
	rumpuser_cv_signal(lwp0cv);
	rumpuser_mutex_exit(lwp0mtx);
}

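/*
 * Enter the rump kernel: bind the calling host thread to a virtual
 * CPU (allocating a temporary lwp via the lwp0 context if the caller
 * does not have one).  As a rough usage sketch, a caller would
 * bracket kernel work with the schedule/unschedule pair:
 *
 *	rump_schedule();
 *	... run rump kernel code ...
 *	rump_unschedule();
 */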
void
rump_schedule()
{
	struct lwp *l;

	/*
	 * If there is no dedicated lwp, allocate a temp one and
	 * set it to be free'd upon unschedule().  Use lwp0 context
	 * for reserving the necessary resources.  Don't optimize
	 * for this case -- anyone who cares about performance will
	 * start a real thread.
	 */
	if (__predict_true((l = rumpuser_get_curlwp()) != NULL)) {
		rump_schedule_cpu(l);
		LWP_CACHE_CREDS(l, l->l_proc);
	} else {
		lwp0busy();

		/* schedule cpu and use lwp0 */
		rump_schedule_cpu(&lwp0);
		rumpuser_set_curlwp(&lwp0);

		/* allocate thread, switch to it, and release lwp0 */
		l = rump__lwproc_alloclwp(initproc);
		rump_lwproc_switch(l);
		lwp0rele();

		/*
		 * mark new thread dead-on-unschedule.  this
		 * means that we'll be running with l_refcnt == 0.
		 * relax, it's fine.
		 */
		rump_lwproc_releaselwp();
	}
}

void
rump_schedule_cpu(struct lwp *l)
{

	rump_schedule_cpu_interlock(l, NULL);
}

/*
 * Schedule a CPU.  This optimizes for the case where we schedule
 * the same thread often, and we have nCPU >= nFrequently-Running-Thread
 * (where CPU is virtual rump cpu, not host CPU).
 */
void
rump_schedule_cpu_interlock(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	void *old;
	bool domigrate;
	bool bound = l->l_pflag & LP_BOUND;

	l->l_stat = LSRUN;

	/*
	 * First, try fastpath: if we were the previous user of the
	 * CPU, everything is in order cachewise and we can just
	 * proceed to use it.
	 *
	 * If we are a different thread (i.e. CAS fails), we must go
	 * through a memory barrier to ensure we get a truthful
	 * view of the world.
	 */

	KASSERT(l->l_target_cpu != NULL);
	rcpu = &rcpu_storage[l->l_target_cpu-&rump_cpus[0]];
	if (atomic_cas_ptr(&rcpu->rcpu_prevlwp, l, RCPULWP_BUSY) == l) {
		if (__predict_true(interlock == rcpu->rcpu_mtx))
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
		SCHED_FASTPATH(rcpu);
		/* jones, you're the man */
		goto fastlane;
	}

	/*
	 * Else, it's the slowpath for us.  First, determine if we
	 * can migrate.
	 */
	if (ncpu == 1)
		domigrate = false;
	else
		domigrate = true;

	/* Take lock.  This acts as a load barrier too. */
	if (__predict_true(interlock != rcpu->rcpu_mtx))
		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);

	for (;;) {
		SCHED_SLOWPATH(rcpu);
		old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, RCPULWP_WANTED);

		/* CPU is free? */
		if (old != RCPULWP_BUSY && old != RCPULWP_WANTED) {
			if (atomic_cas_ptr(&rcpu->rcpu_prevlwp,
			    RCPULWP_WANTED, RCPULWP_BUSY) == RCPULWP_WANTED) {
				break;
			}
		}

		/*
		 * Do we want to migrate once?
		 * This may need a slightly better algorithm, or we
		 * might cache pingpong eternally for non-frequent
		 * threads.
		 */
		if (domigrate && !bound) {
			domigrate = false;
			SCHED_MIGRATED(rcpu);
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
			rcpu = getnextcpu();
			rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
			continue;
		}

		/* Want CPU, wait until it's released and retry */
		rcpu->rcpu_wanted++;
		rumpuser_cv_wait_nowrap(rcpu->rcpu_cv, rcpu->rcpu_mtx);
		rcpu->rcpu_wanted--;
	}
	rumpuser_mutex_exit(rcpu->rcpu_mtx);

 fastlane:
	l->l_cpu = l->l_target_cpu = rcpu->rcpu_ci;
	l->l_mutex = rcpu->rcpu_ci->ci_schedstate.spc_mutex;
	l->l_ncsw++;
	l->l_stat = LSONPROC;

	rcpu->rcpu_ci->ci_curlwp = l;
}

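/*
 * Leave the rump kernel: release the virtual CPU and handle the
 * special cases of an exiting lwp (which is freed by switching back
 * to lwp0) and of a request to clear curlwp for the host thread.
 */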
void
rump_unschedule()
{
	struct lwp *l = rumpuser_get_curlwp();
#ifdef DIAGNOSTIC
	int nlock;

	KERNEL_UNLOCK_ALL(l, &nlock);
	KASSERT(nlock == 0);
#endif

	KASSERT(l->l_mutex == l->l_cpu->ci_schedstate.spc_mutex);
	rump_unschedule_cpu(l);
	l->l_mutex = &unruntime_lock;
	l->l_stat = LSSTOP;

	/*
	 * Check special conditions:
	 *  1) do we need to free the lwp which just unscheduled?
	 *     (locking order: lwp0, cpu)
	 *  2) do we want to clear curlwp for the current host thread
	 */
	if (__predict_false(l->l_flag & LW_WEXIT)) {
		lwp0busy();

		/* Now that we have lwp0, we can schedule a CPU again */
		rump_schedule_cpu(l);

		/* switch to lwp0.  this frees the old thread */
		KASSERT(l->l_flag & LW_WEXIT);
		rump_lwproc_switch(&lwp0);

		/* release lwp0 */
		rump_unschedule_cpu(&lwp0);
		lwp0.l_mutex = &unruntime_lock;
		lwp0.l_pflag &= ~LP_RUNNING;
		lwp0rele();
		rumpuser_set_curlwp(NULL);

	} else if (__predict_false(l->l_flag & LW_RUMP_CLEAR)) {
		rumpuser_set_curlwp(NULL);
		l->l_flag &= ~LW_RUMP_CLEAR;
	}
}

void
rump_unschedule_cpu(struct lwp *l)
{

	rump_unschedule_cpu_interlock(l, NULL);
}

void
rump_unschedule_cpu_interlock(struct lwp *l, void *interlock)
{

	if ((l->l_pflag & LP_INTR) == 0)
		rump_softint_run(l->l_cpu);
	rump_unschedule_cpu1(l, interlock);
}

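/*
 * Release the CPU proper.  The common case is a lockless swap of
 * rcpu_prevlwp back to the outgoing lwp; only if a waiter marked the
 * CPU as wanted do we take the slowpath lock and wake the waiters.
 */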
void
rump_unschedule_cpu1(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	void *old;

	ci = l->l_cpu;
	ci->ci_curlwp = NULL;
	rcpu = &rcpu_storage[ci-&rump_cpus[0]];

	KASSERT(rcpu->rcpu_ci == ci);

	/*
	 * Make sure all stores are seen before the CPU release.  This
	 * is relevant only in the non-fastpath scheduling case, but
	 * we don't know here if that's going to happen, so need to
	 * expect the worst.
	 */
	membar_exit();

	/* Release the CPU. */
	old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, l);

	/* No waiters?  No problems.  We're outta here. */
	if (old == RCPULWP_BUSY) {
		/* Was the scheduler interlock requested? */
		if (__predict_false(interlock == rcpu->rcpu_mtx))
			rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
		return;
	}

	KASSERT(old == RCPULWP_WANTED);

	/*
	 * Ok, things weren't so snappy.
	 *
	 * Snailpath: take lock and signal anyone waiting for this CPU.
	 */

	rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
	if (rcpu->rcpu_wanted)
		rumpuser_cv_broadcast(rcpu->rcpu_cv);

	if (__predict_true(interlock != rcpu->rcpu_mtx))
		rumpuser_mutex_exit(rcpu->rcpu_mtx);
}

/* Give up and retake CPU (perhaps a different one) */
void
yield()
{
	struct lwp *l = curlwp;
	int nlocks;

	KERNEL_UNLOCK_ALL(l, &nlocks);
	rump_unschedule_cpu(l);
	rump_schedule_cpu(l);
	KERNEL_LOCK(nlocks, l);
}

void
preempt()
{

	yield();
}

bool
kpreempt(uintptr_t where)
{

	return false;
}

/*
 * There is no kernel thread preemption in rump currently.  But call
 * the implementing macros anyway in case they grow some side-effects
 * down the road.
 */
void
kpreempt_disable(void)
{

	KPREEMPT_DISABLE(curlwp);
}

void
kpreempt_enable(void)
{

	KPREEMPT_ENABLE(curlwp);
}

void
suspendsched(void)
{

	/*
	 * Could wait until everyone is out and block further entries,
	 * but skip that for now.
	 */
}

void
sched_nice(struct proc *p, int level)
{

	/* nothing to do for now */
}