/*	$NetBSD: scheduler.c,v 1.22 2010/11/21 22:01:15 pooka Exp $	*/

/*
 * Copyright (c) 2010 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: scheduler.c,v 1.22 2010/11/21 22:01:15 pooka Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/queue.h>
#include <sys/select.h>
#include <sys/systm.h>

#include <rump/rumpuser.h>

#include "rump_private.h"
static struct cpu_info rump_cpus[MAXCPUS];
static struct rumpcpu {
	/* needed in fastpath */
	struct cpu_info *rcpu_ci;
	void *rcpu_prevlwp;

	/* needed in slowpath */
	struct rumpuser_mtx *rcpu_mtx;
	struct rumpuser_cv *rcpu_cv;
	int rcpu_wanted;

	/* offset 20 (P=4) or 36 (P=8) here */

	/*
	 * Some stats.  Not really that necessary, but we should
	 * have room.  Note that these overflow quite fast, so need
	 * to be collected often.
	 */
	unsigned int rcpu_fastpath;
	unsigned int rcpu_slowpath;
	unsigned int rcpu_migrated;
	/* offset 32 (P=4) or 48 (P=8); see the layout note after this struct */

	int rcpu_align[0] __aligned(CACHE_LINE_SIZE);
} rcpu_storage[MAXCPUS];
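
/*
 * Layout note for the offset comments above, assuming natural alignment,
 * 4-byte ints and P = sizeof(void *): the four pointers plus rcpu_wanted
 * occupy 4*P+4 bytes, i.e. 20 (P=4) or 36 (P=8), and the three stat
 * counters add another 12, giving 32 or 48.  The zero-length rcpu_align
 * member then pads each rcpu_storage element out to a multiple of
 * CACHE_LINE_SIZE, so rumpcpus should not share cache lines.
 */
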
struct cpu_info *rump_cpu = &rump_cpus[0];
int ncpu;

#define RCPULWP_BUSY	((void *)-1)
#define RCPULWP_WANTED	((void *)-2)
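
/*
 * rcpu_prevlwp doubles as the CPU ownership word: when the CPU is free,
 * it caches the lwp which last ran on the CPU (which is what makes the
 * schedule fastpath possible), RCPULWP_BUSY means the CPU is currently
 * taken, and RCPULWP_WANTED means somebody is sleeping on rcpu_cv
 * waiting for the CPU to be released.
 */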

static struct rumpuser_mtx *lwp0mtx;
static struct rumpuser_cv *lwp0cv;
static unsigned nextcpu;

static bool lwp0isbusy = false;

/*
 * Keep some stats.
 *
 * Keeping track of them is not really critical for speed, unless
 * stats happen to be on a different cache line (CACHE_LINE_SIZE is
 * really just a coarse estimate), so default for the performant case
 * (i.e. no stats).
 */
#ifdef RUMPSCHED_STATS
#define SCHED_FASTPATH(rcpu) rcpu->rcpu_fastpath++;
#define SCHED_SLOWPATH(rcpu) rcpu->rcpu_slowpath++;
#define SCHED_MIGRATED(rcpu) rcpu->rcpu_migrated++;
#else
#define SCHED_FASTPATH(rcpu)
#define SCHED_SLOWPATH(rcpu)
#define SCHED_MIGRATED(rcpu)
#endif
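
/*
 * Illustrative only: if built with -DRUMPSCHED_STATS, the per-CPU
 * counters could be sampled with something along the lines of the
 * hypothetical helper below (not part of the rump API).  Remember that
 * the counters wrap quickly, so they need to be collected often.
 */
#if 0
static void
rumpsched_dumpstats(void)
{
	struct rumpcpu *rcpu;
	int i;

	for (i = 0; i < ncpu; i++) {
		rcpu = &rcpu_storage[i];
		printf("cpu%d: fastpath %u slowpath %u migrated %u\n", i,
		    rcpu->rcpu_fastpath, rcpu->rcpu_slowpath,
		    rcpu->rcpu_migrated);
	}
}
#endif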

struct cpu_info *
cpu_lookup(u_int index)
{

	return &rump_cpus[index];
}

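/*
 * Pick the next CPU in round-robin order; used by the scheduling
 * slowpath below when a thread decides to migrate to another
 * (virtual) CPU.
 */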
static inline struct rumpcpu *
getnextcpu(void)
{
	unsigned newcpu;

	newcpu = atomic_inc_uint_nv(&nextcpu);
	if (__predict_false(ncpu > UINT_MAX/2))
		atomic_and_uint(&nextcpu, 0);
	newcpu = newcpu % ncpu;

	return &rcpu_storage[newcpu];
}

/* this could/should be mi_attach_cpu? */
void
rump_cpus_bootstrap(int *nump)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	int num = *nump;
	int i;

	if (num > MAXCPUS) {
		aprint_verbose("CPU limit: %d wanted, %d (MAXCPUS) "
		    "available (adjusted)\n", num, MAXCPUS);
		num = MAXCPUS;
	}

	for (i = 0; i < num; i++) {
		rcpu = &rcpu_storage[i];
		ci = &rump_cpus[i];
		ci->ci_index = i;
	}

	/* attach first cpu for bootstrap */
	rump_cpu_attach(&rump_cpus[0]);
	ncpu = 1;
	*nump = num;
}

void
rump_scheduler_init(int numcpu)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	int i;

	rumpuser_mutex_init(&lwp0mtx);
	rumpuser_cv_init(&lwp0cv);
	for (i = 0; i < numcpu; i++) {
		rcpu = &rcpu_storage[i];
		ci = &rump_cpus[i];
		rcpu->rcpu_ci = ci;
		ci->ci_schedstate.spc_mutex =
		    mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
		ci->ci_schedstate.spc_flags = SPCF_RUNNING;
		rcpu->rcpu_wanted = 0;
		rumpuser_cv_init(&rcpu->rcpu_cv);
		rumpuser_mutex_init(&rcpu->rcpu_mtx);
	}
}
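
/*
 * Illustrative call order only (hypothetical helper, not part of this
 * file): rump_cpus_bootstrap() may clamp the requested CPU count to
 * MAXCPUS and writes the adjusted value back, and that adjusted value
 * is what rump_scheduler_init() should then be given.
 */
#if 0
static void
example_rump_sched_bootstrap(void)
{
	int numcpu = 4;			/* requested virtual CPUs */

	rump_cpus_bootstrap(&numcpu);	/* may adjust numcpu downwards */
	/* ... other early bootstrap ... */
	rump_scheduler_init(numcpu);	/* per-CPU mutexes and condvars */
}
#endif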

/*
 * Condvar ops using the scheduler lock as the rumpuser interlock:
 * the current rump CPU's mutex is passed as the interlock, and it is
 * taken and released inside the CPU schedule/unschedule paths rather
 * than by these wrappers.
 */
void
rump_schedlock_cv_wait(struct rumpuser_cv *cv)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = &rcpu_storage[l->l_cpu-&rump_cpus[0]];

	/* mutex will be taken and released in cpu schedule/unschedule */
	rumpuser_cv_wait(cv, rcpu->rcpu_mtx);
}

int
rump_schedlock_cv_timedwait(struct rumpuser_cv *cv, const struct timespec *ts)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = &rcpu_storage[l->l_cpu-&rump_cpus[0]];

	/* mutex will be taken and released in cpu schedule/unschedule */
	return rumpuser_cv_timedwait(cv, rcpu->rcpu_mtx,
	    ts->tv_sec, ts->tv_nsec);
}

static void
lwp0busy(void)
{

	/* busy lwp0 */
	KASSERT(curlwp == NULL || curlwp->l_cpu == NULL);
	rumpuser_mutex_enter_nowrap(lwp0mtx);
	while (lwp0isbusy)
		rumpuser_cv_wait_nowrap(lwp0cv, lwp0mtx);
	lwp0isbusy = true;
	rumpuser_mutex_exit(lwp0mtx);
}

static void
lwp0rele(void)
{

	rumpuser_mutex_enter_nowrap(lwp0mtx);
	KASSERT(lwp0isbusy == true);
	lwp0isbusy = false;
	rumpuser_cv_signal(lwp0cv);
	rumpuser_mutex_exit(lwp0mtx);
}

void
rump_schedule()
{
	struct lwp *l;

	/*
	 * If there is no dedicated lwp, allocate a temp one and
	 * set it to be free'd upon unschedule().  Use lwp0 context
	 * for reserving the necessary resources.  Don't optimize
	 * for this case -- anyone who cares about performance will
	 * start a real thread.
	 */
	if (__predict_true((l = rumpuser_get_curlwp()) != NULL)) {
		rump_schedule_cpu(l);
		LWP_CACHE_CREDS(l, l->l_proc);
	} else {
		lwp0busy();

		/* schedule cpu and use lwp0 */
		rump_schedule_cpu(&lwp0);
		rumpuser_set_curlwp(&lwp0);

		/* allocate thread, switch to it, and release lwp0 */
		l = rump__lwproc_alloclwp(initproc);
		rump_lwproc_switch(l);
		lwp0rele();

		/*
		 * mark new thread dead-on-unschedule.  this
		 * means that we'll be running with l_refcnt == 0.
		 * relax, it's fine.
		 */
		rump_lwproc_releaselwp();
	}
}
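
/*
 * Illustrative only: hypervisor-side callers bracket entry into the
 * rump kernel roughly like this (hypothetical sketch), so that the code
 * in between runs with a rump virtual CPU scheduled.
 */
#if 0
static void
example_rump_kernel_entry(void)
{

	rump_schedule();
	/* ... code which requires a rump virtual CPU runs here ... */
	rump_unschedule();
}
#endif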

void
rump_schedule_cpu(struct lwp *l)
{

	rump_schedule_cpu_interlock(l, NULL);
}

/*
 * Schedule a CPU.  This optimizes for the case where we schedule
 * the same thread often, and we have nCPU >= nFrequently-Running-Thread
 * (where CPU is virtual rump cpu, not host CPU).
 */
void
rump_schedule_cpu_interlock(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	void *old;
	bool domigrate;
	bool bound = l->l_pflag & LP_BOUND;

	/*
	 * First, try fastpath: if we were the previous user of the
	 * CPU, everything is in order cachewise and we can just
	 * proceed to use it.
	 *
	 * If we are a different thread (i.e. CAS fails), we must go
	 * through a memory barrier to ensure we get a truthful
	 * view of the world.
	 */

	KASSERT(l->l_target_cpu != NULL);
	rcpu = &rcpu_storage[l->l_target_cpu-&rump_cpus[0]];
	if (atomic_cas_ptr(&rcpu->rcpu_prevlwp, l, RCPULWP_BUSY) == l) {
		if (__predict_true(interlock == rcpu->rcpu_mtx))
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
		SCHED_FASTPATH(rcpu);
		/* jones, you're the man */
		goto fastlane;
	}

	/*
	 * Else, it's the slowpath for us.  First, determine if we
	 * can migrate.
	 */
	if (ncpu == 1)
		domigrate = false;
	else
		domigrate = true;

	/* Take lock.  This acts as a load barrier too. */
	if (__predict_true(interlock != rcpu->rcpu_mtx))
		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);

	for (;;) {
		SCHED_SLOWPATH(rcpu);
		old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, RCPULWP_WANTED);

		/* CPU is free? */
		if (old != RCPULWP_BUSY && old != RCPULWP_WANTED) {
			if (atomic_cas_ptr(&rcpu->rcpu_prevlwp,
			    RCPULWP_WANTED, RCPULWP_BUSY) == RCPULWP_WANTED) {
				break;
			}
		}

		/*
		 * Do we want to migrate once?
		 * This may need a slightly better algorithm, or we
		 * might cache pingpong eternally for non-frequent
		 * threads.
		 */
		if (domigrate && !bound) {
			domigrate = false;
			SCHED_MIGRATED(rcpu);
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
			rcpu = getnextcpu();
			rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
			continue;
		}

		/* Want CPU, wait until it's released and retry */
		rcpu->rcpu_wanted++;
		rumpuser_cv_wait_nowrap(rcpu->rcpu_cv, rcpu->rcpu_mtx);
		rcpu->rcpu_wanted--;
	}
	rumpuser_mutex_exit(rcpu->rcpu_mtx);

 fastlane:
	l->l_cpu = l->l_target_cpu = rcpu->rcpu_ci;
	l->l_mutex = rcpu->rcpu_ci->ci_schedstate.spc_mutex;
	l->l_ncsw++;
}

void
rump_unschedule()
{
	struct lwp *l;

	l = rumpuser_get_curlwp();
	KASSERT(l->l_mutex == l->l_cpu->ci_schedstate.spc_mutex);
	rump_unschedule_cpu(l);
	l->l_mutex = NULL;

	/*
	 * Check special conditions:
	 *  1) do we need to free the lwp which just unscheduled?
	 *     (locking order: lwp0, cpu)
	 *  2) do we want to clear curlwp for the current host thread
	 */
	if (__predict_false(l->l_flag & LW_WEXIT)) {
		lwp0busy();

		/* Now that we have lwp0, we can schedule a CPU again */
		rump_schedule_cpu(l);

		/* switch to lwp0.  this frees the old thread */
		KASSERT(l->l_flag & LW_WEXIT);
		rump_lwproc_switch(&lwp0);

		/* release lwp0 */
		rump_unschedule_cpu(&lwp0);
		lwp0.l_mutex = NULL;
		lwp0.l_pflag &= ~LP_RUNNING;
		lwp0rele();
		rumpuser_set_curlwp(NULL);

	} else if (__predict_false(l->l_flag & LW_RUMP_CLEAR)) {
		rumpuser_set_curlwp(NULL);
		l->l_flag &= ~LW_RUMP_CLEAR;
	}
}

void
rump_unschedule_cpu(struct lwp *l)
{

	rump_unschedule_cpu_interlock(l, NULL);
}

void
rump_unschedule_cpu_interlock(struct lwp *l, void *interlock)
{

	if ((l->l_pflag & LP_INTR) == 0)
		rump_softint_run(l->l_cpu);
	rump_unschedule_cpu1(l, interlock);
}

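/*
 * Release the rump CPU the lwp is running on.  If the caller passed the
 * CPU's scheduler mutex as the interlock, this returns with that mutex
 * held; otherwise the mutex is left untouched (fast case) or released
 * again (snailpath).
 */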
void
rump_unschedule_cpu1(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	void *old;

	ci = l->l_cpu;
	l->l_cpu = NULL;
	rcpu = &rcpu_storage[ci-&rump_cpus[0]];

	KASSERT(rcpu->rcpu_ci == ci);

	/*
	 * Make sure all stores are seen before the CPU release.  This
	 * is relevant only in the non-fastpath scheduling case, but
	 * we don't know here if that's going to happen, so need to
	 * expect the worst.
	 */
	membar_exit();

	/* Release the CPU. */
	old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, l);

	/* No waiters?  No problems.  We're outta here. */
	if (old == RCPULWP_BUSY) {
		/* Was the scheduler interlock requested? */
		if (__predict_false(interlock == rcpu->rcpu_mtx))
			rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
		return;
	}

	KASSERT(old == RCPULWP_WANTED);

	/*
	 * Ok, things weren't so snappy.
	 *
	 * Snailpath: take lock and signal anyone waiting for this CPU.
	 */

	rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
	if (rcpu->rcpu_wanted)
		rumpuser_cv_broadcast(rcpu->rcpu_cv);

	if (__predict_true(interlock != rcpu->rcpu_mtx))
		rumpuser_mutex_exit(rcpu->rcpu_mtx);
}

/* Give up and retake CPU (perhaps a different one) */
void
yield()
{
	struct lwp *l = curlwp;
	int nlocks;

	KERNEL_UNLOCK_ALL(l, &nlocks);
	rump_unschedule_cpu(l);
	rump_schedule_cpu(l);
	KERNEL_LOCK(nlocks, l);
}

void
preempt()
{

	yield();
}

bool
kpreempt(uintptr_t where)
{

	return false;
}

/*
 * There is no kernel thread preemption in rump currently.  But call
 * the implementing macros anyway in case they grow some side-effects
 * down the road.
 */
void
kpreempt_disable(void)
{

	KPREEMPT_DISABLE(curlwp);
}

void
kpreempt_enable(void)
{

	KPREEMPT_ENABLE(curlwp);
}

void
suspendsched(void)
{

	/*
	 * Could wait until everyone is out and block further entries,
	 * but skip that for now.
	 */
}

void
sched_nice(struct proc *p, int level)
{

	/* nothing to do for now */
}