/*	$NetBSD: scheduler.c,v 1.20 2010/09/07 07:59:48 pooka Exp $	*/

/*
 * Copyright (c) 2010 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: scheduler.c,v 1.20 2010/09/07 07:59:48 pooka Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/queue.h>
#include <sys/select.h>
#include <sys/systm.h>

#include <rump/rumpuser.h>

#include "rump_private.h"

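/*
 * Each virtual CPU is represented by a struct cpu_info for the rest of
 * the kernel and by a struct rumpcpu below for the scheduler's own
 * bookkeeping.  The rumpcpu structure is aligned to CACHE_LINE_SIZE so
 * that different virtual CPUs do not share a cache line.
 */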
static struct cpu_info rump_cpus[MAXCPUS];
static struct rumpcpu {
	/* needed in fastpath */
	struct cpu_info *rcpu_ci;
	void *rcpu_prevlwp;

	/* needed in slowpath */
	struct rumpuser_mtx *rcpu_mtx;
	struct rumpuser_cv *rcpu_cv;
	int rcpu_wanted;

	/* offset 20 (P=4) or 36 (P=8) here */

	/*
	 * Some stats.  Not really that necessary, but we should
	 * have room.  Note that these overflow quite fast, so need
	 * to be collected often.
	 */
	unsigned int rcpu_fastpath;
	unsigned int rcpu_slowpath;
	unsigned int rcpu_migrated;

	/* offset 32 (P=4) or 48 (P=8) */

	int rcpu_align[0] __aligned(CACHE_LINE_SIZE);
} rcpu_storage[MAXCPUS];
struct cpu_info *rump_cpu = &rump_cpus[0];
int ncpu;

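/*
 * rcpu_prevlwp either points to the lwp which last ran on the virtual
 * CPU (the fastpath hint) or holds one of the sentinels below:
 * RCPULWP_BUSY while somebody is running on the CPU, RCPULWP_WANTED
 * when a slowpath thread is waiting for it to be released.
 */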
#define RCPULWP_BUSY	((void *)-1)
#define RCPULWP_WANTED	((void *)-2)

static struct rumpuser_mtx *lwp0mtx;
static struct rumpuser_cv *lwp0cv;
static unsigned nextcpu;

static bool lwp0isbusy = false;

/*
 * Keep some stats.
 *
 * Keeping track of them is not really critical for speed, unless the
 * stats happen to end up on a different cache line than the fastpath
 * members (CACHE_LINE_SIZE is really just a coarse estimate), so
 * default to the performant case (i.e. no stats).
 */
#ifdef RUMPSCHED_STATS
#define SCHED_FASTPATH(rcpu) rcpu->rcpu_fastpath++;
#define SCHED_SLOWPATH(rcpu) rcpu->rcpu_slowpath++;
#define SCHED_MIGRATED(rcpu) rcpu->rcpu_migrated++;
#else
#define SCHED_FASTPATH(rcpu)
#define SCHED_SLOWPATH(rcpu)
#define SCHED_MIGRATED(rcpu)
#endif

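/* Map a CPU index to its cpu_info. */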
struct cpu_info *
cpu_lookup(u_int index)
{

	return &rump_cpus[index];
}

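/*
 * Pick a migration target in round-robin fashion.  nextcpu is a
 * free-running counter shared by all threads; taking it modulo ncpu
 * spreads migrating slowpath threads over the virtual CPUs.
 */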
static inline struct rumpcpu *
getnextcpu(void)
{
	unsigned newcpu;

	newcpu = atomic_inc_uint_nv(&nextcpu);
	/* reset the shared counter well before it can wrap */
	if (__predict_false(newcpu > UINT_MAX/2))
		atomic_and_uint(&nextcpu, 0);
	newcpu = newcpu % ncpu;

	return &rcpu_storage[newcpu];
}

/* this could/should be mi_attach_cpu? */
void
rump_cpus_bootstrap(int num)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	int i;

	if (num > MAXCPUS) {
		aprint_verbose("CPU limit: %d wanted, %d (MAXCPUS) available\n",
		    num, MAXCPUS);
		num = MAXCPUS;
	}

	for (i = 0; i < num; i++) {
		rcpu = &rcpu_storage[i];
		ci = &rump_cpus[i];
		ci->ci_index = i;
	}

	/* attach first cpu for bootstrap */
	rump_cpu_attach(&rump_cpus[0]);
	ncpu = 1;
}

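/*
 * Initialize the scheduler: set up the lwp0 lock and condvar plus the
 * per-CPU mutex, condvar and schedstate mutex for each virtual CPU.
 */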
void
rump_scheduler_init(int numcpu)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	int i;

	rumpuser_mutex_init(&lwp0mtx);
	rumpuser_cv_init(&lwp0cv);
	for (i = 0; i < numcpu; i++) {
		rcpu = &rcpu_storage[i];
		ci = &rump_cpus[i];
		rcpu->rcpu_ci = ci;
		ci->ci_schedstate.spc_mutex =
		    mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
		ci->ci_schedstate.spc_flags = SPCF_RUNNING;
		rcpu->rcpu_wanted = 0;
		rumpuser_cv_init(&rcpu->rcpu_cv);
		rumpuser_mutex_init(&rcpu->rcpu_mtx);
	}
}

/*
 * condvar ops using scheduler lock as the rumpuser interlock.
 */
void
rump_schedlock_cv_wait(struct rumpuser_cv *cv)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = &rcpu_storage[l->l_cpu-&rump_cpus[0]];

	/* mutex will be taken and released in cpu schedule/unschedule */
	rumpuser_cv_wait(cv, rcpu->rcpu_mtx);
}

int
rump_schedlock_cv_timedwait(struct rumpuser_cv *cv, const struct timespec *ts)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = &rcpu_storage[l->l_cpu-&rump_cpus[0]];

	/* mutex will be taken and released in cpu schedule/unschedule */
	return rumpuser_cv_timedwait(cv, rcpu->rcpu_mtx,
	    ts->tv_sec, ts->tv_nsec);
}

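/*
 * lwp0 is a shared bootstrap context: it is used for reserving
 * resources when an unregistered host thread enters the rump kernel
 * and while tearing down an exiting lwp.  Only one host thread may
 * hold it at a time; lwp0busy()/lwp0rele() serialize access with
 * lwp0mtx and lwp0cv.
 */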
static void
lwp0busy(void)
{

	/* busy lwp0 */
	KASSERT(curlwp == NULL || curlwp->l_cpu == NULL);
	rumpuser_mutex_enter_nowrap(lwp0mtx);
	while (lwp0isbusy)
		rumpuser_cv_wait_nowrap(lwp0cv, lwp0mtx);
	lwp0isbusy = true;
	rumpuser_mutex_exit(lwp0mtx);
}

static void
lwp0rele(void)
{

	rumpuser_mutex_enter_nowrap(lwp0mtx);
	KASSERT(lwp0isbusy == true);
	lwp0isbusy = false;
	rumpuser_cv_signal(lwp0cv);
	rumpuser_mutex_exit(lwp0mtx);
}

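/*
 * Schedule the current host thread onto a rump virtual CPU.  A typical
 * caller brackets entry into the rump kernel roughly like this (a
 * sketch of usage, not code from this file):
 *
 *	rump_schedule();
 *	... run kernel code on the virtual CPU ...
 *	rump_unschedule();
 *
 * If the host thread has no dedicated lwp, a temporary one is created
 * below and marked for release on unschedule.
 */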
void
rump_schedule()
{
	struct lwp *l;

	/*
	 * If there is no dedicated lwp, allocate a temp one and
	 * set it to be free'd upon unschedule().  Use lwp0 context
	 * for reserving the necessary resources.  Don't optimize
	 * for this case -- anyone who cares about performance will
	 * start a real thread.
	 */
	if (__predict_true((l = rumpuser_get_curlwp()) != NULL)) {
		rump_schedule_cpu(l);
		LWP_CACHE_CREDS(l, l->l_proc);
	} else {
		lwp0busy();

		/* schedule cpu and use lwp0 */
		rump_schedule_cpu(&lwp0);
		rumpuser_set_curlwp(&lwp0);

		/* allocate thread, switch to it, and release lwp0 */
		l = rump__lwproc_allockernlwp();
		rump_lwproc_switch(l);
		lwp0rele();

		/*
		 * mark new thread dead-on-unschedule.  this
		 * means that we'll be running with l_refcnt == 0.
		 * relax, it's fine.
		 */
		rump_lwproc_releaselwp();
	}
}

void
rump_schedule_cpu(struct lwp *l)
{

	rump_schedule_cpu_interlock(l, NULL);
}

/*
 * Schedule a CPU.  This optimizes for the case where we schedule
 * the same thread often, and we have nCPU >= nFrequently-Running-Thread
 * (where CPU is virtual rump cpu, not host CPU).
 */
void
rump_schedule_cpu_interlock(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	void *old;
	bool domigrate;
	bool bound = l->l_pflag & LP_BOUND;

	/*
	 * First, try fastpath: if we were the previous user of the
	 * CPU, everything is in order cachewise and we can just
	 * proceed to use it.
	 *
	 * If we are a different thread (i.e. CAS fails), we must go
	 * through a memory barrier to ensure we get a truthful
	 * view of the world.
	 */

	KASSERT(l->l_target_cpu != NULL);
	rcpu = &rcpu_storage[l->l_target_cpu-&rump_cpus[0]];
	if (atomic_cas_ptr(&rcpu->rcpu_prevlwp, l, RCPULWP_BUSY) == l) {
		if (__predict_true(interlock == rcpu->rcpu_mtx))
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
		SCHED_FASTPATH(rcpu);
		/* jones, you're the man */
		goto fastlane;
	}

	/*
	 * Else, it's the slowpath for us.  First, determine if we
	 * can migrate.
	 */
	if (ncpu == 1)
		domigrate = false;
	else
		domigrate = true;

	/* Take lock.  This acts as a load barrier too. */
	if (__predict_true(interlock != rcpu->rcpu_mtx))
		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);

	for (;;) {
		SCHED_SLOWPATH(rcpu);
		old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, RCPULWP_WANTED);

		/* CPU is free? */
		if (old != RCPULWP_BUSY && old != RCPULWP_WANTED) {
			if (atomic_cas_ptr(&rcpu->rcpu_prevlwp,
			    RCPULWP_WANTED, RCPULWP_BUSY) == RCPULWP_WANTED) {
				break;
			}
		}

		/*
		 * Do we want to migrate once?
		 * This may need a slightly better algorithm, or we
		 * might cache pingpong eternally for non-frequent
		 * threads.
		 */
		if (domigrate && !bound) {
			domigrate = false;
			SCHED_MIGRATED(rcpu);
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
			rcpu = getnextcpu();
			rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
			continue;
		}

		/* Want CPU, wait until it's released and retry */
		rcpu->rcpu_wanted++;
		rumpuser_cv_wait_nowrap(rcpu->rcpu_cv, rcpu->rcpu_mtx);
		rcpu->rcpu_wanted--;
	}
	rumpuser_mutex_exit(rcpu->rcpu_mtx);

 fastlane:
	l->l_cpu = l->l_target_cpu = rcpu->rcpu_ci;
	l->l_mutex = rcpu->rcpu_ci->ci_schedstate.spc_mutex;
	l->l_ncsw++;
}

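/*
 * Unschedule the current lwp from its virtual CPU.  If the lwp was
 * marked for exit, borrow lwp0 to free it; if it asked for curlwp to
 * be cleared, detach it from the host thread.
 */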
void
rump_unschedule()
{
	struct lwp *l;

	l = rumpuser_get_curlwp();
	KASSERT(l->l_mutex == l->l_cpu->ci_schedstate.spc_mutex);
	rump_unschedule_cpu(l);
	l->l_mutex = NULL;

	/*
	 * Check special conditions:
	 *  1) do we need to free the lwp which just unscheduled?
	 *     (locking order: lwp0, cpu)
	 *  2) do we want to clear curlwp for the current host thread
	 */
	if (__predict_false(l->l_flag & LW_WEXIT)) {
		lwp0busy();

		/* Now that we have lwp0, we can schedule a CPU again */
		rump_schedule_cpu(l);

		/* switch to lwp0.  this frees the old thread */
		KASSERT(l->l_flag & LW_WEXIT);
		rump_lwproc_switch(&lwp0);

		/* release lwp0 */
		rump_unschedule_cpu(&lwp0);
		lwp0.l_mutex = NULL;
		lwp0.l_pflag &= ~LP_RUNNING;
		lwp0rele();
		rumpuser_set_curlwp(NULL);

	} else if (__predict_false(l->l_flag & LW_RUMP_CLEAR)) {
		rumpuser_set_curlwp(NULL);
		l->l_flag &= ~LW_RUMP_CLEAR;
	}
}

void
rump_unschedule_cpu(struct lwp *l)
{

	rump_unschedule_cpu_interlock(l, NULL);
}

void
rump_unschedule_cpu_interlock(struct lwp *l, void *interlock)
{

	if ((l->l_pflag & LP_INTR) == 0)
		rump_softint_run(l->l_cpu);
	rump_unschedule_cpu1(l, interlock);
}

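/*
 * Release the virtual CPU proper.  The release is advertised by
 * swapping the lwp pointer back into rcpu_prevlwp: if the old value
 * was RCPULWP_BUSY nobody was waiting, otherwise it must have been
 * RCPULWP_WANTED and the waiters are woken up under rcpu_mtx.  If the
 * caller asked for the scheduler interlock, the mutex is held on
 * return.
 */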
void
rump_unschedule_cpu1(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	void *old;

	ci = l->l_cpu;
	l->l_cpu = NULL;
	rcpu = &rcpu_storage[ci-&rump_cpus[0]];

	KASSERT(rcpu->rcpu_ci == ci);

	/*
	 * Make sure all stores are seen before the CPU release.  This
	 * is relevant only in the non-fastpath scheduling case, but
	 * we don't know here if that's going to happen, so need to
	 * expect the worst.
	 */
	membar_exit();

	/* Release the CPU. */
	old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, l);

	/* No waiters?  No problems.  We're outta here. */
	if (old == RCPULWP_BUSY) {
		/* Was the scheduler interlock requested? */
		if (__predict_false(interlock == rcpu->rcpu_mtx))
			rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
		return;
	}

	KASSERT(old == RCPULWP_WANTED);

	/*
	 * Ok, things weren't so snappy.
	 *
	 * Snailpath: take lock and signal anyone waiting for this CPU.
	 */

	rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
	if (rcpu->rcpu_wanted)
		rumpuser_cv_broadcast(rcpu->rcpu_cv);

	if (__predict_true(interlock != rcpu->rcpu_mtx))
		rumpuser_mutex_exit(rcpu->rcpu_mtx);
}

/* Give up and retake CPU (perhaps a different one) */
void
yield()
{
	struct lwp *l = curlwp;
	int nlocks;

	KERNEL_UNLOCK_ALL(l, &nlocks);
	rump_unschedule_cpu(l);
	rump_schedule_cpu(l);
	KERNEL_LOCK(nlocks, l);
}

void
preempt()
{

	yield();
}

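/* There is no in-kernel preemption in rump, so report none happened. */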
bool
kpreempt(uintptr_t where)
{

	return false;
}

/*
 * There is no kernel thread preemption in rump currently.  But call
 * the implementing macros anyway in case they grow some side-effects
 * down the road.
 */
void
kpreempt_disable(void)
{

	KPREEMPT_DISABLE(curlwp);
}

void
kpreempt_enable(void)
{

	KPREEMPT_ENABLE(curlwp);
}

void
suspendsched(void)
{

	/*
	 * Could wait until everyone is out and block further entries,
	 * but skip that for now.
	 */
}

void
sched_nice(struct proc *p, int level)
{

	/* nothing to do for now */
}