/*	$NetBSD: scheduler.c,v 1.33 2013/05/02 19:15:01 pooka Exp $	*/

/*
 * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: scheduler.c,v 1.33 2013/05/02 19:15:01 pooka Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/queue.h>
#include <sys/select.h>
#include <sys/systm.h>

#include <rump/rumpuser.h>

#include "rump_private.h"

static struct cpu_info rump_cpus[MAXCPUS];
static struct rumpcpu {
	/* needed in fastpath */
	struct cpu_info *rcpu_ci;
	void *rcpu_prevlwp;

	/* needed in slowpath */
	struct rumpuser_mtx *rcpu_mtx;
	struct rumpuser_cv *rcpu_cv;
	int rcpu_wanted;

	/* offset 20 (P=4) or 36 (P=8) here */

	/*
	 * Some stats.  Not really that necessary, but we should
	 * have room.  Note that these overflow quite fast, so need
	 * to be collected often.
	 */
	unsigned int rcpu_fastpath;
	unsigned int rcpu_slowpath;
	unsigned int rcpu_migrated;

	/* offset 32 (P=4) or 48 (P=8) */

	int rcpu_align[0] __aligned(CACHE_LINE_SIZE);
} rcpu_storage[MAXCPUS];
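
/*
 * The zero-length, cache-line-aligned rcpu_align member above pads each
 * struct rumpcpu out to a full cache line, so that host threads running
 * on different virtual CPUs do not false-share the hot rcpu_prevlwp word.
 */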

struct cpu_info *rump_cpu = &rump_cpus[0];
kcpuset_t *kcpuset_attached = NULL;
kcpuset_t *kcpuset_running = NULL;
int ncpu;

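/*
 * rcpu_prevlwp is used as a lock-free handoff word: it normally holds
 * the lwp which last ran on the CPU, RCPULWP_BUSY while the CPU is held,
 * and RCPULWP_WANTED when somebody is sleeping on rcpu_cv waiting for it.
 */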
#define RCPULWP_BUSY	((void *)-1)
#define RCPULWP_WANTED	((void *)-2)

static struct rumpuser_mtx *lwp0mtx;
static struct rumpuser_cv *lwp0cv;
static unsigned nextcpu;

kmutex_t unruntime_lock; /* unruntime lwp lock.  practically unused */

static bool lwp0isbusy = false;

/*
 * Keep some stats.
 *
 * Keeping track of them is not really critical for speed, unless
 * stats happen to be on a different cache line (CACHE_LINE_SIZE is
 * really just a coarse estimate), so default for the performant case
 * (i.e. no stats).
 */
#ifdef RUMPSCHED_STATS
#define SCHED_FASTPATH(rcpu)	rcpu->rcpu_fastpath++;
#define SCHED_SLOWPATH(rcpu)	rcpu->rcpu_slowpath++;
#define SCHED_MIGRATED(rcpu)	rcpu->rcpu_migrated++;
#else
#define SCHED_FASTPATH(rcpu)
#define SCHED_SLOWPATH(rcpu)
#define SCHED_MIGRATED(rcpu)
#endif

struct cpu_info *
cpu_lookup(u_int index)
{

	return &rump_cpus[index];
}

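/*
 * Pick the next virtual CPU for a thread which wants to migrate.
 * This is a simple round-robin: nextcpu just keeps incrementing and
 * the result is reduced modulo ncpu.
 */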
static inline struct rumpcpu *
getnextcpu(void)
{
	unsigned newcpu;

	newcpu = atomic_inc_uint_nv(&nextcpu);
	if (__predict_false(ncpu > UINT_MAX/2))
		atomic_and_uint(&nextcpu, 0);
	newcpu = newcpu % ncpu;

	return &rcpu_storage[newcpu];
}

/* this could/should be mi_attach_cpu? */
void
rump_cpus_bootstrap(int *nump)
{
	struct cpu_info *ci;
	int num = *nump;
	int i;

	if (num > MAXCPUS) {
		aprint_verbose("CPU limit: %d wanted, %d (MAXCPUS) "
		    "available (adjusted)\n", num, MAXCPUS);
		num = MAXCPUS;
	}

	for (i = 0; i < num; i++) {
		ci = &rump_cpus[i];
		ci->ci_index = i;
	}

	kcpuset_create(&kcpuset_attached, true);
	kcpuset_create(&kcpuset_running, true);

	/* attach first cpu for bootstrap */
	rump_cpu_attach(&rump_cpus[0]);
	ncpu = 1;
	*nump = num;
}

void
rump_scheduler_init(int numcpu)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	int i;

	rumpuser_mutex_init(&lwp0mtx, RUMPUSER_MTX_SPIN);
	rumpuser_cv_init(&lwp0cv);
	for (i = 0; i < numcpu; i++) {
		rcpu = &rcpu_storage[i];
		ci = &rump_cpus[i];
		rcpu->rcpu_ci = ci;
		ci->ci_schedstate.spc_mutex =
		    mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
		ci->ci_schedstate.spc_flags = SPCF_RUNNING;
		rcpu->rcpu_wanted = 0;
		rumpuser_cv_init(&rcpu->rcpu_cv);
		rumpuser_mutex_init(&rcpu->rcpu_mtx, RUMPUSER_MTX_SPIN);
	}

	mutex_init(&unruntime_lock, MUTEX_DEFAULT, IPL_SCHED);
}

/*
 * condvar ops using scheduler lock as the rumpuser interlock.
 */
void
rump_schedlock_cv_wait(struct rumpuser_cv *cv)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = &rcpu_storage[l->l_cpu-&rump_cpus[0]];

	/* mutex will be taken and released in cpu schedule/unschedule */
	rumpuser_cv_wait(cv, rcpu->rcpu_mtx);
}

int
rump_schedlock_cv_timedwait(struct rumpuser_cv *cv, const struct timespec *ts)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = &rcpu_storage[l->l_cpu-&rump_cpus[0]];

	/* mutex will be taken and released in cpu schedule/unschedule */
	return rumpuser_cv_timedwait(cv, rcpu->rcpu_mtx,
	    ts->tv_sec, ts->tv_nsec);
}

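/*
 * lwp0 doubles as a shared bootstrap context for host threads which
 * enter the rump kernel without an lwp of their own.  lwp0busy() and
 * lwp0rele() serialize its use under lwp0mtx/lwp0cv.
 */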
static void
lwp0busy(void)
{

	/* busy lwp0 */
	KASSERT(curlwp == NULL || curlwp->l_stat != LSONPROC);
	rumpuser_mutex_enter_nowrap(lwp0mtx);
	while (lwp0isbusy)
		rumpuser_cv_wait_nowrap(lwp0cv, lwp0mtx);
	lwp0isbusy = true;
	rumpuser_mutex_exit(lwp0mtx);
}

static void
lwp0rele(void)
{

	rumpuser_mutex_enter_nowrap(lwp0mtx);
	KASSERT(lwp0isbusy == true);
	lwp0isbusy = false;
	rumpuser_cv_signal(lwp0cv);
	rumpuser_mutex_exit(lwp0mtx);
}

/*
 * rump_schedule: ensure that the calling host thread has a valid lwp context.
 * ie. ensure that curlwp != NULL.  Also, ensure that there is
 * a 1:1 mapping between the lwp and a rump kernel cpu.
 */
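/*
 * Rough usage sketch (the actual callers live in the rump glue code,
 * not in this file):
 *
 *	rump_schedule();
 *	... call into the rump kernel proper ...
 *	rump_unschedule();
 */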
void
rump_schedule()
{
	struct lwp *l;

	/*
	 * If there is no dedicated lwp, allocate a temp one and
	 * set it to be free'd upon unschedule().  Use lwp0 context
	 * for reserving the necessary resources.  Don't optimize
	 * for this case -- anyone who cares about performance will
	 * start a real thread.
	 */
	if (__predict_true((l = rumpuser_curlwp()) != NULL)) {
		rump_schedule_cpu(l);
		LWP_CACHE_CREDS(l, l->l_proc);
	} else {
		lwp0busy();

		/* schedule cpu and use lwp0 */
		rump_schedule_cpu(&lwp0);
		rumpuser_curlwpop(RUMPUSER_LWP_SET, &lwp0);

		/* allocate thread, switch to it, and release lwp0 */
		l = rump__lwproc_alloclwp(initproc);
		rump_lwproc_switch(l);
		lwp0rele();

		/*
		 * mark new thread dead-on-unschedule.  this
		 * means that we'll be running with l_refcnt == 0.
		 * relax, it's fine.
		 */
		rump_lwproc_releaselwp();
	}
}

void
rump_schedule_cpu(struct lwp *l)
{

	rump_schedule_cpu_interlock(l, NULL);
}

/*
 * Schedule a CPU.  This optimizes for the case where we schedule
 * the same thread often, and we have nCPU >= nFrequently-Running-Thread
 * (where CPU is virtual rump cpu, not host CPU).
 */
void
rump_schedule_cpu_interlock(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	void *old;
	bool domigrate;
	bool bound = l->l_pflag & LP_BOUND;

	l->l_stat = LSRUN;

	/*
	 * First, try fastpath: if we were the previous user of the
	 * CPU, everything is in order cachewise and we can just
	 * proceed to use it.
	 *
	 * If we are a different thread (i.e. CAS fails), we must go
	 * through a memory barrier to ensure we get a truthful
	 * view of the world.
	 */

	KASSERT(l->l_target_cpu != NULL);
	rcpu = &rcpu_storage[l->l_target_cpu-&rump_cpus[0]];
	if (atomic_cas_ptr(&rcpu->rcpu_prevlwp, l, RCPULWP_BUSY) == l) {
		if (interlock == rcpu->rcpu_mtx)
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
		SCHED_FASTPATH(rcpu);
		/* jones, you're the man */
		goto fastlane;
	}

	/*
	 * Else, it's the slowpath for us.  First, determine if we
	 * can migrate.
	 */
	if (ncpu == 1)
		domigrate = false;
	else
		domigrate = true;

	/* Take lock.  This acts as a load barrier too. */
	if (interlock != rcpu->rcpu_mtx)
		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);

	for (;;) {
		SCHED_SLOWPATH(rcpu);
		old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, RCPULWP_WANTED);

		/* CPU is free? */
		if (old != RCPULWP_BUSY && old != RCPULWP_WANTED) {
			if (atomic_cas_ptr(&rcpu->rcpu_prevlwp,
			    RCPULWP_WANTED, RCPULWP_BUSY) == RCPULWP_WANTED) {
				break;
			}
		}

		/*
		 * Do we want to migrate once?
		 * This may need a slightly better algorithm, or we
		 * might cache pingpong eternally for non-frequent
		 * threads.
		 */
		if (domigrate && !bound) {
			domigrate = false;
			SCHED_MIGRATED(rcpu);
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
			rcpu = getnextcpu();
			rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
			continue;
		}

		/* Want CPU, wait until it's released and retry */
		rcpu->rcpu_wanted++;
		rumpuser_cv_wait_nowrap(rcpu->rcpu_cv, rcpu->rcpu_mtx);
		rcpu->rcpu_wanted--;
	}
	rumpuser_mutex_exit(rcpu->rcpu_mtx);

 fastlane:
	l->l_cpu = l->l_target_cpu = rcpu->rcpu_ci;
	l->l_mutex = rcpu->rcpu_ci->ci_schedstate.spc_mutex;
	l->l_ncsw++;
	l->l_stat = LSONPROC;

	rcpu->rcpu_ci->ci_curlwp = l;
}

void
rump_unschedule()
{
	struct lwp *l = rumpuser_curlwp();
#ifdef DIAGNOSTIC
	int nlock;

	KERNEL_UNLOCK_ALL(l, &nlock);
	KASSERT(nlock == 0);
#endif

	KASSERT(l->l_mutex == l->l_cpu->ci_schedstate.spc_mutex);
	rump_unschedule_cpu(l);
	l->l_mutex = &unruntime_lock;
	l->l_stat = LSSTOP;

	/*
	 * Check special conditions:
	 *  1) do we need to free the lwp which just unscheduled?
	 *     (locking order: lwp0, cpu)
	 *  2) do we want to clear curlwp for the current host thread
	 */
	if (__predict_false(l->l_flag & LW_WEXIT)) {
		lwp0busy();

		/* Now that we have lwp0, we can schedule a CPU again */
		rump_schedule_cpu(l);

		/* switch to lwp0.  this frees the old thread */
		KASSERT(l->l_flag & LW_WEXIT);
		rump_lwproc_switch(&lwp0);

		/* release lwp0 */
		rump_unschedule_cpu(&lwp0);
		lwp0.l_mutex = &unruntime_lock;
		lwp0.l_pflag &= ~LP_RUNNING;
		lwp0rele();
		rumpuser_curlwpop(RUMPUSER_LWP_SET, NULL);

	} else if (__predict_false(l->l_flag & LW_RUMP_CLEAR)) {
		rumpuser_curlwpop(RUMPUSER_LWP_SET, NULL);
		l->l_flag &= ~LW_RUMP_CLEAR;
	}
}

void
rump_unschedule_cpu(struct lwp *l)
{

	rump_unschedule_cpu_interlock(l, NULL);
}

void
rump_unschedule_cpu_interlock(struct lwp *l, void *interlock)
{

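	/*
	 * Run any pending soft interrupts on this CPU before giving it
	 * up, unless we are the soft interrupt lwp itself (LP_INTR).
	 */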
	if ((l->l_pflag & LP_INTR) == 0)
		rump_softint_run(l->l_cpu);
	rump_unschedule_cpu1(l, interlock);
}

void
rump_unschedule_cpu1(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	void *old;

	ci = l->l_cpu;
	ci->ci_curlwp = NULL;
	rcpu = &rcpu_storage[ci-&rump_cpus[0]];

	KASSERT(rcpu->rcpu_ci == ci);

	/*
	 * Make sure all stores are seen before the CPU release.  This
	 * is relevant only in the non-fastpath scheduling case, but
	 * we don't know here if that's going to happen, so need to
	 * expect the worst.
	 *
	 * If the scheduler interlock was requested by the caller, we
	 * need to obtain it before we release the CPU.  Otherwise, we
	 * risk a race condition where another thread is scheduled onto
	 * the rump kernel CPU before our current thread can grab the
	 * interlock.
	 */
	if (interlock == rcpu->rcpu_mtx)
		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
	else
		membar_exit();

	/* Release the CPU. */
	old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, l);

	/* No waiters?  No problems.  We're outta here. */
	if (old == RCPULWP_BUSY) {
		return;
	}

	KASSERT(old == RCPULWP_WANTED);

	/*
	 * Ok, things weren't so snappy.
	 *
	 * Snailpath: take lock and signal anyone waiting for this CPU.
	 */

	if (interlock != rcpu->rcpu_mtx)
		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
	if (rcpu->rcpu_wanted)
		rumpuser_cv_broadcast(rcpu->rcpu_cv);
	if (interlock != rcpu->rcpu_mtx)
		rumpuser_mutex_exit(rcpu->rcpu_mtx);
}

/* Give up and retake CPU (perhaps a different one) */
void
yield()
{
	struct lwp *l = curlwp;
	int nlocks;

	KERNEL_UNLOCK_ALL(l, &nlocks);
	rump_unschedule_cpu(l);
	rump_schedule_cpu(l);
	KERNEL_LOCK(nlocks, l);
}

void
preempt()
{

	yield();
}

bool
kpreempt(uintptr_t where)
{

	return false;
}

/*
 * There is no kernel thread preemption in rump currently.  But call
 * the implementing macros anyway in case they grow some side-effects
 * down the road.
 */
void
kpreempt_disable(void)
{

	//KPREEMPT_DISABLE(curlwp);
}

void
kpreempt_enable(void)
{

	//KPREEMPT_ENABLE(curlwp);
}

void
suspendsched(void)
{

	/*
	 * Could wait until everyone is out and block further entries,
	 * but skip that for now.
	 */
}

void
sched_nice(struct proc *p, int level)
{

	/* nothing to do for now */
}