/* $NetBSD: scheduler.c,v 1.16 2010/05/28 18:17:24 pooka Exp $ */

/*
 * Copyright (c) 2010 Antti Kantee. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: scheduler.c,v 1.16 2010/05/28 18:17:24 pooka Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/queue.h>
#include <sys/select.h>
#include <sys/systm.h>

#include <rump/rumpuser.h>

#include "rump_private.h"

static struct cpu_info rump_cpus[MAXCPUS];
static struct rumpcpu {
	/* needed in fastpath */
	struct cpu_info *rcpu_ci;
	void *rcpu_prevlwp;

	/* needed in slowpath */
	struct rumpuser_mtx *rcpu_mtx;
	struct rumpuser_cv *rcpu_cv;
	int rcpu_wanted;

	/* offset 20 (P=4) or 36 (P=8) here */

	/*
	 * Some stats. Not really that necessary, but we should
	 * have room. Note that these overflow quite fast, so need
	 * to be collected often.
	 */
	unsigned int rcpu_fastpath;
	unsigned int rcpu_slowpath;
	unsigned int rcpu_migrated;
	/* offset 32 (P=4) or 48 (P=8) */

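	/*
	 * Pad each rumpcpu out to a full cache line: the zero-length,
	 * cache-line-aligned member below only forces alignment, so
	 * two virtual CPUs never share a line and ping-pong it
	 * between host CPUs.
	 */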
	int rcpu_align[0] __aligned(CACHE_LINE_SIZE);
} rcpu_storage[MAXCPUS];
struct cpu_info *rump_cpu = &rump_cpus[0];
int ncpu;

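/*
 * rcpu_prevlwp either points to the lwp which last ran on the virtual
 * CPU (CPU is free, and that lwp may take the fastpath) or holds one
 * of the sentinels below: BUSY means the CPU is currently scheduled
 * with nobody waiting, WANTED means someone is waiting for its release.
 */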
#define RCPULWP_BUSY	((void *)-1)
#define RCPULWP_WANTED	((void *)-2)

static struct rumpuser_mtx *lwp0mtx;
static struct rumpuser_cv *lwp0cv;
static unsigned nextcpu;

static bool lwp0busy = false;

/*
 * Keep some stats.
 *
 * Keeping track of them is not really critical for speed, unless
 * stats happen to be on a different cache line (CACHE_LINE_SIZE is
 * really just a coarse estimate), so default for the performant case
 * (i.e. no stats).
 */
#ifdef RUMPSCHED_STATS
#define SCHED_FASTPATH(rcpu) rcpu->rcpu_fastpath++;
#define SCHED_SLOWPATH(rcpu) rcpu->rcpu_slowpath++;
#define SCHED_MIGRATED(rcpu) rcpu->rcpu_migrated++;
#else
#define SCHED_FASTPATH(rcpu)
#define SCHED_SLOWPATH(rcpu)
#define SCHED_MIGRATED(rcpu)
#endif

struct cpu_info *
cpu_lookup(u_int index)
{

	return &rump_cpus[index];
}

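/*
 * Pick the next migration target round-robin. nextcpu is advanced
 * atomically and reduced modulo ncpu, so concurrent callers spread
 * out over the virtual CPUs without holding any lock.
 */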
static inline struct rumpcpu *
getnextcpu(void)
{
	unsigned newcpu;

	newcpu = atomic_inc_uint_nv(&nextcpu);
	if (__predict_false(ncpu > UINT_MAX/2))
		atomic_and_uint(&nextcpu, 0);
	newcpu = newcpu % ncpu;

	return &rcpu_storage[newcpu];
}

/* this could/should be mi_attach_cpu? */
void
rump_cpus_bootstrap(int num)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	int i;

	if (num > MAXCPUS) {
		aprint_verbose("CPU limit: %d wanted, %d (MAXCPUS) available\n",
		    num, MAXCPUS);
		num = MAXCPUS;
	}

	for (i = 0; i < num; i++) {
		rcpu = &rcpu_storage[i];
		ci = &rump_cpus[i];
		ci->ci_index = i;
		rump_cpu_attach(ci);
		ncpu++;
	}
}

void
rump_scheduler_init()
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	int i;

	rumpuser_mutex_init(&lwp0mtx);
	rumpuser_cv_init(&lwp0cv);
	for (i = 0; i < ncpu; i++) {
		rcpu = &rcpu_storage[i];
		ci = &rump_cpus[i];
		rcpu->rcpu_ci = ci;
		ci->ci_schedstate.spc_mutex =
		    mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
		ci->ci_schedstate.spc_flags = SPCF_RUNNING;
		rcpu->rcpu_wanted = 0;
		rumpuser_cv_init(&rcpu->rcpu_cv);
		rumpuser_mutex_init(&rcpu->rcpu_mtx);
	}
}

/*
 * condvar ops using scheduler lock as the rumpuser interlock.
 */
void
rump_schedlock_cv_wait(struct rumpuser_cv *cv)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = &rcpu_storage[l->l_cpu-&rump_cpus[0]];

	/* mutex will be taken and released in cpu schedule/unschedule */
	rumpuser_cv_wait(cv, rcpu->rcpu_mtx);
}

int
rump_schedlock_cv_timedwait(struct rumpuser_cv *cv, const struct timespec *ts)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = &rcpu_storage[l->l_cpu-&rump_cpus[0]];

	/* mutex will be taken and released in cpu schedule/unschedule */
	return rumpuser_cv_timedwait(cv, rcpu->rcpu_mtx,
	    ts->tv_sec, ts->tv_nsec);
}

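/*
 * Schedule the calling host thread onto a virtual CPU. If the thread
 * has no rump lwp bound to it yet, borrow lwp0 just long enough to
 * allocate a temporary one; the temporary lwp is marked for release
 * so that rump_unschedule() frees it again.
 */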
void
rump_schedule()
{
	struct lwp *l;

	/*
	 * If there is no dedicated lwp, allocate a temp one and
	 * set it to be free'd upon unschedule(). Use lwp0 context
	 * for reserving the necessary resources. Don't optimize
	 * for this case -- anyone who cares about performance will
	 * start a real thread.
	 */
	l = rumpuser_get_curlwp();
	if (l == NULL) {
		/* busy lwp0 */
		rumpuser_mutex_enter_nowrap(lwp0mtx);
		while (lwp0busy)
			rumpuser_cv_wait_nowrap(lwp0cv, lwp0mtx);
		lwp0busy = true;
		rumpuser_mutex_exit(lwp0mtx);

		/* schedule cpu and use lwp0 */
		rump_schedule_cpu(&lwp0);
		rumpuser_set_curlwp(&lwp0);
		l = rump_lwp_alloc(0, rump_nextlid());

		/* release lwp0 */
		rump_lwp_switch(l);
		rumpuser_mutex_enter_nowrap(lwp0mtx);
		lwp0busy = false;
		rumpuser_cv_signal(lwp0cv);
		rumpuser_mutex_exit(lwp0mtx);

		/* mark new lwp as dead-on-exit */
		rump_lwp_release(l);
	} else {
		rump_schedule_cpu(l);
	}
}

void
rump_schedule_cpu(struct lwp *l)
{

	rump_schedule_cpu_interlock(l, NULL);
}

/*
 * Schedule a CPU. This optimizes for the case where we schedule
 * the same thread often, and we have nCPU >= nFrequently-Running-Thread
 * (where CPU is virtual rump cpu, not host CPU).
 */
void
rump_schedule_cpu_interlock(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	void *old;
	bool domigrate;
	bool bound = l->l_pflag & LP_BOUND;

	/*
	 * First, try fastpath: if we were the previous user of the
	 * CPU, everything is in order cachewise and we can just
	 * proceed to use it.
	 *
	 * If we are a different thread (i.e. CAS fails), we must go
	 * through a memory barrier to ensure we get a truthful
	 * view of the world.
	 */

	rcpu = &rcpu_storage[l->l_target_cpu-&rump_cpus[0]];
	if (atomic_cas_ptr(&rcpu->rcpu_prevlwp, l, RCPULWP_BUSY) == l) {
		if (__predict_true(interlock == rcpu->rcpu_mtx))
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
		SCHED_FASTPATH(rcpu);
		/* jones, you're the man */
		goto fastlane;
	}

	/*
	 * Else, it's the slowpath for us. First, determine if we
	 * can migrate.
	 */
	if (ncpu == 1)
		domigrate = false;
	else
		domigrate = true;

	/* Take lock. This acts as a load barrier too. */
	if (__predict_true(interlock != rcpu->rcpu_mtx))
		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);

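	/*
	 * Slowpath: mark the CPU as wanted, then try to claim it.
	 * If it is still busy, either migrate once to another virtual
	 * CPU or sleep on the per-CPU condvar until the current owner
	 * releases the CPU and wakes us up.
	 */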
	for (;;) {
		SCHED_SLOWPATH(rcpu);
		old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, RCPULWP_WANTED);

		/* CPU is free? */
		if (old != RCPULWP_BUSY && old != RCPULWP_WANTED) {
			if (atomic_cas_ptr(&rcpu->rcpu_prevlwp,
			    RCPULWP_WANTED, RCPULWP_BUSY) == RCPULWP_WANTED) {
				break;
			}
		}

		/*
		 * Do we want to migrate once?
		 * This may need a slightly better algorithm, or we
		 * might cache pingpong eternally for non-frequent
		 * threads.
		 */
		if (domigrate && !bound) {
			domigrate = false;
			SCHED_MIGRATED(rcpu);
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
			rcpu = getnextcpu();
			rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
			continue;
		}

		/* Want CPU, wait until it's released and retry */
		rcpu->rcpu_wanted++;
		rumpuser_cv_wait_nowrap(rcpu->rcpu_cv, rcpu->rcpu_mtx);
		rcpu->rcpu_wanted--;
	}
	rumpuser_mutex_exit(rcpu->rcpu_mtx);

fastlane:
	l->l_cpu = l->l_target_cpu = rcpu->rcpu_ci;
	l->l_mutex = rcpu->rcpu_ci->ci_schedstate.spc_mutex;
}

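/*
 * Release the virtual CPU held by the calling thread. If the thread
 * was running on a temporary lwp created by rump_schedule(), borrow
 * lwp0 once more so that there is a context from which to run
 * rump_lwp_free(), then drop the CPU for good.
 */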
void
rump_unschedule()
{
	struct lwp *l;

	l = rumpuser_get_curlwp();
	KASSERT(l->l_mutex == l->l_cpu->ci_schedstate.spc_mutex);
	rump_unschedule_cpu(l);
	l->l_mutex = NULL;

	/*
	 * If we're using a temp lwp, need to take lwp0 for rump_lwp_free().
	 * (we could maybe cache idle lwp's to avoid constant bouncing)
	 */
	if (l->l_flag & LW_WEXIT) {
		rumpuser_set_curlwp(NULL);

		/* busy lwp0 */
		rumpuser_mutex_enter_nowrap(lwp0mtx);
		while (lwp0busy)
			rumpuser_cv_wait_nowrap(lwp0cv, lwp0mtx);
		lwp0busy = true;
		rumpuser_mutex_exit(lwp0mtx);

		rump_schedule_cpu(&lwp0);
		rumpuser_set_curlwp(&lwp0);
		rump_lwp_free(l);
		rump_unschedule_cpu(&lwp0);
		rumpuser_set_curlwp(NULL);

		rumpuser_mutex_enter_nowrap(lwp0mtx);
		lwp0busy = false;
		rumpuser_cv_signal(lwp0cv);
		rumpuser_mutex_exit(lwp0mtx);
	}
}

void
rump_unschedule_cpu(struct lwp *l)
{

	rump_unschedule_cpu_interlock(l, NULL);
}

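/*
 * Before giving up the CPU, run any soft interrupts which were
 * scheduled for it; interrupt lwps themselves skip this step.
 */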
void
rump_unschedule_cpu_interlock(struct lwp *l, void *interlock)
{

	if ((l->l_pflag & LP_INTR) == 0)
		rump_softint_run(l->l_cpu);
	rump_unschedule_cpu1(l, interlock);
}

void
rump_unschedule_cpu1(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	void *old;

	ci = l->l_cpu;
	l->l_cpu = NULL;
	rcpu = &rcpu_storage[ci-&rump_cpus[0]];

	KASSERT(rcpu->rcpu_ci == ci);

	/*
	 * Make sure all stores are seen before the CPU release. This
	 * is relevant only in the non-fastpath scheduling case, but
	 * we don't know here if that's going to happen, so need to
	 * expect the worst.
	 */
	membar_exit();

	/* Release the CPU. */
	old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, l);

	/* No waiters? No problems. We're outta here. */
	if (old == RCPULWP_BUSY) {
		/* Was the scheduler interlock requested? */
		if (__predict_false(interlock == rcpu->rcpu_mtx))
			rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
		return;
	}

	KASSERT(old == RCPULWP_WANTED);

	/*
	 * Ok, things weren't so snappy.
	 *
	 * Snailpath: take lock and signal anyone waiting for this CPU.
	 */

	rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
	if (rcpu->rcpu_wanted)
		rumpuser_cv_broadcast(rcpu->rcpu_cv);

	if (__predict_true(interlock != rcpu->rcpu_mtx))
		rumpuser_mutex_exit(rcpu->rcpu_mtx);
}

/* Give up and retake CPU (perhaps a different one) */
void
yield()
{
	struct lwp *l = curlwp;
	int nlocks;

	KERNEL_UNLOCK_ALL(l, &nlocks);
	rump_unschedule_cpu(l);
	rump_schedule_cpu(l);
	KERNEL_LOCK(nlocks, l);
}

void
preempt()
{

	yield();
}

bool
kpreempt(uintptr_t where)
{

	return false;
}

/*
 * There is no kernel thread preemption in rump currently. But call
 * the implementing macros anyway in case they grow some side-effects
 * down the road.
 */
void
kpreempt_disable(void)
{

	KPREEMPT_DISABLE(curlwp);
}

void
kpreempt_enable(void)
{

	KPREEMPT_ENABLE(curlwp);
}

void
suspendsched(void)
{

	/*
	 * Could wait until everyone is out and block further entries,
	 * but skip that for now.
	 */
}

void
sched_nice(struct proc *p, int level)
{

	/* nothing to do for now */
}