scheduler.c revision 1.44
/*      $NetBSD: scheduler.c,v 1.44 2016/02/19 18:38:37 pooka Exp $	*/

/*
 * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: scheduler.c,v 1.44 2016/02/19 18:38:37 pooka Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/queue.h>
#include <sys/select.h>
#include <sys/systm.h>

#include <rump-sys/kern.h>

#include <rump/rumpuser.h>

static struct rumpcpu {
	/* needed in fastpath */
	struct cpu_info *rcpu_ci;
	void *rcpu_prevlwp;

	/* needed in slowpath */
	struct rumpuser_mtx *rcpu_mtx;
	struct rumpuser_cv *rcpu_cv;
	int rcpu_wanted;

	/* offset 20 (P=4) or 36 (P=8) here */

	/*
	 * Some stats.  Not really that necessary, but we should
	 * have room.  Note that these overflow quite fast, so need
	 * to be collected often.
	 */
	unsigned int rcpu_fastpath;
	unsigned int rcpu_slowpath;
	unsigned int rcpu_migrated;

	/* offset 32 (P=4) or 50 (P=8) */

	int rcpu_align[0] __aligned(CACHE_LINE_SIZE);
} rcpu_storage[MAXCPUS];
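
/*
 * Note: each rcpu_storage slot is padded out to CACHE_LINE_SIZE by the
 * aligned zero-length rcpu_align[] member, so virtual CPUs do not share
 * cache lines and the fastpath fields above stay local to the cache of
 * the host CPU currently running that virtual CPU.
 */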

static inline struct rumpcpu *
cpuinfo_to_rumpcpu(struct cpu_info *ci)
{

	return &rcpu_storage[cpu_index(ci)];
}

struct cpu_info rump_bootcpu;
kcpuset_t *kcpuset_attached = NULL;
kcpuset_t *kcpuset_running = NULL;
int ncpu, ncpuonline;

kmutex_t cpu_lock;

#define RCPULWP_BUSY	((void *)-1)
#define RCPULWP_WANTED	((void *)-2)

static struct rumpuser_mtx *lwp0mtx;
static struct rumpuser_cv *lwp0cv;
static unsigned nextcpu;

kmutex_t unruntime_lock; /* unruntime lwp lock.  practically unused */

static bool lwp0isbusy = false;

/*
 * Keep some stats.
 *
 * Keeping track of them is not really critical for speed, unless the
 * stats happen to land on a different cache line (CACHE_LINE_SIZE is
 * really just a coarse estimate), so default to the performant case
 * (i.e. no stats).
 */
#ifdef RUMPSCHED_STATS
#define SCHED_FASTPATH(rcpu) rcpu->rcpu_fastpath++;
#define SCHED_SLOWPATH(rcpu) rcpu->rcpu_slowpath++;
#define SCHED_MIGRATED(rcpu) rcpu->rcpu_migrated++;
#else
#define SCHED_FASTPATH(rcpu)
#define SCHED_SLOWPATH(rcpu)
#define SCHED_MIGRATED(rcpu)
#endif
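
/*
 * Usage note (the build flag example is an assumption, not part of this
 * file): defining RUMPSCHED_STATS, e.g. with -DRUMPSCHED_STATS in
 * CPPFLAGS, compiles the counters in.  They are plain unsigned ints
 * updated without atomics, so they are approximate and wrap quickly;
 * a consumer would have to sample them often.
 */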

struct cpu_info *
cpu_lookup(u_int index)
{

	return rcpu_storage[index].rcpu_ci;
}

static inline struct rumpcpu *
getnextcpu(void)
{
	unsigned newcpu;

	newcpu = atomic_inc_uint_nv(&nextcpu);
	/* reset the counter well before it can wrap and skew the modulo */
	if (__predict_false(newcpu > UINT_MAX/2))
		atomic_and_uint(&nextcpu, 0);
	newcpu = newcpu % ncpu;

	return &rcpu_storage[newcpu];
}

/* this could/should be mi_attach_cpu? */
void
rump_cpus_bootstrap(int *nump)
{
	int num = *nump;

	if (num > MAXCPUS) {
		aprint_verbose("CPU limit: %d wanted, %d (MAXCPUS) "
		    "available (adjusted)\n", num, MAXCPUS);
		num = MAXCPUS;
	}

	mutex_init(&cpu_lock, MUTEX_DEFAULT, IPL_NONE);

	kcpuset_create(&kcpuset_attached, true);
	kcpuset_create(&kcpuset_running, true);

	/* attach first cpu for bootstrap */
	rump_cpu_attach(&rump_bootcpu);
	ncpu = 1;
	*nump = num;
}

void
rump_scheduler_init(int numcpu)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	int i;

	rumpuser_mutex_init(&lwp0mtx, RUMPUSER_MTX_SPIN);
	rumpuser_cv_init(&lwp0cv);
	for (i = 0; i < numcpu; i++) {
		if (i == 0) {
			ci = &rump_bootcpu;
		} else {
			ci = kmem_zalloc(sizeof(*ci), KM_SLEEP);
			ci->ci_index = i;
		}

		rcpu = &rcpu_storage[i];
		rcpu->rcpu_ci = ci;
		rcpu->rcpu_wanted = 0;
		rumpuser_cv_init(&rcpu->rcpu_cv);
		rumpuser_mutex_init(&rcpu->rcpu_mtx, RUMPUSER_MTX_SPIN);

		ci->ci_schedstate.spc_mutex =
		    mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
		ci->ci_schedstate.spc_flags = SPCF_RUNNING;
	}

	mutex_init(&unruntime_lock, MUTEX_DEFAULT, IPL_SCHED);
}

/*
 * condvar ops using scheduler lock as the rumpuser interlock.
 */
void
rump_schedlock_cv_wait(struct rumpuser_cv *cv)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = cpuinfo_to_rumpcpu(l->l_cpu);

	/* mutex will be taken and released in cpu schedule/unschedule */
	rumpuser_cv_wait(cv, rcpu->rcpu_mtx);
}

int
rump_schedlock_cv_timedwait(struct rumpuser_cv *cv, const struct timespec *ts)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = cpuinfo_to_rumpcpu(l->l_cpu);

	/* mutex will be taken and released in cpu schedule/unschedule */
	return rumpuser_cv_timedwait(cv, rcpu->rcpu_mtx,
	    ts->tv_sec, ts->tv_nsec);
}
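
/*
 * Illustrative sketch only (the function below is a hypothetical
 * caller, not part of this file): a thread which already holds a rump
 * CPU can wait on a rumpuser condvar with a timeout via the routine
 * above; the CPU's scheduler mutex serves as the interlock and is
 * handled by the schedule/unschedule hooks.
 */
#if 0
static int
example_schedlock_timedwait(struct rumpuser_cv *cv)
{
	struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };

	/* returns 0 on wakeup, or a rumpuser error value on timeout */
	return rump_schedlock_cv_timedwait(cv, &ts);
}
#endif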

static void
lwp0busy(void)
{

	/* busy lwp0 */
	KASSERT(curlwp == NULL || curlwp->l_stat != LSONPROC);
	rumpuser_mutex_enter_nowrap(lwp0mtx);
	while (lwp0isbusy)
		rumpuser_cv_wait_nowrap(lwp0cv, lwp0mtx);
	lwp0isbusy = true;
	rumpuser_mutex_exit(lwp0mtx);
}

static void
lwp0rele(void)
{

	rumpuser_mutex_enter_nowrap(lwp0mtx);
	KASSERT(lwp0isbusy == true);
	lwp0isbusy = false;
	rumpuser_cv_signal(lwp0cv);
	rumpuser_mutex_exit(lwp0mtx);
}

/*
 * rump_schedule: ensure that the calling host thread has a valid lwp context,
 * i.e. ensure that curlwp != NULL.  Also, ensure that there is a 1:1 mapping
 * between the lwp and the rump kernel cpu.
 */
void
rump_schedule()
{
	struct lwp *l;

	/*
	 * If there is no dedicated lwp, allocate a temp one and
	 * set it to be free'd upon unschedule().  Use lwp0 context
	 * for reserving the necessary resources.  Don't optimize
	 * for this case -- anyone who cares about performance will
	 * start a real thread.
	 */
	if (__predict_true((l = curlwp) != NULL)) {
		rump_schedule_cpu(l);
		LWP_CACHE_CREDS(l, l->l_proc);
	} else {
		lwp0busy();

		/* schedule cpu and use lwp0 */
		rump_schedule_cpu(&lwp0);
		rump_lwproc_curlwp_set(&lwp0);

		/* allocate thread, switch to it, and release lwp0 */
		l = rump__lwproc_alloclwp(initproc);
		rump_lwproc_switch(l);
		lwp0rele();

		/*
		 * mark new thread dead-on-unschedule.  this
		 * means that we'll be running with l_refcnt == 0.
		 * relax, it's fine.
		 */
		rump_lwproc_releaselwp();
	}
}
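
/*
 * Illustrative sketch only (the function below is a hypothetical
 * caller, not part of this file): a host thread typically brackets
 * rump kernel work with rump_schedule()/rump_unschedule() so that
 * curlwp and a virtual CPU are valid for the duration of the call.
 */
#if 0
static void
example_rump_kernel_entry(void)
{

	rump_schedule();
	/* ... run code which expects curlwp and curcpu() to be valid ... */
	rump_unschedule();
}
#endif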

void
rump_schedule_cpu(struct lwp *l)
{

	rump_schedule_cpu_interlock(l, NULL);
}

/*
 * Schedule a CPU.  This optimizes for the case where we schedule
 * the same thread often, and we have nCPU >= nFrequently-Running-Thread
 * (where CPU is virtual rump cpu, not host CPU).
 */
void
rump_schedule_cpu_interlock(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	void *old;
	bool domigrate;
	bool bound = l->l_pflag & LP_BOUND;

	l->l_stat = LSRUN;

	/*
	 * First, try fastpath: if we were the previous user of the
	 * CPU, everything is in order cachewise and we can just
	 * proceed to use it.
	 *
	 * If we are a different thread (i.e. CAS fails), we must go
	 * through a memory barrier to ensure we get a truthful
	 * view of the world.
	 */

	KASSERT(l->l_target_cpu != NULL);
	rcpu = cpuinfo_to_rumpcpu(l->l_target_cpu);
	if (atomic_cas_ptr(&rcpu->rcpu_prevlwp, l, RCPULWP_BUSY) == l) {
		if (interlock == rcpu->rcpu_mtx)
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
		SCHED_FASTPATH(rcpu);
		/* jones, you're the man */
		goto fastlane;
	}

	/*
	 * Else, it's the slowpath for us.  First, determine if we
	 * can migrate.
	 */
	if (ncpu == 1)
		domigrate = false;
	else
		domigrate = true;

	/* Take lock.  This acts as a load barrier too. */
	if (interlock != rcpu->rcpu_mtx)
		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);

	for (;;) {
		SCHED_SLOWPATH(rcpu);
		old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, RCPULWP_WANTED);

		/* CPU is free? */
		if (old != RCPULWP_BUSY && old != RCPULWP_WANTED) {
			if (atomic_cas_ptr(&rcpu->rcpu_prevlwp,
			    RCPULWP_WANTED, RCPULWP_BUSY) == RCPULWP_WANTED) {
				break;
			}
		}

		/*
		 * Do we want to migrate once?
		 * This may need a slightly better algorithm, or we
		 * might cache pingpong eternally for non-frequent
		 * threads.
		 */
		if (domigrate && !bound) {
			domigrate = false;
			SCHED_MIGRATED(rcpu);
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
			rcpu = getnextcpu();
			rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
			continue;
		}

		/* Want CPU, wait until it's released and retry */
		rcpu->rcpu_wanted++;
		rumpuser_cv_wait_nowrap(rcpu->rcpu_cv, rcpu->rcpu_mtx);
		rcpu->rcpu_wanted--;
	}
	rumpuser_mutex_exit(rcpu->rcpu_mtx);

 fastlane:
	ci = rcpu->rcpu_ci;
	l->l_cpu = l->l_target_cpu = ci;
	l->l_mutex = rcpu->rcpu_ci->ci_schedstate.spc_mutex;
	l->l_ncsw++;
	l->l_stat = LSONPROC;

	/*
	 * No interrupts, so ci_curlwp === cpu_onproc.
	 * Okay, we could make an attempt to not set cpu_onproc
	 * in the case that an interrupt is scheduled immediately
	 * after a user proc, but leave that for later.
	 */
	ci->ci_curlwp = ci->ci_data.cpu_onproc = l;
}
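
/*
 * Summary of the rcpu_prevlwp protocol used above and in
 * rump_unschedule_cpu1() below: a plain lwp pointer (or NULL) means
 * the CPU is free and that lwp was its last user (fastpath candidate),
 * RCPULWP_BUSY means the CPU is taken with nobody waiting for it, and
 * RCPULWP_WANTED means the CPU is taken and at least one thread is
 * sleeping on rcpu_cv until it is released.
 */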

void
rump_unschedule()
{
	struct lwp *l = curlwp;
#ifdef DIAGNOSTIC
	int nlock;

	KERNEL_UNLOCK_ALL(l, &nlock);
	KASSERT(nlock == 0);
#endif

	KASSERT(l->l_mutex == l->l_cpu->ci_schedstate.spc_mutex);
	rump_unschedule_cpu(l);
	l->l_mutex = &unruntime_lock;
	l->l_stat = LSSTOP;

	/*
	 * Check special conditions:
	 *  1) do we need to free the lwp which just unscheduled?
	 *     (locking order: lwp0, cpu)
	 *  2) do we want to clear curlwp for the current host thread?
	 */
	if (__predict_false(l->l_flag & LW_WEXIT)) {
		lwp0busy();

		/* Now that we have lwp0, we can schedule a CPU again */
		rump_schedule_cpu(l);

		/* switch to lwp0.  this frees the old thread */
		KASSERT(l->l_flag & LW_WEXIT);
		rump_lwproc_switch(&lwp0);

		/* release lwp0 */
		rump_unschedule_cpu(&lwp0);
		lwp0.l_mutex = &unruntime_lock;
		lwp0.l_pflag &= ~LP_RUNNING;
		lwp0rele();
		rump_lwproc_curlwp_clear(&lwp0);

	} else if (__predict_false(l->l_flag & LW_RUMP_CLEAR)) {
		rump_lwproc_curlwp_clear(l);
		l->l_flag &= ~LW_RUMP_CLEAR;
	}
}

void
rump_unschedule_cpu(struct lwp *l)
{

	rump_unschedule_cpu_interlock(l, NULL);
}

void
rump_unschedule_cpu_interlock(struct lwp *l, void *interlock)
{

	if ((l->l_pflag & LP_INTR) == 0)
		rump_softint_run(l->l_cpu);
	rump_unschedule_cpu1(l, interlock);
}

void
rump_unschedule_cpu1(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	void *old;

	ci = l->l_cpu;
	ci->ci_curlwp = ci->ci_data.cpu_onproc = NULL;
	rcpu = cpuinfo_to_rumpcpu(ci);

	KASSERT(rcpu->rcpu_ci == ci);

	/*
	 * Make sure all stores are seen before the CPU release.  This
	 * is relevant only in the non-fastpath scheduling case, but
	 * we don't know here if that's going to happen, so need to
	 * expect the worst.
	 *
	 * If the scheduler interlock was requested by the caller, we
	 * need to obtain it before we release the CPU.  Otherwise, we
	 * risk a race condition where another thread is scheduled
	 * onto the rump kernel CPU before our current thread can
	 * grab the interlock.
	 */
	if (interlock == rcpu->rcpu_mtx)
		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
	else
		membar_exit();

	/* Release the CPU. */
	old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, l);

	/* No waiters?  No problems.  We're outta here. */
	if (old == RCPULWP_BUSY) {
		return;
	}

	KASSERT(old == RCPULWP_WANTED);

	/*
	 * Ok, things weren't so snappy.
	 *
	 * Snailpath: take lock and signal anyone waiting for this CPU.
	 */

	if (interlock != rcpu->rcpu_mtx)
		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
	if (rcpu->rcpu_wanted)
		rumpuser_cv_broadcast(rcpu->rcpu_cv);
	if (interlock != rcpu->rcpu_mtx)
		rumpuser_mutex_exit(rcpu->rcpu_mtx);
}
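
/*
 * Note on the interlock convention: if the caller passes the CPU's own
 * rcpu_mtx as the interlock, the mutex is taken before the CPU is
 * released and is left held on return; rump_schedule_cpu_interlock()
 * then drops it again on the next schedule.  This is what the
 * "mutex will be taken and released in cpu schedule/unschedule"
 * comment in rump_schedlock_cv_wait() above refers to.
 */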

/* Give up and retake CPU (perhaps a different one) */
void
yield()
{
	struct lwp *l = curlwp;
	int nlocks;

	KERNEL_UNLOCK_ALL(l, &nlocks);
	rump_unschedule_cpu(l);
	rump_schedule_cpu(l);
	KERNEL_LOCK(nlocks, l);
}

void
preempt()
{

	yield();
}

bool
kpreempt(uintptr_t where)
{

	return false;
}

/*
 * There is no kernel thread preemption in rump currently.  But call
 * the implementing macros anyway in case they grow some side-effects
 * down the road.
 */
void
kpreempt_disable(void)
{

	KPREEMPT_DISABLE(curlwp);
}

void
kpreempt_enable(void)
{

	KPREEMPT_ENABLE(curlwp);
}
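
/*
 * As the comment above notes, these are currently no-ops apart from
 * the macros; callers should still pair kpreempt_disable() and
 * kpreempt_enable() around per-CPU accesses as they would in a
 * preemptive kernel, so the code keeps working if preemption is ever
 * implemented here.
 */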

bool
kpreempt_disabled(void)
{
#if 0
	const lwp_t *l = curlwp;

	return l->l_nopreempt != 0 || l->l_stat == LSZOMB ||
	    (l->l_flag & LW_IDLE) != 0 || cpu_kpreempt_disabled();
#endif
	/* XXX: emulate cpu_kpreempt_disabled() */
	return true;
}

void
suspendsched(void)
{

	/*
	 * Could wait until everyone is out and block further entries,
	 * but skip that for now.
	 */
}

void
sched_nice(struct proc *p, int level)
{

	/* nothing to do for now */
}

void
sched_enqueue(struct lwp *l, bool swtch)
{

	if (swtch)
		panic("sched_enqueue with switcheroo");
	rump_thread_allow(l);
}

void
sched_dequeue(struct lwp *l)
{

	panic("sched_dequeue not implemented");
}