Home | History | Annotate | Line # | Download | only in linux
linux_work.c revision 1.11
      1 /*	$NetBSD: linux_work.c,v 1.11 2018/08/27 14:48:47 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2013 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Taylor R. Campbell.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: linux_work.c,v 1.11 2018/08/27 14:48:47 riastradh Exp $");
     34 
     35 #include <sys/types.h>
     36 #include <sys/param.h>
     37 #include <sys/atomic.h>
     38 #include <sys/callout.h>
     39 #include <sys/condvar.h>
     40 #include <sys/errno.h>
     41 #include <sys/intr.h>
     42 #include <sys/kmem.h>
     43 #include <sys/mutex.h>
     44 #include <sys/queue.h>
     45 #include <sys/systm.h>
     46 #include <sys/workqueue.h>
     47 #include <sys/cpu.h>
     48 
     49 #include <machine/lock.h>
     50 
     51 #include <linux/workqueue.h>
     52 
/*
 * Linux-style workqueue, layered on a native workqueue(9).
 *
 * wq_lock is held around updates to wq_delayed and wq_current_work;
 * wq_cv is broadcast when a work item finishes running or finishes
 * being cancelled.
 */
struct workqueue_struct {
	/* Underlying native workqueue created by workqueue_create.  */
	struct workqueue		*wq_workqueue;

	/* Linkage in, and lookup key for, workqueues.tree.  */
	struct rb_node			wq_node;
	struct lwp			*wq_lwp;

	/* XXX The following should all be per-CPU.  */
	kmutex_t			wq_lock;

	/*
	 * Condvar for when any state related to this workqueue
	 * changes.  XXX Could split this into multiple condvars for
	 * different purposes, but whatever...
	 */
	kcondvar_t			wq_cv;

	/* Delayed work whose callout has been armed on this queue.  */
	TAILQ_HEAD(, delayed_work)	wq_delayed;
	/* Work item the worker is running right now, or NULL.  */
	struct work_struct		*wq_current_work;
};
     72 
/* Per-work-item lock built on __cpu_simple_lock(9).  */
static void	linux_work_lock_init(struct work_struct *);
static void	linux_work_lock(struct work_struct *);
static void	linux_work_unlock(struct work_struct *);
static bool	linux_work_locked(struct work_struct *) __diagused;

/* Work function of the barrier item queued by flush_workqueue.  */
static void	linux_wq_barrier(struct work_struct *);

/* Helpers for (synchronous) cancellation and the worker itself.  */
static void	linux_wait_for_cancelled_work(struct work_struct *);
static void	linux_wait_for_invoked_work(struct work_struct *);
static void	linux_worker(struct work *, void *);

/* Helpers for delayed work and its callout.  */
static void	linux_cancel_delayed_work_callout(struct delayed_work *, bool);
static void	linux_wait_for_delayed_cancelled_work(struct delayed_work *);
static void	linux_worker_intr(void *);

/* Global workqueues, created by linux_workqueue_init.  */
struct workqueue_struct		*system_wq;
struct workqueue_struct		*system_long_wq;
struct workqueue_struct		*system_power_efficient_wq;

/* Table of workqueues keyed by worker LWP; tree is guarded by lock.  */
static struct {
	kmutex_t		lock;
	struct rb_tree		tree;
} workqueues __cacheline_aligned;

/* Tentative definition; the initialized definition appears below.  */
static const rb_tree_ops_t	workqueues_rb_ops;
     98 
     99 int
    100 linux_workqueue_init(void)
    101 {
    102 
    103 	mutex_init(&workqueues.lock, MUTEX_DEFAULT, IPL_VM);
    104 	rb_tree_init(&workqueues.tree, &workqueues_rb_ops);
    105 
    106 	system_wq = alloc_ordered_workqueue("lnxsyswq", 0);
    107 	if (system_wq == NULL)
    108 		goto fail0;
    109 
    110 	system_long_wq = alloc_ordered_workqueue("lnxlngwq", 0);
    111 	if (system_long_wq == NULL)
    112 		goto fail1;
    113 
    114 	system_power_efficient_wq = alloc_ordered_workqueue("lnxpwrwq", 0);
    115 	if (system_long_wq == NULL)
    116 		goto fail2;
    117 
    118 	return 0;
    119 
    120 fail3: __unused
    121 	destroy_workqueue(system_power_efficient_wq);
    122 fail2:	destroy_workqueue(system_long_wq);
    123 fail1:	destroy_workqueue(system_wq);
    124 fail0:	mutex_destroy(&workqueues.lock);
    125 	return ENOMEM;
    126 }
    127 
    128 void
    129 linux_workqueue_fini(void)
    130 {
    131 
    132 	destroy_workqueue(system_long_wq);
    133 	system_long_wq = NULL;
    134 	destroy_workqueue(system_wq);
    135 	system_wq = NULL;
    136 	KASSERT(RB_TREE_MIN(&workqueues.tree) == NULL);
    137 	mutex_destroy(&workqueues.lock);
    138 }
    139 
    140 /*
    142  * Table of workqueue LWPs for validation -- assumes there is only one
    143  * thread per workqueue.
    144  *
    145  * XXX Mega-kludgerific!
    146  */
    147 
    148 static int
    149 compare_nodes(void *cookie, const void *va, const void *vb)
    150 {
    151 	const struct workqueue_struct *wa = va;
    152 	const struct workqueue_struct *wb = vb;
    153 
    154 	if ((uintptr_t)wa->wq_lwp < (uintptr_t)wb->wq_lwp)
    155 		return -1;
    156 	if ((uintptr_t)wa->wq_lwp > (uintptr_t)wb->wq_lwp)
    157 		return +1;
    158 	return 0;
    159 }
    160 
    161 static int
    162 compare_key(void *cookie, const void *vn, const void *vk)
    163 {
    164 	const struct workqueue_struct *w = vn;
    165 	const struct lwp *lwp = vk;
    166 
    167 	if ((uintptr_t)w->wq_lwp < (uintptr_t)lwp)
    168 		return -1;
    169 	if ((uintptr_t)w->wq_lwp > (uintptr_t)lwp)
    170 		return +1;
    171 	return 0;
    172 }
    173 
/*
 * Red-black tree operations for workqueues.tree: nodes are struct
 * workqueue_struct linked through wq_node, ordered by compare_nodes
 * and looked up with compare_key.
 */
static const rb_tree_ops_t workqueues_rb_ops = {
	.rbto_compare_nodes = compare_nodes,
	.rbto_compare_key = compare_key,
	.rbto_node_offset = offsetof(struct workqueue_struct, wq_node),
};
    179 
/*
 * One-shot work item used by workqueue_whoami to find out which LWP
 * runs a workqueue's work.
 */
struct wq_whoami_work {
	kmutex_t		www_lock;	/* interlock for www_cv */
	kcondvar_t		www_cv;		/* broadcast by the work function */
	struct workqueue_struct	*www_wq;	/* queue whose wq_lwp is recorded */
	struct work_struct	www_work;	/* the work item itself */
};
    186 
/*
 * Work function for workqueue_whoami: record curlwp as the worker LWP
 * of the workqueue named by the enclosing wq_whoami_work, then wake
 * whoever is waiting on its condvar.
 */
static void
workqueue_whoami_work(struct work_struct *work)
{
	struct wq_whoami_work *www = container_of(work, struct wq_whoami_work,
	    www_work);
	struct workqueue_struct *wq = www->www_wq;

	/* The LWP must not have been recorded yet.  */
	KASSERT(wq->wq_lwp == NULL);
	wq->wq_lwp = curlwp;

	/* Broadcast under the lock the waiter holds while testing wq_lwp.  */
	mutex_enter(&www->www_lock);
	cv_broadcast(&www->www_cv);
	mutex_exit(&www->www_lock);
}
    201 
    202 static void
    203 workqueue_whoami(struct workqueue_struct *wq)
    204 {
    205 	struct wq_whoami_work www;
    206 	struct workqueue_struct *collision __diagused;
    207 
    208 	mutex_init(&www.www_lock, MUTEX_DEFAULT, IPL_NONE);
    209 	cv_init(&www.www_cv, "wqwhoami");
    210 
    211 	INIT_WORK(&www.www_work, &workqueue_whoami_work);
    212 	queue_work(wq, &www.www_work);
    213 
    214 	mutex_enter(&www.www_lock);
    215 	while (wq->wq_lwp == NULL)
    216 		cv_wait(&www.www_cv, &www.www_lock);
    217 	mutex_exit(&www.www_lock);
    218 
    219 	cv_destroy(&www.www_cv);
    220 	mutex_destroy(&www.www_lock);
    221 
    222 	mutex_enter(&workqueues.lock);
    223 	collision = rb_tree_insert_node(&workqueues.tree, wq);
    224 	mutex_exit(&workqueues.lock);
    225 
    226 	KASSERT(collision == wq);
    227 }
    228 
    229 struct work_struct *
    230 current_work(void)
    231 {
    232 	struct workqueue_struct *wq;
    233 	struct work_struct *work;
    234 
    235 	mutex_enter(&workqueues.lock);
    236 	wq = rb_tree_find_node(&workqueues.tree, curlwp);
    237 	work = (wq == NULL ? NULL : wq->wq_current_work);
    238 	mutex_exit(&workqueues.lock);
    239 
    240 	return work;
    241 }
    242 
    243 /*
    245  * Workqueues
    246  */
    247 
    248 struct workqueue_struct *
    249 alloc_ordered_workqueue(const char *name, int linux_flags)
    250 {
    251 	struct workqueue_struct *wq;
    252 	int flags = WQ_MPSAFE;
    253 	int error;
    254 
    255 	KASSERT(linux_flags == 0);
    256 
    257 	wq = kmem_alloc(sizeof(*wq), KM_SLEEP);
    258 	error = workqueue_create(&wq->wq_workqueue, name, &linux_worker,
    259 	    wq, PRI_NONE, IPL_VM, flags);
    260 	if (error) {
    261 		kmem_free(wq, sizeof(*wq));
    262 		return NULL;
    263 	}
    264 
    265 	mutex_init(&wq->wq_lock, MUTEX_DEFAULT, IPL_VM);
    266 	cv_init(&wq->wq_cv, name);
    267 	TAILQ_INIT(&wq->wq_delayed);
    268 	wq->wq_current_work = NULL;
    269 
    270 	if (0) {		/* XXX broken garbage */
    271 		workqueue_whoami(wq);
    272 		KASSERT(wq->wq_lwp != NULL);
    273 	}
    274 
    275 	return wq;
    276 }
    277 
    278 void
    279 destroy_workqueue(struct workqueue_struct *wq)
    280 {
    281 
    282 	/*
    283 	 * Cancel all delayed work.
    284 	 */
    285 	for (;;) {
    286 		struct delayed_work *dw;
    287 
    288 		mutex_enter(&wq->wq_lock);
    289 		if (TAILQ_EMPTY(&wq->wq_delayed)) {
    290 			dw = NULL;
    291 		} else {
    292 			dw = TAILQ_FIRST(&wq->wq_delayed);
    293 			TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
    294 		}
    295 		mutex_exit(&wq->wq_lock);
    296 
    297 		if (dw == NULL)
    298 			break;
    299 
    300 		cancel_delayed_work_sync(dw);
    301 	}
    302 
    303 	/*
    304 	 * workqueue_destroy empties the queue; we need not wait for
    305 	 * completion explicitly.  However, we can't destroy the
    306 	 * condvar or mutex until this is done.
    307 	 */
    308 	workqueue_destroy(wq->wq_workqueue);
    309 	KASSERT(wq->wq_current_work == NULL);
    310 	wq->wq_workqueue = NULL;
    311 
    312 	cv_destroy(&wq->wq_cv);
    313 	mutex_destroy(&wq->wq_lock);
    314 
    315 	kmem_free(wq, sizeof(*wq));
    316 }
    317 
    318 /*
    320  * Flush
    321  *
    322  * Note:  This doesn't cancel or wait for delayed work.  This seems to
    323  * match what Linux does (or, doesn't do).
    324  */
    325 
/*
 * flush_scheduled_work()
 *
 *	Flush the global system_wq by calling flush_workqueue on it.
 */
void
flush_scheduled_work(void)
{
	flush_workqueue(system_wq);
}
    331 
/*
 * Barrier work item queued by flush_workqueue.  It is allocated in
 * flush_workqueue and freed by its own work function,
 * linux_wq_barrier.
 */
struct wq_flush_work {
	struct work_struct	wqfw_work;	/* the queued work item */
	struct wq_flush		*wqfw_flush;	/* flush state to notify */
};

/*
 * State of one flush_workqueue call, shared between the flusher and
 * its barrier work item(s).
 */
struct wq_flush {
	kmutex_t	wqf_lock;	/* held to decrement/wait on wqf_n */
	kcondvar_t	wqf_cv;		/* broadcast when wqf_n hits zero */
	unsigned int	wqf_n;		/* barriers that have yet to run */
};
    342 
    343 void
    344 flush_work(struct work_struct *work)
    345 {
    346 	struct workqueue_struct *const wq = work->w_wq;
    347 
    348 	if (wq != NULL)
    349 		flush_workqueue(wq);
    350 }
    351 
/*
 * flush_workqueue(wq)
 *
 *	Enqueue a barrier work item on wq's native workqueue and sleep
 *	until the worker has run it (linux_wq_barrier drops wqf_n to
 *	zero and broadcasts).  The per-CPU variant in the else branch
 *	is compiled but never taken, and panics if it ever is.
 */
void
flush_workqueue(struct workqueue_struct *wq)
{
	static const struct wq_flush zero_wqf;
	struct wq_flush wqf = zero_wqf;

	mutex_init(&wqf.wqf_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&wqf.wqf_cv, "lnxwflsh");

	if (1) {
		struct wq_flush_work *const wqfw = kmem_zalloc(sizeof(*wqfw),
		    KM_SLEEP);

		/* One barrier outstanding; it is freed by linux_wq_barrier.  */
		wqf.wqf_n = 1;
		wqfw->wqfw_flush = &wqf;
		INIT_WORK(&wqfw->wqfw_work, &linux_wq_barrier);
		/*
		 * Enqueue directly rather than via queue_work, so put
		 * the item in the state linux_worker expects by hand.
		 */
		wqfw->wqfw_work.w_wq = wq;
		wqfw->wqfw_work.w_state = WORK_PENDING;
		workqueue_enqueue(wq->wq_workqueue, &wqfw->wqfw_work.w_wk,
		    NULL);
	} else {
		struct cpu_info *ci;
		CPU_INFO_ITERATOR cii;
		struct wq_flush_work *wqfw;

		panic("per-CPU Linux workqueues don't work yet!");

		wqf.wqf_n = 0;
		for (CPU_INFO_FOREACH(cii, ci)) {
			wqfw = kmem_zalloc(sizeof(*wqfw), KM_SLEEP);
			mutex_enter(&wqf.wqf_lock);
			wqf.wqf_n++;
			mutex_exit(&wqf.wqf_lock);
			wqfw->wqfw_flush = &wqf;
			INIT_WORK(&wqfw->wqfw_work, &linux_wq_barrier);
			wqfw->wqfw_work.w_state = WORK_PENDING;
			wqfw->wqfw_work.w_wq = wq;
			workqueue_enqueue(wq->wq_workqueue,
			    &wqfw->wqfw_work.w_wk, ci);
		}
	}

	/* Sleep until every barrier has run.  */
	mutex_enter(&wqf.wqf_lock);
	while (0 < wqf.wqf_n)
		cv_wait(&wqf.wqf_cv, &wqf.wqf_lock);
	mutex_exit(&wqf.wqf_lock);

	cv_destroy(&wqf.wqf_cv);
	mutex_destroy(&wqf.wqf_lock);
}
    402 
    403 static void
    404 linux_wq_barrier(struct work_struct *work)
    405 {
    406 	struct wq_flush_work *const wqfw = container_of(work,
    407 	    struct wq_flush_work, wqfw_work);
    408 	struct wq_flush *const wqf = wqfw->wqfw_flush;
    409 
    410 	mutex_enter(&wqf->wqf_lock);
    411 	if (--wqf->wqf_n == 0)
    412 		cv_broadcast(&wqf->wqf_cv);
    413 	mutex_exit(&wqf->wqf_lock);
    414 
    415 	kmem_free(wqfw, sizeof(*wqfw));
    416 }
    417 
    418 /*
    420  * Work locking
    421  *
    422  * We use __cpu_simple_lock(9) rather than mutex(9) because Linux code
    423  * does not destroy work, so there is nowhere to call mutex_destroy.
    424  *
    425  * XXX This is getting out of hand...  Really, work items shouldn't
    426  * have locks in them at all; instead the workqueues should.
    427  */
    428 
/*
 * Initialize work's simple lock.  Called from INIT_WORK.
 */
static void
linux_work_lock_init(struct work_struct *work)
{

	__cpu_simple_lock_init(&work->w_lock);
}
    435 
/*
 * Acquire work's simple lock.  First raises the interrupt priority
 * level with splvm and does the per-CPU spin-mutex bookkeeping
 * (ci_mtx_count, ci_mtx_oldspl) by hand, then takes the lock.
 * Pair with linux_work_unlock.
 */
static void
linux_work_lock(struct work_struct *work)
{
	struct cpu_info *ci;
	int cnt, s;

	/* XXX Copypasta of MUTEX_SPIN_SPLRAISE.  */
	s = splvm();
	ci = curcpu();
	cnt = ci->ci_mtx_count--;
	__insn_barrier();
	/* Only the outermost acquisition records the level to restore.  */
	if (cnt == 0)
		ci->ci_mtx_oldspl = s;

	__cpu_simple_lock(&work->w_lock);
}
    452 
/*
 * Release work's simple lock and undo the bookkeeping done by
 * linux_work_lock: once ci_mtx_count returns to zero, restore the
 * interrupt priority level saved in ci_mtx_oldspl.
 */
static void
linux_work_unlock(struct work_struct *work)
{
	struct cpu_info *ci;
	int s;

	__cpu_simple_unlock(&work->w_lock);

	/* XXX Copypasta of MUTEX_SPIN_SPLRESTORE.  */
	ci = curcpu();
	/* Read the saved level before the count is bumped back.  */
	s = ci->ci_mtx_oldspl;
	__insn_barrier();
	if (++ci->ci_mtx_count == 0)
		splx(s);
}
    468 
/*
 * Report whether work's simple lock is in the locked state.  Used
 * only in KASSERTs in this file, hence __diagused.
 */
static bool __diagused
linux_work_locked(struct work_struct *work)
{
	return __SIMPLELOCK_LOCKED_P(&work->w_lock);
}
    474 
    475 /*
    477  * Work
    478  */
    479 
/*
 * INIT_WORK(work, fn)
 *
 *	Initialize work to run fn: set up its lock, put it in the
 *	WORK_IDLE state with no workqueue, and record fn as the
 *	function linux_worker will call.
 */
void
INIT_WORK(struct work_struct *work, void (*fn)(struct work_struct *))
{

	linux_work_lock_init(work);
	work->w_state = WORK_IDLE;
	work->w_wq = NULL;
	work->func = fn;
}
    489 
/*
 * schedule_work(work)
 *
 *	Queue work on the global system_wq.  Returns what queue_work
 *	returns.
 */
bool
schedule_work(struct work_struct *work)
{
	return queue_work(system_wq, work);
}
    495 
    496 bool
    497 queue_work(struct workqueue_struct *wq, struct work_struct *work)
    498 {
    499 	/* True if we put it on the queue, false if it was already there.  */
    500 	bool newly_queued;
    501 
    502 	KASSERT(wq != NULL);
    503 
    504 	linux_work_lock(work);
    505 	switch (work->w_state) {
    506 	case WORK_IDLE:
    507 	case WORK_INVOKED:
    508 		work->w_state = WORK_PENDING;
    509 		work->w_wq = wq;
    510 		workqueue_enqueue(wq->wq_workqueue, &work->w_wk, NULL);
    511 		newly_queued = true;
    512 		break;
    513 
    514 	case WORK_DELAYED:
    515 		panic("queue_work(delayed work %p)", work);
    516 		break;
    517 
    518 	case WORK_PENDING:
    519 		KASSERT(work->w_wq == wq);
    520 		newly_queued = false;
    521 		break;
    522 
    523 	case WORK_CANCELLED:
    524 		newly_queued = false;
    525 		break;
    526 
    527 	case WORK_DELAYED_CANCELLED:
    528 		panic("queue_work(delayed work %p)", work);
    529 		break;
    530 
    531 	default:
    532 		panic("work %p in bad state: %d", work, (int)work->w_state);
    533 		break;
    534 	}
    535 	linux_work_unlock(work);
    536 
    537 	return newly_queued;
    538 }
    539 
    540 bool
    541 cancel_work_sync(struct work_struct *work)
    542 {
    543 	bool cancelled_p = false;
    544 
    545 	linux_work_lock(work);
    546 	switch (work->w_state) {
    547 	case WORK_IDLE:		/* Nothing to do.  */
    548 		break;
    549 
    550 	case WORK_DELAYED:
    551 		panic("cancel_work_sync(delayed work %p)", work);
    552 		break;
    553 
    554 	case WORK_PENDING:
    555 		work->w_state = WORK_CANCELLED;
    556 		linux_wait_for_cancelled_work(work);
    557 		cancelled_p = true;
    558 		break;
    559 
    560 	case WORK_INVOKED:
    561 		linux_wait_for_invoked_work(work);
    562 		break;
    563 
    564 	case WORK_CANCELLED:	/* Already done.  */
    565 		break;
    566 
    567 	case WORK_DELAYED_CANCELLED:
    568 		panic("cancel_work_sync(delayed work %p)", work);
    569 		break;
    570 
    571 	default:
    572 		panic("work %p in bad state: %d", work, (int)work->w_state);
    573 		break;
    574 	}
    575 	linux_work_unlock(work);
    576 
    577 	return cancelled_p;
    578 }
    579 
/*
 * Wait for the worker to notice that work has been cancelled.
 *
 * Called with work locked and in the WORK_CANCELLED state; returns
 * with work locked.  Sleeps on the workqueue's condvar at least once,
 * and again for as long as work is still WORK_CANCELLED on the same
 * workqueue.
 */
static void
linux_wait_for_cancelled_work(struct work_struct *work)
{
	struct workqueue_struct *wq;

	KASSERT(linux_work_locked(work));
	KASSERT(work->w_state == WORK_CANCELLED);

	wq = work->w_wq;
	do {
		/*
		 * Take the workqueue lock before dropping the work
		 * lock, so the state cannot change unnoticed between
		 * the check and the sleep.
		 */
		mutex_enter(&wq->wq_lock);
		linux_work_unlock(work);
		cv_wait(&wq->wq_cv, &wq->wq_lock);
		mutex_exit(&wq->wq_lock);
		linux_work_lock(work);
	} while ((work->w_state == WORK_CANCELLED) && (work->w_wq == wq));
}
    597 
/*
 * Wait until work is no longer the item its workqueue is running.
 *
 * Called with work locked and in the WORK_INVOKED state; drops the
 * work lock while sleeping and returns with it held again.
 */
static void
linux_wait_for_invoked_work(struct work_struct *work)
{
	struct workqueue_struct *wq;

	KASSERT(linux_work_locked(work));
	KASSERT(work->w_state == WORK_INVOKED);

	wq = work->w_wq;
	/* Take the workqueue lock before releasing the work lock.  */
	mutex_enter(&wq->wq_lock);
	linux_work_unlock(work);
	while (wq->wq_current_work == work)
		cv_wait(&wq->wq_cv, &wq->wq_lock);
	mutex_exit(&wq->wq_lock);

	linux_work_lock(work);	/* XXX needless relock */
}
    615 
/*
 * Worker function passed to workqueue_create: called with each
 * dequeued native work item and the owning workqueue as arg.
 *
 * Pending work is marked WORK_INVOKED, published as the workqueue's
 * current work, run with the work lock released, and then waiters are
 * notified.  Cancelled work is returned to WORK_IDLE without being
 * run, and waiters are notified.  Any other state panics.
 */
static void
linux_worker(struct work *wk, void *arg)
{
	struct work_struct *const work = container_of(wk, struct work_struct,
	    w_wk);
	struct workqueue_struct *const wq = arg;

	linux_work_lock(work);
	switch (work->w_state) {
	case WORK_IDLE:
		panic("idle work %p got queued: %p", work, wq);
		break;

	case WORK_DELAYED:
		panic("delayed work %p got queued: %p", work, wq);
		break;

	case WORK_PENDING:
		KASSERT(work->w_wq == wq);

		/* Get ready to invoke this one.  */
		mutex_enter(&wq->wq_lock);
		work->w_state = WORK_INVOKED;
		KASSERT(wq->wq_current_work == NULL);
		wq->wq_current_work = work;
		mutex_exit(&wq->wq_lock);

		/* Unlock it and do it.  Can't use work after this.  */
		linux_work_unlock(work);
		(*work->func)(work);

		/*
		 * All done.  Notify anyone waiting for completion.
		 * From here on work is only compared, not dereferenced.
		 */
		mutex_enter(&wq->wq_lock);
		KASSERT(wq->wq_current_work == work);
		wq->wq_current_work = NULL;
		cv_broadcast(&wq->wq_cv);
		mutex_exit(&wq->wq_lock);
		/* The work lock was already released above.  */
		return;

	case WORK_INVOKED:
		panic("invoked work %p got requeued: %p", work, wq);
		break;

	case WORK_CANCELLED:
		KASSERT(work->w_wq == wq);

		/* Return to idle; notify anyone waiting for cancellation.  */
		mutex_enter(&wq->wq_lock);
		work->w_state = WORK_IDLE;
		work->w_wq = NULL;
		cv_broadcast(&wq->wq_cv);
		mutex_exit(&wq->wq_lock);
		break;

	case WORK_DELAYED_CANCELLED:
		panic("cancelled delayed work %p got uqeued: %p", work, wq);
		break;

	default:
		panic("work %p in bad state: %d", work, (int)work->w_state);
		break;
	}
	linux_work_unlock(work);
}
    680 
    681 /*
    683  * Delayed work
    684  */
    685 
/*
 * INIT_DELAYED_WORK(dw, fn)
 *
 *	Initialize dw to run fn by initializing its embedded work item
 *	with INIT_WORK.  The callout is not set up here; it is
 *	initialized when the work is actually scheduled with a delay.
 */
void
INIT_DELAYED_WORK(struct delayed_work *dw, void (*fn)(struct work_struct *))
{
	INIT_WORK(&dw->work, fn);
}
    691 
/*
 * schedule_delayed_work(dw, ticks)
 *
 *	Queue dw on the global system_wq after ticks.  Returns what
 *	queue_delayed_work returns.
 */
bool
schedule_delayed_work(struct delayed_work *dw, unsigned long ticks)
{
	return queue_delayed_work(system_wq, dw, ticks);
}
    697 
    698 bool
    699 queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    700     unsigned long ticks)
    701 {
    702 	bool newly_queued;
    703 
    704 	KASSERT(wq != NULL);
    705 
    706 	linux_work_lock(&dw->work);
    707 	switch (dw->work.w_state) {
    708 	case WORK_IDLE:
    709 	case WORK_INVOKED:
    710 		if (ticks == 0) {
    711 			/* Skip the delay and queue it now.  */
    712 			dw->work.w_state = WORK_PENDING;
    713 			dw->work.w_wq = wq;
    714 			workqueue_enqueue(wq->wq_workqueue, &dw->work.w_wk,
    715 			    NULL);
    716 		} else {
    717 			callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
    718 			callout_reset(&dw->dw_callout, ticks,
    719 			    &linux_worker_intr, dw);
    720 			dw->work.w_state = WORK_DELAYED;
    721 			dw->work.w_wq = wq;
    722 			mutex_enter(&wq->wq_lock);
    723 			TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
    724 			mutex_exit(&wq->wq_lock);
    725 		}
    726 		newly_queued = true;
    727 		break;
    728 
    729 	case WORK_DELAYED:
    730 		/*
    731 		 * Timer is already ticking.  Leave it to time out
    732 		 * whenever it was going to time out, as Linux does --
    733 		 * neither speed it up nor postpone it.
    734 		 */
    735 		newly_queued = false;
    736 		break;
    737 
    738 	case WORK_PENDING:
    739 		KASSERT(dw->work.w_wq == wq);
    740 		newly_queued = false;
    741 		break;
    742 
    743 	case WORK_CANCELLED:
    744 	case WORK_DELAYED_CANCELLED:
    745 		/* XXX Wait for cancellation and then queue?  */
    746 		newly_queued = false;
    747 		break;
    748 
    749 	default:
    750 		panic("delayed work %p in bad state: %d", dw,
    751 		    (int)dw->work.w_state);
    752 		break;
    753 	}
    754 	linux_work_unlock(&dw->work);
    755 
    756 	return newly_queued;
    757 }
    758 
    759 bool
    760 mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    761     unsigned long ticks)
    762 {
    763 	bool timer_modified;
    764 
    765 	KASSERT(wq != NULL);
    766 
    767 	linux_work_lock(&dw->work);
    768 	switch (dw->work.w_state) {
    769 	case WORK_IDLE:
    770 	case WORK_INVOKED:
    771 		if (ticks == 0) {
    772 			/* Skip the delay and queue it now.  */
    773 			dw->work.w_state = WORK_PENDING;
    774 			dw->work.w_wq = wq;
    775 			workqueue_enqueue(wq->wq_workqueue, &dw->work.w_wk,
    776 			    NULL);
    777 		} else {
    778 			callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
    779 			callout_reset(&dw->dw_callout, ticks,
    780 			    &linux_worker_intr, dw);
    781 			dw->work.w_state = WORK_DELAYED;
    782 			dw->work.w_wq = wq;
    783 			mutex_enter(&wq->wq_lock);
    784 			TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
    785 			mutex_exit(&wq->wq_lock);
    786 		}
    787 		timer_modified = false;
    788 		break;
    789 
    790 	case WORK_DELAYED:
    791 		/*
    792 		 * Timer is already ticking.  Reschedule it.
    793 		 */
    794 		callout_schedule(&dw->dw_callout, ticks);
    795 		timer_modified = true;
    796 		break;
    797 
    798 	case WORK_PENDING:
    799 		KASSERT(dw->work.w_wq == wq);
    800 		timer_modified = false;
    801 		break;
    802 
    803 	case WORK_CANCELLED:
    804 	case WORK_DELAYED_CANCELLED:
    805 		/* XXX Wait for cancellation and then queue?  */
    806 		timer_modified = false;
    807 		break;
    808 
    809 	default:
    810 		panic("delayed work %p in bad state: %d", dw,
    811 		    (int)dw->work.w_state);
    812 		break;
    813 	}
    814 	linux_work_unlock(&dw->work);
    815 
    816 	return timer_modified;
    817 }
    818 
    819 bool
    820 cancel_delayed_work(struct delayed_work *dw)
    821 {
    822 	bool cancelled_p = false;
    823 
    824 	linux_work_lock(&dw->work);
    825 	switch (dw->work.w_state) {
    826 	case WORK_IDLE:		/* Nothing to do.  */
    827 		break;
    828 
    829 	case WORK_DELAYED:
    830 		dw->work.w_state = WORK_DELAYED_CANCELLED;
    831 		linux_cancel_delayed_work_callout(dw, false);
    832 		cancelled_p = true;
    833 		break;
    834 
    835 	case WORK_PENDING:
    836 		dw->work.w_state = WORK_CANCELLED;
    837 		cancelled_p = true;
    838 		break;
    839 
    840 	case WORK_INVOKED:	/* Don't wait!  */
    841 		break;
    842 
    843 	case WORK_CANCELLED:	/* Already done.  */
    844 	case WORK_DELAYED_CANCELLED:
    845 		break;
    846 
    847 	default:
    848 		panic("delayed work %p in bad state: %d", dw,
    849 		    (int)dw->work.w_state);
    850 		break;
    851 	}
    852 	linux_work_unlock(&dw->work);
    853 
    854 	return cancelled_p;
    855 }
    856 
    857 bool
    858 cancel_delayed_work_sync(struct delayed_work *dw)
    859 {
    860 	bool cancelled_p = false;
    861 
    862 	linux_work_lock(&dw->work);
    863 	switch (dw->work.w_state) {
    864 	case WORK_IDLE:		/* Nothing to do.  */
    865 		break;
    866 
    867 	case WORK_DELAYED:
    868 		dw->work.w_state = WORK_DELAYED_CANCELLED;
    869 		linux_cancel_delayed_work_callout(dw, true);
    870 		cancelled_p = true;
    871 		break;
    872 
    873 	case WORK_PENDING:
    874 		dw->work.w_state = WORK_CANCELLED;
    875 		linux_wait_for_cancelled_work(&dw->work);
    876 		cancelled_p = true;
    877 		break;
    878 
    879 	case WORK_INVOKED:
    880 		linux_wait_for_invoked_work(&dw->work);
    881 		break;
    882 
    883 	case WORK_CANCELLED:	/* Already done.  */
    884 		break;
    885 
    886 	case WORK_DELAYED_CANCELLED:
    887 		linux_wait_for_delayed_cancelled_work(dw);
    888 		break;
    889 
    890 	default:
    891 		panic("delayed work %p in bad state: %d", dw,
    892 		    (int)dw->work.w_state);
    893 		break;
    894 	}
    895 	linux_work_unlock(&dw->work);
    896 
    897 	return cancelled_p;
    898 }
    899 
    900 void
    901 flush_delayed_work(struct delayed_work *dw)
    902 {
    903 
    904 	if (cancel_delayed_work_sync(dw)) {
    905 		/*
    906 		 * Cancelled it.  Run it now.
    907 		 *
    908 		 * XXX What if it's supposed to run on a different
    909 		 * workqueue?  Let's just hope it's not...
    910 		 */
    911 		mod_delayed_work(system_wq, dw, 0);
    912 		flush_workqueue(system_wq);
    913 	} else {
    914 		/* Work ran to completion already.  We're done.  */
    915 	}
    916 }
    917 
/*
 * Stop the callout of delayed work that has just been marked
 * WORK_DELAYED_CANCELLED.
 *
 * Called and returns with dw's work lock held.  If wait is true the
 * lock is dropped around callout_halt; otherwise callout_stop is
 * used.  If the callout was stopped before it fired, take dw off its
 * workqueue's delayed list, destroy the callout, return dw to
 * WORK_IDLE, and notify waiters.
 */
static void
linux_cancel_delayed_work_callout(struct delayed_work *dw, bool wait)
{
	bool fired_p;

	KASSERT(linux_work_locked(&dw->work));
	KASSERT(dw->work.w_state == WORK_DELAYED_CANCELLED);

	if (wait) {
		/*
		 * We unlock, halt, and then relock, rather than
		 * passing an interlock to callout_halt, for two
		 * reasons:
		 *
		 * (1) The work lock is not a mutex(9), so we can't use it.
		 * (2) The WORK_DELAYED_CANCELLED state serves as an interlock.
		 */
		linux_work_unlock(&dw->work);
		fired_p = callout_halt(&dw->dw_callout, NULL);
		linux_work_lock(&dw->work);
	} else {
		fired_p = callout_stop(&dw->dw_callout);
	}

	/*
	 * fired_p means we didn't cancel the callout, so it must have
	 * already begun and will clean up after itself.
	 *
	 * !fired_p means we cancelled it so we have to clean up after
	 * it.  Nobody else should have changed the state in that case.
	 */
	if (!fired_p) {
		struct workqueue_struct *wq;

		KASSERT(linux_work_locked(&dw->work));
		KASSERT(dw->work.w_state == WORK_DELAYED_CANCELLED);

		/* Same cleanup linux_worker_intr does for cancelled work.  */
		wq = dw->work.w_wq;
		mutex_enter(&wq->wq_lock);
		TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
		callout_destroy(&dw->dw_callout);
		dw->work.w_state = WORK_IDLE;
		dw->work.w_wq = NULL;
		cv_broadcast(&wq->wq_cv);
		mutex_exit(&wq->wq_lock);
	}
}
    965 
/*
 * Wait for somebody else's cancellation of delayed work to finish.
 *
 * Called with dw's work lock held and dw in the
 * WORK_DELAYED_CANCELLED state; returns with the lock held.  Sleeps
 * on the workqueue's condvar at least once, and again for as long as
 * dw is still WORK_DELAYED_CANCELLED on the same workqueue.
 */
static void
linux_wait_for_delayed_cancelled_work(struct delayed_work *dw)
{
	struct workqueue_struct *wq;

	KASSERT(linux_work_locked(&dw->work));
	KASSERT(dw->work.w_state == WORK_DELAYED_CANCELLED);

	wq = dw->work.w_wq;
	do {
		/*
		 * Take the workqueue lock before dropping the work
		 * lock, so the state cannot change unnoticed between
		 * the check and the sleep.
		 */
		mutex_enter(&wq->wq_lock);
		linux_work_unlock(&dw->work);
		cv_wait(&wq->wq_cv, &wq->wq_lock);
		mutex_exit(&wq->wq_lock);
		linux_work_lock(&dw->work);
	} while ((dw->work.w_state == WORK_DELAYED_CANCELLED) &&
	    (dw->work.w_wq == wq));
}
    984 
/*
 * Callout handler armed by queue_delayed_work and mod_delayed_work;
 * arg is the struct delayed_work whose delay has expired.
 *
 * If the work is still WORK_DELAYED, mark it pending and enqueue it
 * on its workqueue.  If it was cancelled in the meantime, return it
 * to WORK_IDLE and notify waiters.  Either way, take it off the
 * workqueue's delayed list and destroy the callout.
 */
static void
linux_worker_intr(void *arg)
{
	struct delayed_work *dw = arg;
	struct workqueue_struct *wq;

	linux_work_lock(&dw->work);

	KASSERT((dw->work.w_state == WORK_DELAYED) ||
	    (dw->work.w_state == WORK_DELAYED_CANCELLED));

	/* Remember the queue: w_wq is cleared below when cancelled.  */
	wq = dw->work.w_wq;
	mutex_enter(&wq->wq_lock);

	/* Queue the work, or return it to idle and alert any cancellers.  */
	if (__predict_true(dw->work.w_state == WORK_DELAYED)) {
		dw->work.w_state = WORK_PENDING;
		workqueue_enqueue(dw->work.w_wq->wq_workqueue, &dw->work.w_wk,
		    NULL);
	} else {
		KASSERT(dw->work.w_state == WORK_DELAYED_CANCELLED);
		dw->work.w_state = WORK_IDLE;
		dw->work.w_wq = NULL;
		cv_broadcast(&wq->wq_cv);
	}

	/* Either way, the callout is done.  */
	TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
	callout_destroy(&dw->dw_callout);

	mutex_exit(&wq->wq_lock);
	linux_work_unlock(&dw->work);
}
   1018