      1 /*	$NetBSD: linux_work.c,v 1.8 2018/08/27 13:38:32 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2013 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Taylor R. Campbell.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: linux_work.c,v 1.8 2018/08/27 13:38:32 riastradh Exp $");
     34 
     35 #include <sys/types.h>
     36 #include <sys/param.h>
     37 #include <sys/atomic.h>
     38 #include <sys/callout.h>
     39 #include <sys/condvar.h>
     40 #include <sys/errno.h>
     41 #include <sys/intr.h>
     42 #include <sys/kmem.h>
     43 #include <sys/mutex.h>
     44 #include <sys/queue.h>
     45 #include <sys/systm.h>
     46 #include <sys/workqueue.h>
     47 #include <sys/cpu.h>
     48 
     49 #include <machine/lock.h>
     50 
     51 #include <linux/workqueue.h>
     52 
     53 /* XXX Kludge until we sync with HEAD.  */
     54 #if DIAGNOSTIC
     55 #define	__diagused
     56 #else
     57 #define	__diagused	__unused
     58 #endif
     59 
     60 struct workqueue_struct {
     61 	struct workqueue		*wq_workqueue;
     62 
     63 	struct rb_node			wq_node;
     64 	struct lwp			*wq_lwp;
     65 
     66 	/* XXX The following should all be per-CPU.  */
     67 	kmutex_t			wq_lock;
     68 
     69 	/*
     70 	 * Condvar for when any state related to this workqueue
     71 	 * changes.  XXX Could split this into multiple condvars for
     72 	 * different purposes, but whatever...
     73 	 */
     74 	kcondvar_t			wq_cv;
     75 
     76 	TAILQ_HEAD(, delayed_work)	wq_delayed;
     77 	struct work_struct		*wq_current_work;
     78 };
     79 
     80 static void	linux_work_lock_init(struct work_struct *);
     81 static void	linux_work_lock(struct work_struct *);
     82 static void	linux_work_unlock(struct work_struct *);
     83 static bool	linux_work_locked(struct work_struct *) __diagused;
     84 
     85 static void	linux_wq_barrier(struct work_struct *);
     86 
     87 static void	linux_wait_for_cancelled_work(struct work_struct *);
     88 static void	linux_wait_for_invoked_work(struct work_struct *);
     89 static void	linux_worker(struct work *, void *);
     90 
     91 static void	linux_cancel_delayed_work_callout(struct delayed_work *, bool);
     92 static void	linux_wait_for_delayed_cancelled_work(struct delayed_work *);
     93 static void	linux_worker_intr(void *);
     94 
     95 struct workqueue_struct		*system_wq;
     96 struct workqueue_struct		*system_long_wq;
     97 struct workqueue_struct		*system_power_efficient_wq;
     98 
     99 static struct {
    100 	kmutex_t		lock;
    101 	struct rb_tree		tree;
    102 } workqueues __cacheline_aligned;
    103 
    104 static const rb_tree_ops_t	workqueues_rb_ops;
    105 
    106 int
    107 linux_workqueue_init(void)
    108 {
    109 
    110 	mutex_init(&workqueues.lock, MUTEX_DEFAULT, IPL_VM);
    111 	rb_tree_init(&workqueues.tree, &workqueues_rb_ops);
    112 
    113 	system_wq = alloc_ordered_workqueue("lnxsyswq", 0);
    114 	if (system_wq == NULL)
    115 		goto fail0;
    116 
    117 	system_long_wq = alloc_ordered_workqueue("lnxlngwq", 0);
    118 	if (system_long_wq == NULL)
    119 		goto fail1;
    120 
    121 	system_power_efficient_wq = alloc_ordered_workqueue("lnxpwrwq", 0);
     122 	if (system_power_efficient_wq == NULL)
    123 		goto fail2;
    124 
    125 	return 0;
    126 
    127 fail3: __unused
    128 	destroy_workqueue(system_power_efficient_wq);
    129 fail2:	destroy_workqueue(system_long_wq);
    130 fail1:	destroy_workqueue(system_wq);
    131 fail0:	mutex_destroy(&workqueues.lock);
    132 	return ENOMEM;
    133 }
    134 
    135 void
    136 linux_workqueue_fini(void)
    137 {
    138 
	destroy_workqueue(system_power_efficient_wq);
	system_power_efficient_wq = NULL;
     139 	destroy_workqueue(system_long_wq);
    140 	system_long_wq = NULL;
    141 	destroy_workqueue(system_wq);
    142 	system_wq = NULL;
    143 	KASSERT(RB_TREE_MIN(&workqueues.tree) == NULL);
    144 	mutex_destroy(&workqueues.lock);
    145 }
    146 
    147 /*
    149  * Table of workqueue LWPs for validation -- assumes there is only one
    150  * thread per workqueue.
    151  *
    152  * XXX Mega-kludgerific!
    153  */
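/*
 * The rb-tree below maps each workqueue's worker LWP back to its
 * workqueue_struct, so that current_work() can tell which work item
 * the calling thread is running.
 */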
    154 
    155 static int
    156 compare_nodes(void *cookie, const void *va, const void *vb)
    157 {
    158 	const struct workqueue_struct *wa = va;
    159 	const struct workqueue_struct *wb = vb;
    160 
    161 	if ((uintptr_t)wa->wq_lwp < (uintptr_t)wb->wq_lwp)
    162 		return -1;
    163 	if ((uintptr_t)wa->wq_lwp > (uintptr_t)wb->wq_lwp)
    164 		return +1;
    165 	return 0;
    166 }
    167 
    168 static int
    169 compare_key(void *cookie, const void *vn, const void *vk)
    170 {
    171 	const struct workqueue_struct *w = vn;
    172 	const struct lwp *lwp = vk;
    173 
    174 	if ((uintptr_t)w->wq_lwp < (uintptr_t)lwp)
    175 		return -1;
    176 	if ((uintptr_t)w->wq_lwp > (uintptr_t)lwp)
    177 		return +1;
    178 	return 0;
    179 }
    180 
    181 static const rb_tree_ops_t workqueues_rb_ops = {
    182 	.rbto_compare_nodes = compare_nodes,
    183 	.rbto_compare_key = compare_key,
    184 	.rbto_node_offset = offsetof(struct workqueue_struct, wq_node),
    185 };
    186 
    187 struct wq_whoami_work {
    188 	kmutex_t		www_lock;
    189 	kcondvar_t		www_cv;
    190 	struct workqueue_struct	*www_wq;
    191 	struct work_struct	www_work;
    192 };
    193 
    194 static void
    195 workqueue_whoami_work(struct work_struct *work)
    196 {
    197 	struct wq_whoami_work *www = container_of(work, struct wq_whoami_work,
    198 	    www_work);
    199 	struct workqueue_struct *wq = www->www_wq;
    200 
    201 	KASSERT(wq->wq_lwp == NULL);
    202 	wq->wq_lwp = curlwp;
    203 
    204 	mutex_enter(&www->www_lock);
    205 	cv_broadcast(&www->www_cv);
    206 	mutex_exit(&www->www_lock);
    207 }
    208 
    209 static void
    210 workqueue_whoami(struct workqueue_struct *wq)
    211 {
    212 	struct wq_whoami_work www;
    213 	struct workqueue_struct *collision __diagused;
    214 
    215 	mutex_init(&www.www_lock, MUTEX_DEFAULT, IPL_NONE);
    216 	cv_init(&www.www_cv, "wqwhoami");
    217 
    218 	INIT_WORK(&www.www_work, &workqueue_whoami_work);
    219 	queue_work(wq, &www.www_work);
    220 
    221 	mutex_enter(&www.www_lock);
    222 	while (wq->wq_lwp == NULL)
    223 		cv_wait(&www.www_cv, &www.www_lock);
    224 	mutex_exit(&www.www_lock);
    225 
    226 	cv_destroy(&www.www_cv);
    227 	mutex_destroy(&www.www_lock);
    228 
    229 	mutex_enter(&workqueues.lock);
    230 	collision = rb_tree_insert_node(&workqueues.tree, wq);
    231 	mutex_exit(&workqueues.lock);
    232 
    233 	KASSERT(collision == wq);
    234 }
    235 
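/*
 * current_work()
 *
 *	Return the work item the calling workqueue thread is currently
 *	invoking, or NULL if the caller is not a workqueue worker LWP.
 */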
    236 struct work_struct *
    237 current_work(void)
    238 {
    239 	struct workqueue_struct *wq;
    240 	struct work_struct *work;
    241 
    242 	mutex_enter(&workqueues.lock);
    243 	wq = rb_tree_find_node(&workqueues.tree, curlwp);
    244 	work = (wq == NULL ? NULL : wq->wq_current_work);
    245 	mutex_exit(&workqueues.lock);
    246 
    247 	return work;
    248 }
    249 
    250 /*
    252  * Workqueues
    253  */
    254 
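/*
 * alloc_ordered_workqueue(name, linux_flags)
 *
 *	Create a workqueue backed by a single NetBSD workqueue(9)
 *	thread at IPL_VM and record that thread's LWP for
 *	current_work().  No Linux flags are supported.  Returns NULL
 *	on failure.
 */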
    255 struct workqueue_struct *
    256 alloc_ordered_workqueue(const char *name, int linux_flags)
    257 {
    258 	struct workqueue_struct *wq;
    259 	int flags = WQ_MPSAFE;
    260 	int error;
    261 
    262 	KASSERT(linux_flags == 0);
    263 
    264 	wq = kmem_alloc(sizeof(*wq), KM_SLEEP);
    265 	error = workqueue_create(&wq->wq_workqueue, name, &linux_worker,
    266 	    wq, PRI_NONE, IPL_VM, flags);
    267 	if (error) {
    268 		kmem_free(wq, sizeof(*wq));
    269 		return NULL;
    270 	}
    271 
    272 	mutex_init(&wq->wq_lock, MUTEX_DEFAULT, IPL_VM);
    273 	cv_init(&wq->wq_cv, name);
    274 	TAILQ_INIT(&wq->wq_delayed);
    275 	wq->wq_current_work = NULL;
    276 
    277 	workqueue_whoami(wq);
    278 	KASSERT(wq->wq_lwp != NULL);
    279 
    280 	return wq;
    281 }
    282 
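/*
 * destroy_workqueue(wq)
 *
 *	Cancel all of wq's delayed work, tear down the underlying
 *	workqueue(9) (which drains any remaining work), and free wq.
 */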
    283 void
    284 destroy_workqueue(struct workqueue_struct *wq)
    285 {
    286 
    287 	/*
    288 	 * Cancel all delayed work.
    289 	 */
    290 	for (;;) {
    291 		struct delayed_work *dw;
    292 
    293 		mutex_enter(&wq->wq_lock);
    294 		if (TAILQ_EMPTY(&wq->wq_delayed)) {
    295 			dw = NULL;
    296 		} else {
    297 			dw = TAILQ_FIRST(&wq->wq_delayed);
    298 			TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
    299 		}
    300 		mutex_exit(&wq->wq_lock);
    301 
    302 		if (dw == NULL)
    303 			break;
    304 
    305 		cancel_delayed_work_sync(dw);
    306 	}
    307 
    308 	/*
    309 	 * workqueue_destroy empties the queue; we need not wait for
    310 	 * completion explicitly.  However, we can't destroy the
    311 	 * condvar or mutex until this is done.
    312 	 */
    313 	workqueue_destroy(wq->wq_workqueue);
    314 	KASSERT(wq->wq_current_work == NULL);
    315 	wq->wq_workqueue = NULL;
    316 
    317 	cv_destroy(&wq->wq_cv);
    318 	mutex_destroy(&wq->wq_lock);
    319 
    320 	kmem_free(wq, sizeof(*wq));
    321 }
    322 
    323 /*
    325  * Flush
    326  *
    327  * Note:  This doesn't cancel or wait for delayed work.  This seems to
    328  * match what Linux does (or, doesn't do).
    329  */
    330 
    331 void
    332 flush_scheduled_work(void)
    333 {
    334 	flush_workqueue(system_wq);
    335 }
    336 
    337 struct wq_flush_work {
    338 	struct work_struct	wqfw_work;
    339 	struct wq_flush		*wqfw_flush;
    340 };
    341 
    342 struct wq_flush {
    343 	kmutex_t	wqf_lock;
    344 	kcondvar_t	wqf_cv;
    345 	unsigned int	wqf_n;
    346 };
    347 
    348 void
    349 flush_work(struct work_struct *work)
    350 {
    351 	struct workqueue_struct *const wq = work->w_wq;
    352 
    353 	if (wq != NULL)
    354 		flush_workqueue(wq);
    355 }
    356 
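/*
 * flush_workqueue(wq)
 *
 *	Enqueue a barrier work item on wq and wait for it to run.
 *	Since the workqueue is ordered (single worker thread), work
 *	queued before the barrier should have completed by the time
 *	the barrier runs.
 */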
    357 void
    358 flush_workqueue(struct workqueue_struct *wq)
    359 {
    360 	static const struct wq_flush zero_wqf;
    361 	struct wq_flush wqf = zero_wqf;
    362 
    363 	mutex_init(&wqf.wqf_lock, MUTEX_DEFAULT, IPL_NONE);
    364 	cv_init(&wqf.wqf_cv, "lnxwflsh");
    365 
    366 	if (1) {
    367 		struct wq_flush_work *const wqfw = kmem_zalloc(sizeof(*wqfw),
    368 		    KM_SLEEP);
    369 
    370 		wqf.wqf_n = 1;
    371 		wqfw->wqfw_flush = &wqf;
    372 		INIT_WORK(&wqfw->wqfw_work, &linux_wq_barrier);
    373 		wqfw->wqfw_work.w_wq = wq;
    374 		wqfw->wqfw_work.w_state = WORK_PENDING;
    375 		workqueue_enqueue(wq->wq_workqueue, &wqfw->wqfw_work.w_wk,
    376 		    NULL);
    377 	} else {
    378 		struct cpu_info *ci;
    379 		CPU_INFO_ITERATOR cii;
    380 		struct wq_flush_work *wqfw;
    381 
    382 		panic("per-CPU Linux workqueues don't work yet!");
    383 
    384 		wqf.wqf_n = 0;
    385 		for (CPU_INFO_FOREACH(cii, ci)) {
    386 			wqfw = kmem_zalloc(sizeof(*wqfw), KM_SLEEP);
    387 			mutex_enter(&wqf.wqf_lock);
    388 			wqf.wqf_n++;
    389 			mutex_exit(&wqf.wqf_lock);
    390 			wqfw->wqfw_flush = &wqf;
    391 			INIT_WORK(&wqfw->wqfw_work, &linux_wq_barrier);
    392 			wqfw->wqfw_work.w_state = WORK_PENDING;
    393 			wqfw->wqfw_work.w_wq = wq;
    394 			workqueue_enqueue(wq->wq_workqueue,
    395 			    &wqfw->wqfw_work.w_wk, ci);
    396 		}
    397 	}
    398 
    399 	mutex_enter(&wqf.wqf_lock);
    400 	while (0 < wqf.wqf_n)
    401 		cv_wait(&wqf.wqf_cv, &wqf.wqf_lock);
    402 	mutex_exit(&wqf.wqf_lock);
    403 
    404 	cv_destroy(&wqf.wqf_cv);
    405 	mutex_destroy(&wqf.wqf_lock);
    406 }
    407 
    408 static void
    409 linux_wq_barrier(struct work_struct *work)
    410 {
    411 	struct wq_flush_work *const wqfw = container_of(work,
    412 	    struct wq_flush_work, wqfw_work);
    413 	struct wq_flush *const wqf = wqfw->wqfw_flush;
    414 
    415 	mutex_enter(&wqf->wqf_lock);
    416 	if (--wqf->wqf_n == 0)
    417 		cv_broadcast(&wqf->wqf_cv);
    418 	mutex_exit(&wqf->wqf_lock);
    419 
    420 	kmem_free(wqfw, sizeof(*wqfw));
    421 }
    422 
    423 /*
    425  * Work locking
    426  *
    427  * We use __cpu_simple_lock(9) rather than mutex(9) because Linux code
    428  * does not destroy work, so there is nowhere to call mutex_destroy.
    429  *
    430  * XXX This is getting out of hand...  Really, work items shouldn't
    431  * have locks in them at all; instead the workqueues should.
    432  */
    433 
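/*
 * The work lock is a spin lock held at splvm(): linux_work_lock and
 * linux_work_unlock do the same SPL and ci_mtx_count bookkeeping as a
 * spin mutex(9), so the lock must be held only briefly and never
 * across a sleep.
 */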
    434 static void
    435 linux_work_lock_init(struct work_struct *work)
    436 {
    437 
    438 	__cpu_simple_lock_init(&work->w_lock);
    439 }
    440 
    441 static void
    442 linux_work_lock(struct work_struct *work)
    443 {
    444 	struct cpu_info *ci;
    445 	int cnt, s;
    446 
    447 	/* XXX Copypasta of MUTEX_SPIN_SPLRAISE.  */
    448 	s = splvm();
    449 	ci = curcpu();
    450 	cnt = ci->ci_mtx_count--;
    451 	__insn_barrier();
    452 	if (cnt == 0)
    453 		ci->ci_mtx_oldspl = s;
    454 
    455 	__cpu_simple_lock(&work->w_lock);
    456 }
    457 
    458 static void
    459 linux_work_unlock(struct work_struct *work)
    460 {
    461 	struct cpu_info *ci;
    462 	int s;
    463 
    464 	__cpu_simple_unlock(&work->w_lock);
    465 
    466 	/* XXX Copypasta of MUTEX_SPIN_SPLRESTORE.  */
    467 	ci = curcpu();
    468 	s = ci->ci_mtx_oldspl;
    469 	__insn_barrier();
    470 	if (++ci->ci_mtx_count == 0)
    471 		splx(s);
    472 }
    473 
    474 static bool __diagused
    475 linux_work_locked(struct work_struct *work)
    476 {
    477 	return __SIMPLELOCK_LOCKED_P(&work->w_lock);
    478 }
    479 
    480 /*
    482  * Work
    483  */
    484 
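/*
 * Illustrative usage sketch only -- mydrv_task and mydrv_work are
 * hypothetical names, not part of this file:
 *
 *	static struct work_struct mydrv_work;
 *
 *	INIT_WORK(&mydrv_work, mydrv_task);
 *	queue_work(wq, &mydrv_work);	(or schedule_work(&mydrv_work))
 *	...
 *	cancel_work_sync(&mydrv_work);	(before tearing down mydrv_work)
 */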
    485 void
    486 INIT_WORK(struct work_struct *work, void (*fn)(struct work_struct *))
    487 {
    488 
    489 	linux_work_lock_init(work);
    490 	work->w_state = WORK_IDLE;
    491 	work->w_wq = NULL;
    492 	work->func = fn;
    493 }
    494 
    495 bool
    496 schedule_work(struct work_struct *work)
    497 {
    498 	return queue_work(system_wq, work);
    499 }
    500 
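/*
 * queue_work(wq, work)
 *
 *	Put work on wq.  Returns true if it was newly queued, false if
 *	it was already pending on wq or has been cancelled.  Must not
 *	be called on delayed work.
 */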
    501 bool
    502 queue_work(struct workqueue_struct *wq, struct work_struct *work)
    503 {
    504 	/* True if we put it on the queue, false if it was already there.  */
    505 	bool newly_queued;
    506 
    507 	KASSERT(wq != NULL);
    508 
    509 	linux_work_lock(work);
    510 	switch (work->w_state) {
    511 	case WORK_IDLE:
    512 	case WORK_INVOKED:
    513 		work->w_state = WORK_PENDING;
    514 		work->w_wq = wq;
    515 		workqueue_enqueue(wq->wq_workqueue, &work->w_wk, NULL);
    516 		newly_queued = true;
    517 		break;
    518 
    519 	case WORK_DELAYED:
    520 		panic("queue_work(delayed work %p)", work);
    521 		break;
    522 
    523 	case WORK_PENDING:
    524 		KASSERT(work->w_wq == wq);
    525 		newly_queued = false;
    526 		break;
    527 
    528 	case WORK_CANCELLED:
    529 		newly_queued = false;
    530 		break;
    531 
    532 	case WORK_DELAYED_CANCELLED:
    533 		panic("queue_work(delayed work %p)", work);
    534 		break;
    535 
    536 	default:
    537 		panic("work %p in bad state: %d", work, (int)work->w_state);
    538 		break;
    539 	}
    540 	linux_work_unlock(work);
    541 
    542 	return newly_queued;
    543 }
    544 
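/*
 * cancel_work_sync(work)
 *
 *	Cancel work and wait for the workqueue to be done with it: a
 *	pending invocation is cancelled and waited out; an invocation
 *	already in progress is waited for.  Returns true iff a pending
 *	invocation was prevented.
 */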
    545 bool
    546 cancel_work_sync(struct work_struct *work)
    547 {
    548 	bool cancelled_p = false;
    549 
    550 	linux_work_lock(work);
    551 	switch (work->w_state) {
    552 	case WORK_IDLE:		/* Nothing to do.  */
    553 		break;
    554 
    555 	case WORK_DELAYED:
    556 		panic("cancel_work_sync(delayed work %p)", work);
    557 		break;
    558 
    559 	case WORK_PENDING:
    560 		work->w_state = WORK_CANCELLED;
    561 		linux_wait_for_cancelled_work(work);
    562 		cancelled_p = true;
    563 		break;
    564 
    565 	case WORK_INVOKED:
    566 		linux_wait_for_invoked_work(work);
    567 		break;
    568 
    569 	case WORK_CANCELLED:	/* Already done.  */
    570 		break;
    571 
    572 	case WORK_DELAYED_CANCELLED:
    573 		panic("cancel_work_sync(delayed work %p)", work);
    574 		break;
    575 
    576 	default:
    577 		panic("work %p in bad state: %d", work, (int)work->w_state);
    578 		break;
    579 	}
    580 	linux_work_unlock(work);
    581 
    582 	return cancelled_p;
    583 }
    584 
    585 static void
    586 linux_wait_for_cancelled_work(struct work_struct *work)
    587 {
    588 	struct workqueue_struct *wq;
    589 
    590 	KASSERT(linux_work_locked(work));
    591 	KASSERT(work->w_state == WORK_CANCELLED);
    592 
    593 	wq = work->w_wq;
    594 	do {
    595 		mutex_enter(&wq->wq_lock);
    596 		linux_work_unlock(work);
    597 		cv_wait(&wq->wq_cv, &wq->wq_lock);
    598 		mutex_exit(&wq->wq_lock);
    599 		linux_work_lock(work);
    600 	} while ((work->w_state == WORK_CANCELLED) && (work->w_wq == wq));
    601 }
    602 
    603 static void
    604 linux_wait_for_invoked_work(struct work_struct *work)
    605 {
    606 	struct workqueue_struct *wq;
    607 
    608 	KASSERT(linux_work_locked(work));
    609 	KASSERT(work->w_state == WORK_INVOKED);
    610 
    611 	wq = work->w_wq;
    612 	mutex_enter(&wq->wq_lock);
    613 	linux_work_unlock(work);
    614 	while (wq->wq_current_work == work)
    615 		cv_wait(&wq->wq_cv, &wq->wq_lock);
    616 	mutex_exit(&wq->wq_lock);
    617 
    618 	linux_work_lock(work);	/* XXX needless relock */
    619 }
    620 
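/*
 * linux_worker(wk, arg)
 *
 *	workqueue(9) worker: invoke a pending work item, or retire a
 *	cancelled one back to idle and wake any waiters.
 */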
    621 static void
    622 linux_worker(struct work *wk, void *arg)
    623 {
    624 	struct work_struct *const work = container_of(wk, struct work_struct,
    625 	    w_wk);
    626 	struct workqueue_struct *const wq = arg;
    627 
    628 	linux_work_lock(work);
    629 	switch (work->w_state) {
    630 	case WORK_IDLE:
    631 		panic("idle work %p got queued: %p", work, wq);
    632 		break;
    633 
    634 	case WORK_DELAYED:
    635 		panic("delayed work %p got queued: %p", work, wq);
    636 		break;
    637 
    638 	case WORK_PENDING:
    639 		KASSERT(work->w_wq == wq);
    640 
    641 		/* Get ready to invoke this one.  */
    642 		mutex_enter(&wq->wq_lock);
    643 		work->w_state = WORK_INVOKED;
    644 		KASSERT(wq->wq_current_work == NULL);
    645 		wq->wq_current_work = work;
    646 		mutex_exit(&wq->wq_lock);
    647 
    648 		/* Unlock it and do it.  Can't use work after this.  */
    649 		linux_work_unlock(work);
    650 		(*work->func)(work);
    651 
    652 		/* All done.  Notify anyone waiting for completion.  */
    653 		mutex_enter(&wq->wq_lock);
    654 		KASSERT(wq->wq_current_work == work);
    655 		wq->wq_current_work = NULL;
    656 		cv_broadcast(&wq->wq_cv);
    657 		mutex_exit(&wq->wq_lock);
    658 		return;
    659 
    660 	case WORK_INVOKED:
    661 		panic("invoked work %p got requeued: %p", work, wq);
    662 		break;
    663 
    664 	case WORK_CANCELLED:
    665 		KASSERT(work->w_wq == wq);
    666 
    667 		/* Return to idle; notify anyone waiting for cancellation.  */
    668 		mutex_enter(&wq->wq_lock);
    669 		work->w_state = WORK_IDLE;
    670 		work->w_wq = NULL;
    671 		cv_broadcast(&wq->wq_cv);
    672 		mutex_exit(&wq->wq_lock);
    673 		break;
    674 
    675 	case WORK_DELAYED_CANCELLED:
     676 		panic("cancelled delayed work %p got queued: %p", work, wq);
    677 		break;
    678 
    679 	default:
    680 		panic("work %p in bad state: %d", work, (int)work->w_state);
    681 		break;
    682 	}
    683 	linux_work_unlock(work);
    684 }
    685 
    686 /*
    688  * Delayed work
    689  */
    690 
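/*
 * Illustrative usage sketch only -- mydrv_tick_task and mydrv_tick are
 * hypothetical names, not part of this file:
 *
 *	static struct delayed_work mydrv_tick;
 *
 *	INIT_DELAYED_WORK(&mydrv_tick, mydrv_tick_task);
 *	schedule_delayed_work(&mydrv_tick, hz);	(run after about one second)
 *	...
 *	cancel_delayed_work_sync(&mydrv_tick);
 */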
    691 void
    692 INIT_DELAYED_WORK(struct delayed_work *dw, void (*fn)(struct work_struct *))
    693 {
    694 	INIT_WORK(&dw->work, fn);
    695 }
    696 
    697 bool
    698 schedule_delayed_work(struct delayed_work *dw, unsigned long ticks)
    699 {
    700 	return queue_delayed_work(system_wq, dw, ticks);
    701 }
    702 
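/*
 * queue_delayed_work(wq, dw, ticks)
 *
 *	Schedule dw to run on wq after the given number of callout
 *	ticks, or immediately if ticks is zero.  If the timer is
 *	already running, leave it alone.  Returns true iff dw was
 *	newly scheduled.
 */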
    703 bool
    704 queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    705     unsigned long ticks)
    706 {
    707 	bool newly_queued;
    708 
    709 	KASSERT(wq != NULL);
    710 
    711 	linux_work_lock(&dw->work);
    712 	switch (dw->work.w_state) {
    713 	case WORK_IDLE:
    714 	case WORK_INVOKED:
    715 		if (ticks == 0) {
    716 			/* Skip the delay and queue it now.  */
    717 			dw->work.w_state = WORK_PENDING;
    718 			dw->work.w_wq = wq;
    719 			workqueue_enqueue(wq->wq_workqueue, &dw->work.w_wk,
    720 			    NULL);
    721 		} else {
    722 			callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
    723 			callout_reset(&dw->dw_callout, ticks,
    724 			    &linux_worker_intr, dw);
    725 			dw->work.w_state = WORK_DELAYED;
    726 			dw->work.w_wq = wq;
    727 			mutex_enter(&wq->wq_lock);
    728 			TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
    729 			mutex_exit(&wq->wq_lock);
    730 		}
    731 		newly_queued = true;
    732 		break;
    733 
    734 	case WORK_DELAYED:
    735 		/*
    736 		 * Timer is already ticking.  Leave it to time out
    737 		 * whenever it was going to time out, as Linux does --
    738 		 * neither speed it up nor postpone it.
    739 		 */
    740 		newly_queued = false;
    741 		break;
    742 
    743 	case WORK_PENDING:
    744 		KASSERT(dw->work.w_wq == wq);
    745 		newly_queued = false;
    746 		break;
    747 
    748 	case WORK_CANCELLED:
    749 	case WORK_DELAYED_CANCELLED:
    750 		/* XXX Wait for cancellation and then queue?  */
    751 		newly_queued = false;
    752 		break;
    753 
    754 	default:
    755 		panic("delayed work %p in bad state: %d", dw,
    756 		    (int)dw->work.w_state);
    757 		break;
    758 	}
    759 	linux_work_unlock(&dw->work);
    760 
    761 	return newly_queued;
    762 }
    763 
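/*
 * mod_delayed_work(wq, dw, ticks)
 *
 *	Like queue_delayed_work, except that if the timer is already
 *	running it is rescheduled for the new delay.  Returns true iff
 *	an existing timer was modified.
 */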
    764 bool
    765 mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    766     unsigned long ticks)
    767 {
    768 	bool timer_modified;
    769 
    770 	KASSERT(wq != NULL);
    771 
    772 	linux_work_lock(&dw->work);
    773 	switch (dw->work.w_state) {
    774 	case WORK_IDLE:
    775 	case WORK_INVOKED:
    776 		if (ticks == 0) {
    777 			/* Skip the delay and queue it now.  */
    778 			dw->work.w_state = WORK_PENDING;
    779 			dw->work.w_wq = wq;
    780 			workqueue_enqueue(wq->wq_workqueue, &dw->work.w_wk,
    781 			    NULL);
    782 		} else {
    783 			callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
    784 			callout_reset(&dw->dw_callout, ticks,
    785 			    &linux_worker_intr, dw);
    786 			dw->work.w_state = WORK_DELAYED;
    787 			dw->work.w_wq = wq;
    788 			mutex_enter(&wq->wq_lock);
    789 			TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
    790 			mutex_exit(&wq->wq_lock);
    791 		}
    792 		timer_modified = false;
    793 		break;
    794 
    795 	case WORK_DELAYED:
    796 		/*
    797 		 * Timer is already ticking.  Reschedule it.
    798 		 */
    799 		callout_schedule(&dw->dw_callout, ticks);
    800 		timer_modified = true;
    801 		break;
    802 
    803 	case WORK_PENDING:
    804 		KASSERT(dw->work.w_wq == wq);
    805 		timer_modified = false;
    806 		break;
    807 
    808 	case WORK_CANCELLED:
    809 	case WORK_DELAYED_CANCELLED:
    810 		/* XXX Wait for cancellation and then queue?  */
    811 		timer_modified = false;
    812 		break;
    813 
    814 	default:
    815 		panic("delayed work %p in bad state: %d", dw,
    816 		    (int)dw->work.w_state);
    817 		break;
    818 	}
    819 	linux_work_unlock(&dw->work);
    820 
    821 	return timer_modified;
    822 }
    823 
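/*
 * cancel_delayed_work(dw)
 *
 *	Stop dw's timer, or cancel dw if it is already pending, without
 *	waiting for an invocation in progress.  Returns true iff an
 *	invocation was prevented.
 */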
    824 bool
    825 cancel_delayed_work(struct delayed_work *dw)
    826 {
    827 	bool cancelled_p = false;
    828 
    829 	linux_work_lock(&dw->work);
    830 	switch (dw->work.w_state) {
    831 	case WORK_IDLE:		/* Nothing to do.  */
    832 		break;
    833 
    834 	case WORK_DELAYED:
    835 		dw->work.w_state = WORK_DELAYED_CANCELLED;
    836 		linux_cancel_delayed_work_callout(dw, false);
    837 		cancelled_p = true;
    838 		break;
    839 
    840 	case WORK_PENDING:
    841 		dw->work.w_state = WORK_CANCELLED;
    842 		cancelled_p = true;
    843 		break;
    844 
    845 	case WORK_INVOKED:	/* Don't wait!  */
    846 		break;
    847 
    848 	case WORK_CANCELLED:	/* Already done.  */
    849 	case WORK_DELAYED_CANCELLED:
    850 		break;
    851 
    852 	default:
    853 		panic("delayed work %p in bad state: %d", dw,
    854 		    (int)dw->work.w_state);
    855 		break;
    856 	}
    857 	linux_work_unlock(&dw->work);
    858 
    859 	return cancelled_p;
    860 }
    861 
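/*
 * cancel_delayed_work_sync(dw)
 *
 *	Like cancel_delayed_work, but wait: for the callout to be
 *	halted, for a pending or cancelled item to be retired, and for
 *	an invocation in progress to finish.  Returns true iff an
 *	invocation was prevented.
 */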
    862 bool
    863 cancel_delayed_work_sync(struct delayed_work *dw)
    864 {
    865 	bool cancelled_p = false;
    866 
    867 	linux_work_lock(&dw->work);
    868 	switch (dw->work.w_state) {
    869 	case WORK_IDLE:		/* Nothing to do.  */
    870 		break;
    871 
    872 	case WORK_DELAYED:
    873 		dw->work.w_state = WORK_DELAYED_CANCELLED;
    874 		linux_cancel_delayed_work_callout(dw, true);
    875 		cancelled_p = true;
    876 		break;
    877 
    878 	case WORK_PENDING:
    879 		dw->work.w_state = WORK_CANCELLED;
    880 		linux_wait_for_cancelled_work(&dw->work);
    881 		cancelled_p = true;
    882 		break;
    883 
    884 	case WORK_INVOKED:
    885 		linux_wait_for_invoked_work(&dw->work);
    886 		break;
    887 
    888 	case WORK_CANCELLED:	/* Already done.  */
    889 		break;
    890 
    891 	case WORK_DELAYED_CANCELLED:
    892 		linux_wait_for_delayed_cancelled_work(dw);
    893 		break;
    894 
    895 	default:
    896 		panic("delayed work %p in bad state: %d", dw,
    897 		    (int)dw->work.w_state);
    898 		break;
    899 	}
    900 	linux_work_unlock(&dw->work);
    901 
    902 	return cancelled_p;
    903 }
    904 
    905 void
    906 flush_delayed_work(struct delayed_work *dw)
    907 {
    908 	struct workqueue_struct *wq = dw->work.w_wq;
    909 
    910 	if (wq != NULL)
    911 		flush_workqueue(wq);
    912 }
    913 
    914 static void
    915 linux_cancel_delayed_work_callout(struct delayed_work *dw, bool wait)
    916 {
    917 	bool fired_p;
    918 
    919 	KASSERT(linux_work_locked(&dw->work));
    920 	KASSERT(dw->work.w_state == WORK_DELAYED_CANCELLED);
    921 
    922 	if (wait) {
    923 		/*
    924 		 * We unlock, halt, and then relock, rather than
    925 		 * passing an interlock to callout_halt, for two
    926 		 * reasons:
    927 		 *
    928 		 * (1) The work lock is not a mutex(9), so we can't use it.
    929 		 * (2) The WORK_DELAYED_CANCELLED state serves as an interlock.
    930 		 */
    931 		linux_work_unlock(&dw->work);
    932 		fired_p = callout_halt(&dw->dw_callout, NULL);
    933 		linux_work_lock(&dw->work);
    934 	} else {
    935 		fired_p = callout_stop(&dw->dw_callout);
    936 	}
    937 
    938 	/*
    939 	 * fired_p means we didn't cancel the callout, so it must have
    940 	 * already begun and will clean up after itself.
    941 	 *
    942 	 * !fired_p means we cancelled it so we have to clean up after
    943 	 * it.  Nobody else should have changed the state in that case.
    944 	 */
    945 	if (!fired_p) {
    946 		struct workqueue_struct *wq;
    947 
    948 		KASSERT(linux_work_locked(&dw->work));
    949 		KASSERT(dw->work.w_state == WORK_DELAYED_CANCELLED);
    950 
    951 		wq = dw->work.w_wq;
    952 		mutex_enter(&wq->wq_lock);
    953 		TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
    954 		callout_destroy(&dw->dw_callout);
    955 		dw->work.w_state = WORK_IDLE;
    956 		dw->work.w_wq = NULL;
    957 		cv_broadcast(&wq->wq_cv);
    958 		mutex_exit(&wq->wq_lock);
    959 	}
    960 }
    961 
    962 static void
    963 linux_wait_for_delayed_cancelled_work(struct delayed_work *dw)
    964 {
    965 	struct workqueue_struct *wq;
    966 
    967 	KASSERT(linux_work_locked(&dw->work));
    968 	KASSERT(dw->work.w_state == WORK_DELAYED_CANCELLED);
    969 
    970 	wq = dw->work.w_wq;
    971 	do {
    972 		mutex_enter(&wq->wq_lock);
    973 		linux_work_unlock(&dw->work);
    974 		cv_wait(&wq->wq_cv, &wq->wq_lock);
    975 		mutex_exit(&wq->wq_lock);
    976 		linux_work_lock(&dw->work);
    977 	} while ((dw->work.w_state == WORK_DELAYED_CANCELLED) &&
    978 	    (dw->work.w_wq == wq));
    979 }
    980 
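/*
 * linux_worker_intr(arg)
 *
 *	Callout handler for delayed work: promote the work to pending
 *	and enqueue it, or, if it was cancelled in the meantime, return
 *	it to idle and wake any waiters.  Either way the callout is
 *	finished and destroyed.
 */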
    981 static void
    982 linux_worker_intr(void *arg)
    983 {
    984 	struct delayed_work *dw = arg;
    985 	struct workqueue_struct *wq;
    986 
    987 	linux_work_lock(&dw->work);
    988 
    989 	KASSERT((dw->work.w_state == WORK_DELAYED) ||
    990 	    (dw->work.w_state == WORK_DELAYED_CANCELLED));
    991 
    992 	wq = dw->work.w_wq;
    993 	mutex_enter(&wq->wq_lock);
    994 
    995 	/* Queue the work, or return it to idle and alert any cancellers.  */
    996 	if (__predict_true(dw->work.w_state == WORK_DELAYED)) {
    997 		dw->work.w_state = WORK_PENDING;
    998 		workqueue_enqueue(dw->work.w_wq->wq_workqueue, &dw->work.w_wk,
    999 		    NULL);
   1000 	} else {
   1001 		KASSERT(dw->work.w_state == WORK_DELAYED_CANCELLED);
   1002 		dw->work.w_state = WORK_IDLE;
   1003 		dw->work.w_wq = NULL;
   1004 		cv_broadcast(&wq->wq_cv);
   1005 	}
   1006 
   1007 	/* Either way, the callout is done.  */
   1008 	TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
   1009 	callout_destroy(&dw->dw_callout);
   1010 
   1011 	mutex_exit(&wq->wq_lock);
   1012 	linux_work_unlock(&dw->work);
   1013 }
   1014