Home | History | Annotate | Line # | Download | only in linux
linux_work.c revision 1.7
      1 /*	$NetBSD: linux_work.c,v 1.7 2018/08/27 13:35:55 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2013 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Taylor R. Campbell.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: linux_work.c,v 1.7 2018/08/27 13:35:55 riastradh Exp $");
     34 
     35 #include <sys/types.h>
     36 #include <sys/param.h>
     37 #include <sys/atomic.h>
     38 #include <sys/callout.h>
     39 #include <sys/condvar.h>
     40 #include <sys/errno.h>
     41 #include <sys/intr.h>
     42 #include <sys/kmem.h>
     43 #include <sys/mutex.h>
     44 #include <sys/queue.h>
     45 #include <sys/systm.h>
     46 #include <sys/workqueue.h>
     47 #include <sys/cpu.h>
     48 
     49 #include <machine/lock.h>
     50 
     51 #include <linux/workqueue.h>
     52 
/*
 * XXX Kludge until we sync with HEAD.
 *
 * Local definition of __diagused: it expands to nothing when
 * DIAGNOSTIC is set and to __unused otherwise.  In this file it marks
 * a function and a variable that are referenced only inside KASSERT.
 */
#if DIAGNOSTIC
#define	__diagused
#else
#define	__diagused	__unused
#endif
     59 
/*
 * Linux-style workqueue, layered on a native workqueue created with
 * workqueue_create.
 */
struct workqueue_struct {
	/* Underlying native workqueue.  */
	struct workqueue		*wq_workqueue;

	/* Linkage in workqueues.tree, which is ordered by wq_lwp.  */
	struct rb_node			wq_node;
	/* LWP that ran the whoami work item; set once during creation.  */
	struct lwp			*wq_lwp;

	/* XXX The following should all be per-CPU.  */
	kmutex_t			wq_lock;

	/*
	 * Condvar for when any state related to this workqueue
	 * changes.  XXX Could split this into multiple condvars for
	 * different purposes, but whatever...
	 */
	kcondvar_t			wq_cv;

	/* Delayed work whose callout has been armed and not yet retired.  */
	TAILQ_HEAD(, delayed_work)	wq_delayed;
	/* Work item whose function is being invoked right now, else NULL.  */
	struct work_struct		*wq_current_work;
};
     79 
/* Per-work-item lock primitives.  */
static void	linux_work_lock_init(struct work_struct *);
static void	linux_work_lock(struct work_struct *);
static void	linux_work_unlock(struct work_struct *);
static bool	linux_work_locked(struct work_struct *) __diagused;

/* Work function queued by flush_workqueue.  */
static void	linux_wq_barrier(struct work_struct *);

/* Waiting for plain work, and the native workqueue callback.  */
static void	linux_wait_for_cancelled_work(struct work_struct *);
static void	linux_wait_for_invoked_work(struct work_struct *);
static void	linux_worker(struct work *, void *);

/* Delayed-work cancellation helpers and the callout handler.  */
static void	linux_cancel_delayed_work_callout(struct delayed_work *, bool);
static void	linux_wait_for_delayed_cancelled_work(struct delayed_work *);
static void	linux_worker_intr(void *);
     94 
/* System-wide workqueues, created by linux_workqueue_init.  */
struct workqueue_struct		*system_wq;
struct workqueue_struct		*system_long_wq;
struct workqueue_struct		*system_power_efficient_wq;

/*
 * Table of workqueues.  Every access to the tree in this file is made
 * with the lock held.
 */
static struct {
	kmutex_t		lock;
	struct rb_tree		tree;
} workqueues __cacheline_aligned;

/* Tentative definition; the initializer follows the comparators.  */
static const rb_tree_ops_t	workqueues_rb_ops;
    105 
    106 int
    107 linux_workqueue_init(void)
    108 {
    109 
    110 	mutex_init(&workqueues.lock, MUTEX_DEFAULT, IPL_VM);
    111 	rb_tree_init(&workqueues.tree, &workqueues_rb_ops);
    112 
    113 	system_wq = alloc_ordered_workqueue("lnxsyswq", 0);
    114 	if (system_wq == NULL)
    115 		goto fail0;
    116 
    117 	system_long_wq = alloc_ordered_workqueue("lnxlngwq", 0);
    118 	if (system_long_wq == NULL)
    119 		goto fail1;
    120 
    121 	system_power_efficient_wq = alloc_ordered_workqueue("lnxpwrwq", 0);
    122 	if (system_long_wq == NULL)
    123 		goto fail2;
    124 
    125 	return 0;
    126 
    127 fail3: __unused
    128 	destroy_workqueue(system_power_efficient_wq);
    129 fail2:	destroy_workqueue(system_long_wq);
    130 fail1:	destroy_workqueue(system_wq);
    131 fail0:	mutex_destroy(&workqueues.lock);
    132 	return ENOMEM;
    133 }
    134 
    135 void
    136 linux_workqueue_fini(void)
    137 {
    138 
    139 	destroy_workqueue(system_long_wq);
    140 	system_long_wq = NULL;
    141 	destroy_workqueue(system_wq);
    142 	system_wq = NULL;
    143 	KASSERT(RB_TREE_MIN(&workqueues.tree) == NULL);
    144 	mutex_destroy(&workqueues.lock);
    145 }
    146 
    147 /*
    149  * Table of workqueue LWPs for validation -- assumes there is only one
    150  * thread per workqueue.
    151  *
    152  * XXX Mega-kludgerific!
    153  */
    154 
    155 static int
    156 compare_nodes(void *cookie, const void *va, const void *vb)
    157 {
    158 	const struct workqueue_struct *wa = va;
    159 	const struct workqueue_struct *wb = vb;
    160 
    161 	if ((uintptr_t)wa->wq_lwp < (uintptr_t)wb->wq_lwp)
    162 		return -1;
    163 	if ((uintptr_t)wa->wq_lwp > (uintptr_t)wb->wq_lwp)
    164 		return +1;
    165 	return 0;
    166 }
    167 
    168 static int
    169 compare_key(void *cookie, const void *vn, const void *vk)
    170 {
    171 	const struct workqueue_struct *w = vn;
    172 	const struct lwp *lwp = vk;
    173 
    174 	if ((uintptr_t)w->wq_lwp < (uintptr_t)lwp)
    175 		return -1;
    176 	if ((uintptr_t)w->wq_lwp > (uintptr_t)lwp)
    177 		return +1;
    178 	return 0;
    179 }
    180 
/*
 * rb-tree operations for workqueues.tree: nodes are struct
 * workqueue_struct linked through wq_node and compared by wq_lwp.
 */
static const rb_tree_ops_t workqueues_rb_ops = {
	.rbto_compare_nodes = compare_nodes,
	.rbto_compare_key = compare_key,
	.rbto_node_offset = offsetof(struct workqueue_struct, wq_node),
};
    186 
/*
 * Work item, with its own lock and condvar, that workqueue_whoami
 * queues in order to learn which LWP runs a workqueue.
 */
struct wq_whoami_work {
	kmutex_t		www_lock;	/* held around www_cv use */
	kcondvar_t		www_cv;		/* broadcast by the work */
	struct workqueue_struct	*www_wq;	/* workqueue being probed */
	struct work_struct	www_work;	/* the queued work item */
};
    193 
    194 static void
    195 workqueue_whoami_work(struct work_struct *work)
    196 {
    197 	struct wq_whoami_work *www = www;
    198 	struct workqueue_struct *wq = www->www_wq;
    199 
    200 	KASSERT(wq->wq_lwp == NULL);
    201 	wq->wq_lwp = curlwp;
    202 
    203 	mutex_enter(&www->www_lock);
    204 	cv_broadcast(&www->www_cv);
    205 	mutex_exit(&www->www_lock);
    206 }
    207 
    208 static void
    209 workqueue_whoami(struct workqueue_struct *wq)
    210 {
    211 	struct wq_whoami_work www;
    212 	struct workqueue_struct *collision __diagused;
    213 
    214 	mutex_init(&www.www_lock, MUTEX_DEFAULT, IPL_NONE);
    215 	cv_init(&www.www_cv, "wqwhoami");
    216 
    217 	INIT_WORK(&www.www_work, &workqueue_whoami_work);
    218 	queue_work(wq, &www.www_work);
    219 
    220 	mutex_enter(&www.www_lock);
    221 	while (wq->wq_lwp == NULL)
    222 		cv_wait(&www.www_cv, &www.www_lock);
    223 	mutex_exit(&www.www_lock);
    224 
    225 	cv_destroy(&www.www_cv);
    226 	mutex_destroy(&www.www_lock);
    227 
    228 	mutex_enter(&workqueues.lock);
    229 	collision = rb_tree_insert_node(&workqueues.tree, wq);
    230 	mutex_exit(&workqueues.lock);
    231 
    232 	KASSERT(collision == wq);
    233 }
    234 
    235 struct work_struct *
    236 current_work(void)
    237 {
    238 	struct workqueue_struct *wq;
    239 	struct work_struct *work;
    240 
    241 	mutex_enter(&workqueues.lock);
    242 	wq = rb_tree_find_node(&workqueues.tree, curlwp);
    243 	work = (wq == NULL ? NULL : wq->wq_current_work);
    244 	mutex_exit(&workqueues.lock);
    245 
    246 	return work;
    247 }
    248 
    249 /*
    251  * Workqueues
    252  */
    253 
    254 struct workqueue_struct *
    255 alloc_ordered_workqueue(const char *name, int linux_flags)
    256 {
    257 	struct workqueue_struct *wq;
    258 	int flags = WQ_MPSAFE;
    259 	int error;
    260 
    261 	KASSERT(linux_flags == 0);
    262 
    263 	wq = kmem_alloc(sizeof(*wq), KM_SLEEP);
    264 	error = workqueue_create(&wq->wq_workqueue, name, &linux_worker,
    265 	    wq, PRI_NONE, IPL_VM, flags);
    266 	if (error) {
    267 		kmem_free(wq, sizeof(*wq));
    268 		return NULL;
    269 	}
    270 
    271 	mutex_init(&wq->wq_lock, MUTEX_DEFAULT, IPL_VM);
    272 	cv_init(&wq->wq_cv, name);
    273 	TAILQ_INIT(&wq->wq_delayed);
    274 	wq->wq_current_work = NULL;
    275 
    276 	workqueue_whoami(wq);
    277 	KASSERT(wq->wq_lwp != NULL);
    278 
    279 	return wq;
    280 }
    281 
    282 void
    283 destroy_workqueue(struct workqueue_struct *wq)
    284 {
    285 
    286 	/*
    287 	 * Cancel all delayed work.
    288 	 */
    289 	for (;;) {
    290 		struct delayed_work *dw;
    291 
    292 		mutex_enter(&wq->wq_lock);
    293 		if (TAILQ_EMPTY(&wq->wq_delayed)) {
    294 			dw = NULL;
    295 		} else {
    296 			dw = TAILQ_FIRST(&wq->wq_delayed);
    297 			TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
    298 		}
    299 		mutex_exit(&wq->wq_lock);
    300 
    301 		if (dw == NULL)
    302 			break;
    303 
    304 		cancel_delayed_work_sync(dw);
    305 	}
    306 
    307 	/*
    308 	 * workqueue_destroy empties the queue; we need not wait for
    309 	 * completion explicitly.  However, we can't destroy the
    310 	 * condvar or mutex until this is done.
    311 	 */
    312 	workqueue_destroy(wq->wq_workqueue);
    313 	KASSERT(wq->wq_current_work == NULL);
    314 	wq->wq_workqueue = NULL;
    315 
    316 	cv_destroy(&wq->wq_cv);
    317 	mutex_destroy(&wq->wq_lock);
    318 
    319 	kmem_free(wq, sizeof(*wq));
    320 }
    321 
    322 /*
    324  * Flush
    325  *
    326  * Note:  This doesn't cancel or wait for delayed work.  This seems to
    327  * match what Linux does (or, doesn't do).
    328  */
    329 
    330 void
    331 flush_scheduled_work(void)
    332 {
    333 	flush_workqueue(system_wq);
    334 }
    335 
/*
 * Barrier work item queued by flush_workqueue.  It is allocated there
 * and freed by linux_wq_barrier when it runs.
 */
struct wq_flush_work {
	struct work_struct	wqfw_work;	/* the queued work item */
	struct wq_flush		*wqfw_flush;	/* flush it belongs to */
};
    340 
/*
 * State of one flush_workqueue call, shared between the caller and
 * the barrier work items it queued.
 */
struct wq_flush {
	kmutex_t	wqf_lock;	/* held around wqf_n and wqf_cv use */
	kcondvar_t	wqf_cv;		/* broadcast when wqf_n reaches 0 */
	unsigned int	wqf_n;		/* barriers that have not run yet */
};
    346 
    347 void
    348 flush_work(struct work_struct *work)
    349 {
    350 	struct workqueue_struct *const wq = work->w_wq;
    351 
    352 	if (wq != NULL)
    353 		flush_workqueue(wq);
    354 }
    355 
    356 void
    357 flush_workqueue(struct workqueue_struct *wq)
    358 {
    359 	static const struct wq_flush zero_wqf;
    360 	struct wq_flush wqf = zero_wqf;
    361 
    362 	mutex_init(&wqf.wqf_lock, MUTEX_DEFAULT, IPL_NONE);
    363 	cv_init(&wqf.wqf_cv, "lnxwflsh");
    364 
    365 	if (1) {
    366 		struct wq_flush_work *const wqfw = kmem_zalloc(sizeof(*wqfw),
    367 		    KM_SLEEP);
    368 
    369 		wqf.wqf_n = 1;
    370 		wqfw->wqfw_flush = &wqf;
    371 		INIT_WORK(&wqfw->wqfw_work, &linux_wq_barrier);
    372 		wqfw->wqfw_work.w_wq = wq;
    373 		wqfw->wqfw_work.w_state = WORK_PENDING;
    374 		workqueue_enqueue(wq->wq_workqueue, &wqfw->wqfw_work.w_wk,
    375 		    NULL);
    376 	} else {
    377 		struct cpu_info *ci;
    378 		CPU_INFO_ITERATOR cii;
    379 		struct wq_flush_work *wqfw;
    380 
    381 		panic("per-CPU Linux workqueues don't work yet!");
    382 
    383 		wqf.wqf_n = 0;
    384 		for (CPU_INFO_FOREACH(cii, ci)) {
    385 			wqfw = kmem_zalloc(sizeof(*wqfw), KM_SLEEP);
    386 			mutex_enter(&wqf.wqf_lock);
    387 			wqf.wqf_n++;
    388 			mutex_exit(&wqf.wqf_lock);
    389 			wqfw->wqfw_flush = &wqf;
    390 			INIT_WORK(&wqfw->wqfw_work, &linux_wq_barrier);
    391 			wqfw->wqfw_work.w_state = WORK_PENDING;
    392 			wqfw->wqfw_work.w_wq = wq;
    393 			workqueue_enqueue(wq->wq_workqueue,
    394 			    &wqfw->wqfw_work.w_wk, ci);
    395 		}
    396 	}
    397 
    398 	mutex_enter(&wqf.wqf_lock);
    399 	while (0 < wqf.wqf_n)
    400 		cv_wait(&wqf.wqf_cv, &wqf.wqf_lock);
    401 	mutex_exit(&wqf.wqf_lock);
    402 
    403 	cv_destroy(&wqf.wqf_cv);
    404 	mutex_destroy(&wqf.wqf_lock);
    405 }
    406 
    407 static void
    408 linux_wq_barrier(struct work_struct *work)
    409 {
    410 	struct wq_flush_work *const wqfw = container_of(work,
    411 	    struct wq_flush_work, wqfw_work);
    412 	struct wq_flush *const wqf = wqfw->wqfw_flush;
    413 
    414 	mutex_enter(&wqf->wqf_lock);
    415 	if (--wqf->wqf_n == 0)
    416 		cv_broadcast(&wqf->wqf_cv);
    417 	mutex_exit(&wqf->wqf_lock);
    418 
    419 	kmem_free(wqfw, sizeof(*wqfw));
    420 }
    421 
    422 /*
    424  * Work locking
    425  *
    426  * We use __cpu_simple_lock(9) rather than mutex(9) because Linux code
    427  * does not destroy work, so there is nowhere to call mutex_destroy.
    428  *
    429  * XXX This is getting out of hand...  Really, work items shouldn't
    430  * have locks in them at all; instead the workqueues should.
    431  */
    432 
    433 static void
    434 linux_work_lock_init(struct work_struct *work)
    435 {
    436 
    437 	__cpu_simple_lock_init(&work->w_lock);
    438 }
    439 
    440 static void
    441 linux_work_lock(struct work_struct *work)
    442 {
    443 	struct cpu_info *ci;
    444 	int cnt, s;
    445 
    446 	/* XXX Copypasta of MUTEX_SPIN_SPLRAISE.  */
    447 	s = splvm();
    448 	ci = curcpu();
    449 	cnt = ci->ci_mtx_count--;
    450 	__insn_barrier();
    451 	if (cnt == 0)
    452 		ci->ci_mtx_oldspl = s;
    453 
    454 	__cpu_simple_lock(&work->w_lock);
    455 }
    456 
    457 static void
    458 linux_work_unlock(struct work_struct *work)
    459 {
    460 	struct cpu_info *ci;
    461 	int s;
    462 
    463 	__cpu_simple_unlock(&work->w_lock);
    464 
    465 	/* XXX Copypasta of MUTEX_SPIN_SPLRESTORE.  */
    466 	ci = curcpu();
    467 	s = ci->ci_mtx_oldspl;
    468 	__insn_barrier();
    469 	if (++ci->ci_mtx_count == 0)
    470 		splx(s);
    471 }
    472 
    473 static bool __diagused
    474 linux_work_locked(struct work_struct *work)
    475 {
    476 	return __SIMPLELOCK_LOCKED_P(&work->w_lock);
    477 }
    478 
    479 /*
    481  * Work
    482  */
    483 
    484 void
    485 INIT_WORK(struct work_struct *work, void (*fn)(struct work_struct *))
    486 {
    487 
    488 	linux_work_lock_init(work);
    489 	work->w_state = WORK_IDLE;
    490 	work->w_wq = NULL;
    491 	work->func = fn;
    492 }
    493 
    494 bool
    495 schedule_work(struct work_struct *work)
    496 {
    497 	return queue_work(system_wq, work);
    498 }
    499 
    500 bool
    501 queue_work(struct workqueue_struct *wq, struct work_struct *work)
    502 {
    503 	/* True if we put it on the queue, false if it was already there.  */
    504 	bool newly_queued;
    505 
    506 	KASSERT(wq != NULL);
    507 
    508 	linux_work_lock(work);
    509 	switch (work->w_state) {
    510 	case WORK_IDLE:
    511 	case WORK_INVOKED:
    512 		work->w_state = WORK_PENDING;
    513 		work->w_wq = wq;
    514 		workqueue_enqueue(wq->wq_workqueue, &work->w_wk, NULL);
    515 		newly_queued = true;
    516 		break;
    517 
    518 	case WORK_DELAYED:
    519 		panic("queue_work(delayed work %p)", work);
    520 		break;
    521 
    522 	case WORK_PENDING:
    523 		KASSERT(work->w_wq == wq);
    524 		newly_queued = false;
    525 		break;
    526 
    527 	case WORK_CANCELLED:
    528 		newly_queued = false;
    529 		break;
    530 
    531 	case WORK_DELAYED_CANCELLED:
    532 		panic("queue_work(delayed work %p)", work);
    533 		break;
    534 
    535 	default:
    536 		panic("work %p in bad state: %d", work, (int)work->w_state);
    537 		break;
    538 	}
    539 	linux_work_unlock(work);
    540 
    541 	return newly_queued;
    542 }
    543 
    544 bool
    545 cancel_work_sync(struct work_struct *work)
    546 {
    547 	bool cancelled_p = false;
    548 
    549 	linux_work_lock(work);
    550 	switch (work->w_state) {
    551 	case WORK_IDLE:		/* Nothing to do.  */
    552 		break;
    553 
    554 	case WORK_DELAYED:
    555 		panic("cancel_work_sync(delayed work %p)", work);
    556 		break;
    557 
    558 	case WORK_PENDING:
    559 		work->w_state = WORK_CANCELLED;
    560 		linux_wait_for_cancelled_work(work);
    561 		cancelled_p = true;
    562 		break;
    563 
    564 	case WORK_INVOKED:
    565 		linux_wait_for_invoked_work(work);
    566 		break;
    567 
    568 	case WORK_CANCELLED:	/* Already done.  */
    569 		break;
    570 
    571 	case WORK_DELAYED_CANCELLED:
    572 		panic("cancel_work_sync(delayed work %p)", work);
    573 		break;
    574 
    575 	default:
    576 		panic("work %p in bad state: %d", work, (int)work->w_state);
    577 		break;
    578 	}
    579 	linux_work_unlock(work);
    580 
    581 	return cancelled_p;
    582 }
    583 
    584 static void
    585 linux_wait_for_cancelled_work(struct work_struct *work)
    586 {
    587 	struct workqueue_struct *wq;
    588 
    589 	KASSERT(linux_work_locked(work));
    590 	KASSERT(work->w_state == WORK_CANCELLED);
    591 
    592 	wq = work->w_wq;
    593 	do {
    594 		mutex_enter(&wq->wq_lock);
    595 		linux_work_unlock(work);
    596 		cv_wait(&wq->wq_cv, &wq->wq_lock);
    597 		mutex_exit(&wq->wq_lock);
    598 		linux_work_lock(work);
    599 	} while ((work->w_state == WORK_CANCELLED) && (work->w_wq == wq));
    600 }
    601 
    602 static void
    603 linux_wait_for_invoked_work(struct work_struct *work)
    604 {
    605 	struct workqueue_struct *wq;
    606 
    607 	KASSERT(linux_work_locked(work));
    608 	KASSERT(work->w_state == WORK_INVOKED);
    609 
    610 	wq = work->w_wq;
    611 	mutex_enter(&wq->wq_lock);
    612 	linux_work_unlock(work);
    613 	while (wq->wq_current_work == work)
    614 		cv_wait(&wq->wq_cv, &wq->wq_lock);
    615 	mutex_exit(&wq->wq_lock);
    616 
    617 	linux_work_lock(work);	/* XXX needless relock */
    618 }
    619 
    620 static void
    621 linux_worker(struct work *wk, void *arg)
    622 {
    623 	struct work_struct *const work = container_of(wk, struct work_struct,
    624 	    w_wk);
    625 	struct workqueue_struct *const wq = arg;
    626 
    627 	linux_work_lock(work);
    628 	switch (work->w_state) {
    629 	case WORK_IDLE:
    630 		panic("idle work %p got queued: %p", work, wq);
    631 		break;
    632 
    633 	case WORK_DELAYED:
    634 		panic("delayed work %p got queued: %p", work, wq);
    635 		break;
    636 
    637 	case WORK_PENDING:
    638 		KASSERT(work->w_wq == wq);
    639 
    640 		/* Get ready to invoke this one.  */
    641 		mutex_enter(&wq->wq_lock);
    642 		work->w_state = WORK_INVOKED;
    643 		KASSERT(wq->wq_current_work == NULL);
    644 		wq->wq_current_work = work;
    645 		mutex_exit(&wq->wq_lock);
    646 
    647 		/* Unlock it and do it.  Can't use work after this.  */
    648 		linux_work_unlock(work);
    649 		(*work->func)(work);
    650 
    651 		/* All done.  Notify anyone waiting for completion.  */
    652 		mutex_enter(&wq->wq_lock);
    653 		KASSERT(wq->wq_current_work == work);
    654 		wq->wq_current_work = NULL;
    655 		cv_broadcast(&wq->wq_cv);
    656 		mutex_exit(&wq->wq_lock);
    657 		return;
    658 
    659 	case WORK_INVOKED:
    660 		panic("invoked work %p got requeued: %p", work, wq);
    661 		break;
    662 
    663 	case WORK_CANCELLED:
    664 		KASSERT(work->w_wq == wq);
    665 
    666 		/* Return to idle; notify anyone waiting for cancellation.  */
    667 		mutex_enter(&wq->wq_lock);
    668 		work->w_state = WORK_IDLE;
    669 		work->w_wq = NULL;
    670 		cv_broadcast(&wq->wq_cv);
    671 		mutex_exit(&wq->wq_lock);
    672 		break;
    673 
    674 	case WORK_DELAYED_CANCELLED:
    675 		panic("cancelled delayed work %p got uqeued: %p", work, wq);
    676 		break;
    677 
    678 	default:
    679 		panic("work %p in bad state: %d", work, (int)work->w_state);
    680 		break;
    681 	}
    682 	linux_work_unlock(work);
    683 }
    684 
    685 /*
    687  * Delayed work
    688  */
    689 
    690 void
    691 INIT_DELAYED_WORK(struct delayed_work *dw, void (*fn)(struct work_struct *))
    692 {
    693 	INIT_WORK(&dw->work, fn);
    694 }
    695 
    696 bool
    697 schedule_delayed_work(struct delayed_work *dw, unsigned long ticks)
    698 {
    699 	return queue_delayed_work(system_wq, dw, ticks);
    700 }
    701 
    702 bool
    703 queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    704     unsigned long ticks)
    705 {
    706 	bool newly_queued;
    707 
    708 	KASSERT(wq != NULL);
    709 
    710 	linux_work_lock(&dw->work);
    711 	switch (dw->work.w_state) {
    712 	case WORK_IDLE:
    713 	case WORK_INVOKED:
    714 		if (ticks == 0) {
    715 			/* Skip the delay and queue it now.  */
    716 			dw->work.w_state = WORK_PENDING;
    717 			dw->work.w_wq = wq;
    718 			workqueue_enqueue(wq->wq_workqueue, &dw->work.w_wk,
    719 			    NULL);
    720 		} else {
    721 			callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
    722 			callout_reset(&dw->dw_callout, ticks,
    723 			    &linux_worker_intr, dw);
    724 			dw->work.w_state = WORK_DELAYED;
    725 			dw->work.w_wq = wq;
    726 			mutex_enter(&wq->wq_lock);
    727 			TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
    728 			mutex_exit(&wq->wq_lock);
    729 		}
    730 		newly_queued = true;
    731 		break;
    732 
    733 	case WORK_DELAYED:
    734 		/*
    735 		 * Timer is already ticking.  Leave it to time out
    736 		 * whenever it was going to time out, as Linux does --
    737 		 * neither speed it up nor postpone it.
    738 		 */
    739 		newly_queued = false;
    740 		break;
    741 
    742 	case WORK_PENDING:
    743 		KASSERT(dw->work.w_wq == wq);
    744 		newly_queued = false;
    745 		break;
    746 
    747 	case WORK_CANCELLED:
    748 	case WORK_DELAYED_CANCELLED:
    749 		/* XXX Wait for cancellation and then queue?  */
    750 		newly_queued = false;
    751 		break;
    752 
    753 	default:
    754 		panic("delayed work %p in bad state: %d", dw,
    755 		    (int)dw->work.w_state);
    756 		break;
    757 	}
    758 	linux_work_unlock(&dw->work);
    759 
    760 	return newly_queued;
    761 }
    762 
    763 bool
    764 mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    765     unsigned long ticks)
    766 {
    767 	bool timer_modified;
    768 
    769 	KASSERT(wq != NULL);
    770 
    771 	linux_work_lock(&dw->work);
    772 	switch (dw->work.w_state) {
    773 	case WORK_IDLE:
    774 	case WORK_INVOKED:
    775 		if (ticks == 0) {
    776 			/* Skip the delay and queue it now.  */
    777 			dw->work.w_state = WORK_PENDING;
    778 			dw->work.w_wq = wq;
    779 			workqueue_enqueue(wq->wq_workqueue, &dw->work.w_wk,
    780 			    NULL);
    781 		} else {
    782 			callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
    783 			callout_reset(&dw->dw_callout, ticks,
    784 			    &linux_worker_intr, dw);
    785 			dw->work.w_state = WORK_DELAYED;
    786 			dw->work.w_wq = wq;
    787 			mutex_enter(&wq->wq_lock);
    788 			TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
    789 			mutex_exit(&wq->wq_lock);
    790 		}
    791 		timer_modified = false;
    792 		break;
    793 
    794 	case WORK_DELAYED:
    795 		/*
    796 		 * Timer is already ticking.  Reschedule it.
    797 		 */
    798 		callout_schedule(&dw->dw_callout, ticks);
    799 		timer_modified = true;
    800 		break;
    801 
    802 	case WORK_PENDING:
    803 		KASSERT(dw->work.w_wq == wq);
    804 		timer_modified = false;
    805 		break;
    806 
    807 	case WORK_CANCELLED:
    808 	case WORK_DELAYED_CANCELLED:
    809 		/* XXX Wait for cancellation and then queue?  */
    810 		timer_modified = false;
    811 		break;
    812 
    813 	default:
    814 		panic("delayed work %p in bad state: %d", dw,
    815 		    (int)dw->work.w_state);
    816 		break;
    817 	}
    818 	linux_work_unlock(&dw->work);
    819 
    820 	return timer_modified;
    821 }
    822 
    823 bool
    824 cancel_delayed_work(struct delayed_work *dw)
    825 {
    826 	bool cancelled_p = false;
    827 
    828 	linux_work_lock(&dw->work);
    829 	switch (dw->work.w_state) {
    830 	case WORK_IDLE:		/* Nothing to do.  */
    831 		break;
    832 
    833 	case WORK_DELAYED:
    834 		dw->work.w_state = WORK_DELAYED_CANCELLED;
    835 		linux_cancel_delayed_work_callout(dw, false);
    836 		cancelled_p = true;
    837 		break;
    838 
    839 	case WORK_PENDING:
    840 		dw->work.w_state = WORK_CANCELLED;
    841 		cancelled_p = true;
    842 		break;
    843 
    844 	case WORK_INVOKED:	/* Don't wait!  */
    845 		break;
    846 
    847 	case WORK_CANCELLED:	/* Already done.  */
    848 	case WORK_DELAYED_CANCELLED:
    849 		break;
    850 
    851 	default:
    852 		panic("delayed work %p in bad state: %d", dw,
    853 		    (int)dw->work.w_state);
    854 		break;
    855 	}
    856 	linux_work_unlock(&dw->work);
    857 
    858 	return cancelled_p;
    859 }
    860 
    861 bool
    862 cancel_delayed_work_sync(struct delayed_work *dw)
    863 {
    864 	bool cancelled_p = false;
    865 
    866 	linux_work_lock(&dw->work);
    867 	switch (dw->work.w_state) {
    868 	case WORK_IDLE:		/* Nothing to do.  */
    869 		break;
    870 
    871 	case WORK_DELAYED:
    872 		dw->work.w_state = WORK_DELAYED_CANCELLED;
    873 		linux_cancel_delayed_work_callout(dw, true);
    874 		cancelled_p = true;
    875 		break;
    876 
    877 	case WORK_PENDING:
    878 		dw->work.w_state = WORK_CANCELLED;
    879 		linux_wait_for_cancelled_work(&dw->work);
    880 		cancelled_p = true;
    881 		break;
    882 
    883 	case WORK_INVOKED:
    884 		linux_wait_for_invoked_work(&dw->work);
    885 		break;
    886 
    887 	case WORK_CANCELLED:	/* Already done.  */
    888 		break;
    889 
    890 	case WORK_DELAYED_CANCELLED:
    891 		linux_wait_for_delayed_cancelled_work(dw);
    892 		break;
    893 
    894 	default:
    895 		panic("delayed work %p in bad state: %d", dw,
    896 		    (int)dw->work.w_state);
    897 		break;
    898 	}
    899 	linux_work_unlock(&dw->work);
    900 
    901 	return cancelled_p;
    902 }
    903 
    904 void
    905 flush_delayed_work(struct delayed_work *dw)
    906 {
    907 	struct workqueue_struct *wq = dw->work.w_wq;
    908 
    909 	if (wq != NULL)
    910 		flush_workqueue(wq);
    911 }
    912 
    913 static void
    914 linux_cancel_delayed_work_callout(struct delayed_work *dw, bool wait)
    915 {
    916 	bool fired_p;
    917 
    918 	KASSERT(linux_work_locked(&dw->work));
    919 	KASSERT(dw->work.w_state == WORK_DELAYED_CANCELLED);
    920 
    921 	if (wait) {
    922 		/*
    923 		 * We unlock, halt, and then relock, rather than
    924 		 * passing an interlock to callout_halt, for two
    925 		 * reasons:
    926 		 *
    927 		 * (1) The work lock is not a mutex(9), so we can't use it.
    928 		 * (2) The WORK_DELAYED_CANCELLED state serves as an interlock.
    929 		 */
    930 		linux_work_unlock(&dw->work);
    931 		fired_p = callout_halt(&dw->dw_callout, NULL);
    932 		linux_work_lock(&dw->work);
    933 	} else {
    934 		fired_p = callout_stop(&dw->dw_callout);
    935 	}
    936 
    937 	/*
    938 	 * fired_p means we didn't cancel the callout, so it must have
    939 	 * already begun and will clean up after itself.
    940 	 *
    941 	 * !fired_p means we cancelled it so we have to clean up after
    942 	 * it.  Nobody else should have changed the state in that case.
    943 	 */
    944 	if (!fired_p) {
    945 		struct workqueue_struct *wq;
    946 
    947 		KASSERT(linux_work_locked(&dw->work));
    948 		KASSERT(dw->work.w_state == WORK_DELAYED_CANCELLED);
    949 
    950 		wq = dw->work.w_wq;
    951 		mutex_enter(&wq->wq_lock);
    952 		TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
    953 		callout_destroy(&dw->dw_callout);
    954 		dw->work.w_state = WORK_IDLE;
    955 		dw->work.w_wq = NULL;
    956 		cv_broadcast(&wq->wq_cv);
    957 		mutex_exit(&wq->wq_lock);
    958 	}
    959 }
    960 
    961 static void
    962 linux_wait_for_delayed_cancelled_work(struct delayed_work *dw)
    963 {
    964 	struct workqueue_struct *wq;
    965 
    966 	KASSERT(linux_work_locked(&dw->work));
    967 	KASSERT(dw->work.w_state == WORK_DELAYED_CANCELLED);
    968 
    969 	wq = dw->work.w_wq;
    970 	do {
    971 		mutex_enter(&wq->wq_lock);
    972 		linux_work_unlock(&dw->work);
    973 		cv_wait(&wq->wq_cv, &wq->wq_lock);
    974 		mutex_exit(&wq->wq_lock);
    975 		linux_work_lock(&dw->work);
    976 	} while ((dw->work.w_state == WORK_DELAYED_CANCELLED) &&
    977 	    (dw->work.w_wq == wq));
    978 }
    979 
    980 static void
    981 linux_worker_intr(void *arg)
    982 {
    983 	struct delayed_work *dw = arg;
    984 	struct workqueue_struct *wq;
    985 
    986 	linux_work_lock(&dw->work);
    987 
    988 	KASSERT((dw->work.w_state == WORK_DELAYED) ||
    989 	    (dw->work.w_state == WORK_DELAYED_CANCELLED));
    990 
    991 	wq = dw->work.w_wq;
    992 	mutex_enter(&wq->wq_lock);
    993 
    994 	/* Queue the work, or return it to idle and alert any cancellers.  */
    995 	if (__predict_true(dw->work.w_state == WORK_DELAYED)) {
    996 		dw->work.w_state = WORK_PENDING;
    997 		workqueue_enqueue(dw->work.w_wq->wq_workqueue, &dw->work.w_wk,
    998 		    NULL);
    999 	} else {
   1000 		KASSERT(dw->work.w_state == WORK_DELAYED_CANCELLED);
   1001 		dw->work.w_state = WORK_IDLE;
   1002 		dw->work.w_wq = NULL;
   1003 		cv_broadcast(&wq->wq_cv);
   1004 	}
   1005 
   1006 	/* Either way, the callout is done.  */
   1007 	TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
   1008 	callout_destroy(&dw->dw_callout);
   1009 
   1010 	mutex_exit(&wq->wq_lock);
   1011 	linux_work_unlock(&dw->work);
   1012 }
   1013