linux_work.c revision 1.9
      1 /*	$NetBSD: linux_work.c,v 1.9 2018/08/27 13:38:51 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2013 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Taylor R. Campbell.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: linux_work.c,v 1.9 2018/08/27 13:38:51 riastradh Exp $");
     34 
     35 #include <sys/types.h>
     36 #include <sys/param.h>
     37 #include <sys/atomic.h>
     38 #include <sys/callout.h>
     39 #include <sys/condvar.h>
     40 #include <sys/errno.h>
     41 #include <sys/intr.h>
     42 #include <sys/kmem.h>
     43 #include <sys/mutex.h>
     44 #include <sys/queue.h>
     45 #include <sys/systm.h>
     46 #include <sys/workqueue.h>
     47 #include <sys/cpu.h>
     48 
     49 #include <machine/lock.h>
     50 
     51 #include <linux/workqueue.h>
     52 
     53 struct workqueue_struct {
     54 	struct workqueue		*wq_workqueue;
     55 
     56 	struct rb_node			wq_node;
     57 	struct lwp			*wq_lwp;
     58 
     59 	/* XXX The following should all be per-CPU.  */
     60 	kmutex_t			wq_lock;
     61 
     62 	/*
     63 	 * Condvar for when any state related to this workqueue
     64 	 * changes.  XXX Could split this into multiple condvars for
     65 	 * different purposes, but whatever...
     66 	 */
     67 	kcondvar_t			wq_cv;
     68 
     69 	TAILQ_HEAD(, delayed_work)	wq_delayed;
     70 	struct work_struct		*wq_current_work;
     71 };
     72 
     73 static void	linux_work_lock_init(struct work_struct *);
     74 static void	linux_work_lock(struct work_struct *);
     75 static void	linux_work_unlock(struct work_struct *);
     76 static bool	linux_work_locked(struct work_struct *) __diagused;
     77 
     78 static void	linux_wq_barrier(struct work_struct *);
     79 
     80 static void	linux_wait_for_cancelled_work(struct work_struct *);
     81 static void	linux_wait_for_invoked_work(struct work_struct *);
     82 static void	linux_worker(struct work *, void *);
     83 
     84 static void	linux_cancel_delayed_work_callout(struct delayed_work *, bool);
     85 static void	linux_wait_for_delayed_cancelled_work(struct delayed_work *);
     86 static void	linux_worker_intr(void *);
     87 
     88 struct workqueue_struct		*system_wq;
     89 struct workqueue_struct		*system_long_wq;
     90 struct workqueue_struct		*system_power_efficient_wq;
     91 
     92 static struct {
     93 	kmutex_t		lock;
     94 	struct rb_tree		tree;
     95 } workqueues __cacheline_aligned;
     96 
     97 static const rb_tree_ops_t	workqueues_rb_ops;
     98 
     99 int
    100 linux_workqueue_init(void)
    101 {
    102 
    103 	mutex_init(&workqueues.lock, MUTEX_DEFAULT, IPL_VM);
    104 	rb_tree_init(&workqueues.tree, &workqueues_rb_ops);
    105 
    106 	system_wq = alloc_ordered_workqueue("lnxsyswq", 0);
    107 	if (system_wq == NULL)
    108 		goto fail0;
    109 
    110 	system_long_wq = alloc_ordered_workqueue("lnxlngwq", 0);
    111 	if (system_long_wq == NULL)
    112 		goto fail1;
    113 
    114 	system_power_efficient_wq = alloc_ordered_workqueue("lnxpwrwq", 0);
     115 	if (system_power_efficient_wq == NULL)
    116 		goto fail2;
    117 
    118 	return 0;
    119 
    120 fail3: __unused
    121 	destroy_workqueue(system_power_efficient_wq);
    122 fail2:	destroy_workqueue(system_long_wq);
    123 fail1:	destroy_workqueue(system_wq);
    124 fail0:	mutex_destroy(&workqueues.lock);
    125 	return ENOMEM;
    126 }
    127 
    128 void
    129 linux_workqueue_fini(void)
    130 {
    131 
         	destroy_workqueue(system_power_efficient_wq);
         	system_power_efficient_wq = NULL;
     132 	destroy_workqueue(system_long_wq);
     133 	system_long_wq = NULL;
    134 	destroy_workqueue(system_wq);
    135 	system_wq = NULL;
    136 	KASSERT(RB_TREE_MIN(&workqueues.tree) == NULL);
    137 	mutex_destroy(&workqueues.lock);
    138 }
    139 
    140 /*
    142  * Table of workqueue LWPs for validation -- assumes there is only one
    143  * thread per workqueue.
    144  *
    145  * XXX Mega-kludgerific!
    146  */
    147 
    148 static int
    149 compare_nodes(void *cookie, const void *va, const void *vb)
    150 {
    151 	const struct workqueue_struct *wa = va;
    152 	const struct workqueue_struct *wb = vb;
    153 
    154 	if ((uintptr_t)wa->wq_lwp < (uintptr_t)wb->wq_lwp)
    155 		return -1;
    156 	if ((uintptr_t)wa->wq_lwp > (uintptr_t)wb->wq_lwp)
    157 		return +1;
    158 	return 0;
    159 }
    160 
    161 static int
    162 compare_key(void *cookie, const void *vn, const void *vk)
    163 {
    164 	const struct workqueue_struct *w = vn;
    165 	const struct lwp *lwp = vk;
    166 
    167 	if ((uintptr_t)w->wq_lwp < (uintptr_t)lwp)
    168 		return -1;
    169 	if ((uintptr_t)w->wq_lwp > (uintptr_t)lwp)
    170 		return +1;
    171 	return 0;
    172 }
    173 
    174 static const rb_tree_ops_t workqueues_rb_ops = {
    175 	.rbto_compare_nodes = compare_nodes,
    176 	.rbto_compare_key = compare_key,
    177 	.rbto_node_offset = offsetof(struct workqueue_struct, wq_node),
    178 };
    179 
    180 struct wq_whoami_work {
    181 	kmutex_t		www_lock;
    182 	kcondvar_t		www_cv;
    183 	struct workqueue_struct	*www_wq;
    184 	struct work_struct	www_work;
    185 };
    186 
    187 static void
    188 workqueue_whoami_work(struct work_struct *work)
    189 {
    190 	struct wq_whoami_work *www = container_of(work, struct wq_whoami_work,
    191 	    www_work);
    192 	struct workqueue_struct *wq = www->www_wq;
    193 
    194 	KASSERT(wq->wq_lwp == NULL);
    195 	wq->wq_lwp = curlwp;
    196 
    197 	mutex_enter(&www->www_lock);
    198 	cv_broadcast(&www->www_cv);
    199 	mutex_exit(&www->www_lock);
    200 }
    201 
    202 static void
    203 workqueue_whoami(struct workqueue_struct *wq)
    204 {
    205 	struct wq_whoami_work www;
    206 	struct workqueue_struct *collision __diagused;
    207 
    208 	mutex_init(&www.www_lock, MUTEX_DEFAULT, IPL_NONE);
    209 	cv_init(&www.www_cv, "wqwhoami");
    210 
    211 	INIT_WORK(&www.www_work, &workqueue_whoami_work);
    212 	queue_work(wq, &www.www_work);
    213 
    214 	mutex_enter(&www.www_lock);
    215 	while (wq->wq_lwp == NULL)
    216 		cv_wait(&www.www_cv, &www.www_lock);
    217 	mutex_exit(&www.www_lock);
    218 
    219 	cv_destroy(&www.www_cv);
    220 	mutex_destroy(&www.www_lock);
    221 
    222 	mutex_enter(&workqueues.lock);
    223 	collision = rb_tree_insert_node(&workqueues.tree, wq);
    224 	mutex_exit(&workqueues.lock);
    225 
    226 	KASSERT(collision == wq);
    227 }
    228 
    229 struct work_struct *
    230 current_work(void)
    231 {
    232 	struct workqueue_struct *wq;
    233 	struct work_struct *work;
    234 
    235 	mutex_enter(&workqueues.lock);
    236 	wq = rb_tree_find_node(&workqueues.tree, curlwp);
    237 	work = (wq == NULL ? NULL : wq->wq_current_work);
    238 	mutex_exit(&workqueues.lock);
    239 
    240 	return work;
    241 }
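/*
 * Illustrative sketch, not part of the original file: a work handler in
 * a hypothetical driver can use current_work() to assert that it is
 * running from its own workqueue thread.  The name mydrv_task is an
 * assumption for the example only.
 *
 *	static void
 *	mydrv_task(struct work_struct *work)
 *	{
 *		KASSERT(current_work() == work);
 *		... the real driver work goes here ...
 *	}
 *
 * The lookup is keyed on curlwp, so it only recognizes work running on
 * a workqueue created through this shim.
 */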
    242 
    243 /*
    245  * Workqueues
    246  */
    247 
    248 struct workqueue_struct *
    249 alloc_ordered_workqueue(const char *name, int linux_flags)
    250 {
    251 	struct workqueue_struct *wq;
    252 	int flags = WQ_MPSAFE;
    253 	int error;
    254 
    255 	KASSERT(linux_flags == 0);
    256 
    257 	wq = kmem_alloc(sizeof(*wq), KM_SLEEP);
    258 	error = workqueue_create(&wq->wq_workqueue, name, &linux_worker,
    259 	    wq, PRI_NONE, IPL_VM, flags);
    260 	if (error) {
    261 		kmem_free(wq, sizeof(*wq));
    262 		return NULL;
    263 	}
    264 
    265 	mutex_init(&wq->wq_lock, MUTEX_DEFAULT, IPL_VM);
    266 	cv_init(&wq->wq_cv, name);
    267 	TAILQ_INIT(&wq->wq_delayed);
    268 	wq->wq_current_work = NULL;
    269 
    270 	workqueue_whoami(wq);
    271 	KASSERT(wq->wq_lwp != NULL);
    272 
    273 	return wq;
    274 }
    275 
    276 void
    277 destroy_workqueue(struct workqueue_struct *wq)
    278 {
    279 
    280 	/*
    281 	 * Cancel all delayed work.
    282 	 */
    283 	for (;;) {
    284 		struct delayed_work *dw;
    285 
    286 		mutex_enter(&wq->wq_lock);
    287 		if (TAILQ_EMPTY(&wq->wq_delayed)) {
    288 			dw = NULL;
    289 		} else {
    290 			dw = TAILQ_FIRST(&wq->wq_delayed);
    291 			TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
    292 		}
    293 		mutex_exit(&wq->wq_lock);
    294 
    295 		if (dw == NULL)
    296 			break;
    297 
    298 		cancel_delayed_work_sync(dw);
    299 	}
    300 
    301 	/*
    302 	 * workqueue_destroy empties the queue; we need not wait for
    303 	 * completion explicitly.  However, we can't destroy the
    304 	 * condvar or mutex until this is done.
    305 	 */
    306 	workqueue_destroy(wq->wq_workqueue);
    307 	KASSERT(wq->wq_current_work == NULL);
    308 	wq->wq_workqueue = NULL;
    309 
    310 	cv_destroy(&wq->wq_cv);
    311 	mutex_destroy(&wq->wq_lock);
    312 
    313 	kmem_free(wq, sizeof(*wq));
    314 }
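/*
 * Illustrative sketch, not from the original file: the expected
 * attach/detach pairing for a private ordered workqueue.  sc and sc_wq
 * are hypothetical driver softc members.
 *
 *	sc->sc_wq = alloc_ordered_workqueue("mydrvwq", 0);
 *	if (sc->sc_wq == NULL)
 *		goto fail;
 *
 *	... queue_work/queue_delayed_work against sc->sc_wq ...
 *
 *	destroy_workqueue(sc->sc_wq);
 *	sc->sc_wq = NULL;
 *
 * Only flags == 0 is supported by this implementation (see the KASSERT
 * in alloc_ordered_workqueue), and destroy_workqueue cancels any
 * remaining delayed work and drains the queue before freeing it.
 */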
    315 
    316 /*
    318  * Flush
    319  *
    320  * Note:  This doesn't cancel or wait for delayed work.  This seems to
    321  * match what Linux does (or, doesn't do).
    322  */
    323 
    324 void
    325 flush_scheduled_work(void)
    326 {
    327 	flush_workqueue(system_wq);
    328 }
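/*
 * Illustrative sketch, not from the original file: waiting for
 * previously scheduled work before tearing down driver state.  Per the
 * note above, a still-ticking delayed_work is not waited for and must
 * be cancelled separately.  sc_work and sc_tick are hypothetical.
 *
 *	schedule_work(&sc->sc_work);
 *	...
 *	flush_scheduled_work();
 *	cancel_delayed_work_sync(&sc->sc_tick);
 *
 * After flush_scheduled_work() returns, sc_work has completed, but
 * sc_tick may still be pending on its callout; the explicit
 * cancel_delayed_work_sync() call takes care of that.
 */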
    329 
    330 struct wq_flush_work {
    331 	struct work_struct	wqfw_work;
    332 	struct wq_flush		*wqfw_flush;
    333 };
    334 
    335 struct wq_flush {
    336 	kmutex_t	wqf_lock;
    337 	kcondvar_t	wqf_cv;
    338 	unsigned int	wqf_n;
    339 };
    340 
    341 void
    342 flush_work(struct work_struct *work)
    343 {
    344 	struct workqueue_struct *const wq = work->w_wq;
    345 
    346 	if (wq != NULL)
    347 		flush_workqueue(wq);
    348 }
    349 
    350 void
    351 flush_workqueue(struct workqueue_struct *wq)
    352 {
    353 	static const struct wq_flush zero_wqf;
    354 	struct wq_flush wqf = zero_wqf;
    355 
    356 	mutex_init(&wqf.wqf_lock, MUTEX_DEFAULT, IPL_NONE);
    357 	cv_init(&wqf.wqf_cv, "lnxwflsh");
    358 
    359 	if (1) {
    360 		struct wq_flush_work *const wqfw = kmem_zalloc(sizeof(*wqfw),
    361 		    KM_SLEEP);
    362 
    363 		wqf.wqf_n = 1;
    364 		wqfw->wqfw_flush = &wqf;
    365 		INIT_WORK(&wqfw->wqfw_work, &linux_wq_barrier);
    366 		wqfw->wqfw_work.w_wq = wq;
    367 		wqfw->wqfw_work.w_state = WORK_PENDING;
    368 		workqueue_enqueue(wq->wq_workqueue, &wqfw->wqfw_work.w_wk,
    369 		    NULL);
    370 	} else {
    371 		struct cpu_info *ci;
    372 		CPU_INFO_ITERATOR cii;
    373 		struct wq_flush_work *wqfw;
    374 
    375 		panic("per-CPU Linux workqueues don't work yet!");
    376 
    377 		wqf.wqf_n = 0;
    378 		for (CPU_INFO_FOREACH(cii, ci)) {
    379 			wqfw = kmem_zalloc(sizeof(*wqfw), KM_SLEEP);
    380 			mutex_enter(&wqf.wqf_lock);
    381 			wqf.wqf_n++;
    382 			mutex_exit(&wqf.wqf_lock);
    383 			wqfw->wqfw_flush = &wqf;
    384 			INIT_WORK(&wqfw->wqfw_work, &linux_wq_barrier);
    385 			wqfw->wqfw_work.w_state = WORK_PENDING;
    386 			wqfw->wqfw_work.w_wq = wq;
    387 			workqueue_enqueue(wq->wq_workqueue,
    388 			    &wqfw->wqfw_work.w_wk, ci);
    389 		}
    390 	}
    391 
    392 	mutex_enter(&wqf.wqf_lock);
    393 	while (0 < wqf.wqf_n)
    394 		cv_wait(&wqf.wqf_cv, &wqf.wqf_lock);
    395 	mutex_exit(&wqf.wqf_lock);
    396 
    397 	cv_destroy(&wqf.wqf_cv);
    398 	mutex_destroy(&wqf.wqf_lock);
    399 }
    400 
    401 static void
    402 linux_wq_barrier(struct work_struct *work)
    403 {
    404 	struct wq_flush_work *const wqfw = container_of(work,
    405 	    struct wq_flush_work, wqfw_work);
    406 	struct wq_flush *const wqf = wqfw->wqfw_flush;
    407 
    408 	mutex_enter(&wqf->wqf_lock);
    409 	if (--wqf->wqf_n == 0)
    410 		cv_broadcast(&wqf->wqf_cv);
    411 	mutex_exit(&wqf->wqf_lock);
    412 
    413 	kmem_free(wqfw, sizeof(*wqfw));
    414 }
    415 
    416 /*
    418  * Work locking
    419  *
    420  * We use __cpu_simple_lock(9) rather than mutex(9) because Linux code
    421  * does not destroy work, so there is nowhere to call mutex_destroy.
    422  *
    423  * XXX This is getting out of hand...  Really, work items shouldn't
    424  * have locks in them at all; instead the workqueues should.
    425  */
    426 
    427 static void
    428 linux_work_lock_init(struct work_struct *work)
    429 {
    430 
    431 	__cpu_simple_lock_init(&work->w_lock);
    432 }
    433 
    434 static void
    435 linux_work_lock(struct work_struct *work)
    436 {
    437 	struct cpu_info *ci;
    438 	int cnt, s;
    439 
    440 	/* XXX Copypasta of MUTEX_SPIN_SPLRAISE.  */
    441 	s = splvm();
    442 	ci = curcpu();
    443 	cnt = ci->ci_mtx_count--;
    444 	__insn_barrier();
    445 	if (cnt == 0)
    446 		ci->ci_mtx_oldspl = s;
    447 
    448 	__cpu_simple_lock(&work->w_lock);
    449 }
    450 
    451 static void
    452 linux_work_unlock(struct work_struct *work)
    453 {
    454 	struct cpu_info *ci;
    455 	int s;
    456 
    457 	__cpu_simple_unlock(&work->w_lock);
    458 
    459 	/* XXX Copypasta of MUTEX_SPIN_SPLRESTORE.  */
    460 	ci = curcpu();
    461 	s = ci->ci_mtx_oldspl;
    462 	__insn_barrier();
    463 	if (++ci->ci_mtx_count == 0)
    464 		splx(s);
    465 }
    466 
    467 static bool __diagused
    468 linux_work_locked(struct work_struct *work)
    469 {
    470 	return __SIMPLELOCK_LOCKED_P(&work->w_lock);
    471 }
    472 
    473 /*
    475  * Work
    476  */
    477 
    478 void
    479 INIT_WORK(struct work_struct *work, void (*fn)(struct work_struct *))
    480 {
    481 
    482 	linux_work_lock_init(work);
    483 	work->w_state = WORK_IDLE;
    484 	work->w_wq = NULL;
    485 	work->func = fn;
    486 }
    487 
    488 bool
    489 schedule_work(struct work_struct *work)
    490 {
    491 	return queue_work(system_wq, work);
    492 }
    493 
    494 bool
    495 queue_work(struct workqueue_struct *wq, struct work_struct *work)
    496 {
    497 	/* True if we put it on the queue, false if it was already there.  */
    498 	bool newly_queued;
    499 
    500 	KASSERT(wq != NULL);
    501 
    502 	linux_work_lock(work);
    503 	switch (work->w_state) {
    504 	case WORK_IDLE:
    505 	case WORK_INVOKED:
    506 		work->w_state = WORK_PENDING;
    507 		work->w_wq = wq;
    508 		workqueue_enqueue(wq->wq_workqueue, &work->w_wk, NULL);
    509 		newly_queued = true;
    510 		break;
    511 
    512 	case WORK_DELAYED:
    513 		panic("queue_work(delayed work %p)", work);
    514 		break;
    515 
    516 	case WORK_PENDING:
    517 		KASSERT(work->w_wq == wq);
    518 		newly_queued = false;
    519 		break;
    520 
    521 	case WORK_CANCELLED:
    522 		newly_queued = false;
    523 		break;
    524 
    525 	case WORK_DELAYED_CANCELLED:
    526 		panic("queue_work(delayed work %p)", work);
    527 		break;
    528 
    529 	default:
    530 		panic("work %p in bad state: %d", work, (int)work->w_state);
    531 		break;
    532 	}
    533 	linux_work_unlock(work);
    534 
    535 	return newly_queued;
    536 }
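/*
 * Illustrative sketch, not from the original file: the usual pattern of
 * embedding a work_struct in a driver softc.  mydrv_softc and
 * mydrv_intr_task are hypothetical names.
 *
 *	struct mydrv_softc {
 *		struct workqueue_struct	*sc_wq;
 *		struct work_struct	sc_work;
 *	};
 *
 *	static void
 *	mydrv_intr_task(struct work_struct *work)
 *	{
 *		struct mydrv_softc *sc =
 *		    container_of(work, struct mydrv_softc, sc_work);
 *
 *		... thread-context driver work using sc ...
 *	}
 *
 *	INIT_WORK(&sc->sc_work, mydrv_intr_task);
 *	(void)queue_work(sc->sc_wq, &sc->sc_work);
 *
 * queue_work() returns false when the work was already pending, in
 * which case it is left where it is and will still run once.
 */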
    537 
    538 bool
    539 cancel_work_sync(struct work_struct *work)
    540 {
    541 	bool cancelled_p = false;
    542 
    543 	linux_work_lock(work);
    544 	switch (work->w_state) {
    545 	case WORK_IDLE:		/* Nothing to do.  */
    546 		break;
    547 
    548 	case WORK_DELAYED:
    549 		panic("cancel_work_sync(delayed work %p)", work);
    550 		break;
    551 
    552 	case WORK_PENDING:
    553 		work->w_state = WORK_CANCELLED;
    554 		linux_wait_for_cancelled_work(work);
    555 		cancelled_p = true;
    556 		break;
    557 
    558 	case WORK_INVOKED:
    559 		linux_wait_for_invoked_work(work);
    560 		break;
    561 
    562 	case WORK_CANCELLED:	/* Already done.  */
    563 		break;
    564 
    565 	case WORK_DELAYED_CANCELLED:
    566 		panic("cancel_work_sync(delayed work %p)", work);
    567 		break;
    568 
    569 	default:
    570 		panic("work %p in bad state: %d", work, (int)work->w_state);
    571 		break;
    572 	}
    573 	linux_work_unlock(work);
    574 
    575 	return cancelled_p;
    576 }
    577 
    578 static void
    579 linux_wait_for_cancelled_work(struct work_struct *work)
    580 {
    581 	struct workqueue_struct *wq;
    582 
    583 	KASSERT(linux_work_locked(work));
    584 	KASSERT(work->w_state == WORK_CANCELLED);
    585 
    586 	wq = work->w_wq;
    587 	do {
    588 		mutex_enter(&wq->wq_lock);
    589 		linux_work_unlock(work);
    590 		cv_wait(&wq->wq_cv, &wq->wq_lock);
    591 		mutex_exit(&wq->wq_lock);
    592 		linux_work_lock(work);
    593 	} while ((work->w_state == WORK_CANCELLED) && (work->w_wq == wq));
    594 }
    595 
    596 static void
    597 linux_wait_for_invoked_work(struct work_struct *work)
    598 {
    599 	struct workqueue_struct *wq;
    600 
    601 	KASSERT(linux_work_locked(work));
    602 	KASSERT(work->w_state == WORK_INVOKED);
    603 
    604 	wq = work->w_wq;
    605 	mutex_enter(&wq->wq_lock);
    606 	linux_work_unlock(work);
    607 	while (wq->wq_current_work == work)
    608 		cv_wait(&wq->wq_cv, &wq->wq_lock);
    609 	mutex_exit(&wq->wq_lock);
    610 
    611 	linux_work_lock(work);	/* XXX needless relock */
    612 }
    613 
    614 static void
    615 linux_worker(struct work *wk, void *arg)
    616 {
    617 	struct work_struct *const work = container_of(wk, struct work_struct,
    618 	    w_wk);
    619 	struct workqueue_struct *const wq = arg;
    620 
    621 	linux_work_lock(work);
    622 	switch (work->w_state) {
    623 	case WORK_IDLE:
    624 		panic("idle work %p got queued: %p", work, wq);
    625 		break;
    626 
    627 	case WORK_DELAYED:
    628 		panic("delayed work %p got queued: %p", work, wq);
    629 		break;
    630 
    631 	case WORK_PENDING:
    632 		KASSERT(work->w_wq == wq);
    633 
    634 		/* Get ready to invoke this one.  */
    635 		mutex_enter(&wq->wq_lock);
    636 		work->w_state = WORK_INVOKED;
    637 		KASSERT(wq->wq_current_work == NULL);
    638 		wq->wq_current_work = work;
    639 		mutex_exit(&wq->wq_lock);
    640 
    641 		/* Unlock it and do it.  Can't use work after this.  */
    642 		linux_work_unlock(work);
    643 		(*work->func)(work);
    644 
    645 		/* All done.  Notify anyone waiting for completion.  */
    646 		mutex_enter(&wq->wq_lock);
    647 		KASSERT(wq->wq_current_work == work);
    648 		wq->wq_current_work = NULL;
    649 		cv_broadcast(&wq->wq_cv);
    650 		mutex_exit(&wq->wq_lock);
    651 		return;
    652 
    653 	case WORK_INVOKED:
    654 		panic("invoked work %p got requeued: %p", work, wq);
    655 		break;
    656 
    657 	case WORK_CANCELLED:
    658 		KASSERT(work->w_wq == wq);
    659 
    660 		/* Return to idle; notify anyone waiting for cancellation.  */
    661 		mutex_enter(&wq->wq_lock);
    662 		work->w_state = WORK_IDLE;
    663 		work->w_wq = NULL;
    664 		cv_broadcast(&wq->wq_cv);
    665 		mutex_exit(&wq->wq_lock);
    666 		break;
    667 
    668 	case WORK_DELAYED_CANCELLED:
     669 		panic("cancelled delayed work %p got queued: %p", work, wq);
    670 		break;
    671 
    672 	default:
    673 		panic("work %p in bad state: %d", work, (int)work->w_state);
    674 		break;
    675 	}
    676 	linux_work_unlock(work);
    677 }
    678 
    679 /*
    681  * Delayed work
    682  */
    683 
    684 void
    685 INIT_DELAYED_WORK(struct delayed_work *dw, void (*fn)(struct work_struct *))
    686 {
    687 	INIT_WORK(&dw->work, fn);
    688 }
    689 
    690 bool
    691 schedule_delayed_work(struct delayed_work *dw, unsigned long ticks)
    692 {
    693 	return queue_delayed_work(system_wq, dw, ticks);
    694 }
    695 
    696 bool
    697 queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    698     unsigned long ticks)
    699 {
    700 	bool newly_queued;
    701 
    702 	KASSERT(wq != NULL);
    703 
    704 	linux_work_lock(&dw->work);
    705 	switch (dw->work.w_state) {
    706 	case WORK_IDLE:
    707 	case WORK_INVOKED:
    708 		if (ticks == 0) {
    709 			/* Skip the delay and queue it now.  */
    710 			dw->work.w_state = WORK_PENDING;
    711 			dw->work.w_wq = wq;
    712 			workqueue_enqueue(wq->wq_workqueue, &dw->work.w_wk,
    713 			    NULL);
    714 		} else {
    715 			callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
    716 			callout_reset(&dw->dw_callout, ticks,
    717 			    &linux_worker_intr, dw);
    718 			dw->work.w_state = WORK_DELAYED;
    719 			dw->work.w_wq = wq;
    720 			mutex_enter(&wq->wq_lock);
    721 			TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
    722 			mutex_exit(&wq->wq_lock);
    723 		}
    724 		newly_queued = true;
    725 		break;
    726 
    727 	case WORK_DELAYED:
    728 		/*
    729 		 * Timer is already ticking.  Leave it to time out
    730 		 * whenever it was going to time out, as Linux does --
    731 		 * neither speed it up nor postpone it.
    732 		 */
    733 		newly_queued = false;
    734 		break;
    735 
    736 	case WORK_PENDING:
    737 		KASSERT(dw->work.w_wq == wq);
    738 		newly_queued = false;
    739 		break;
    740 
    741 	case WORK_CANCELLED:
    742 	case WORK_DELAYED_CANCELLED:
    743 		/* XXX Wait for cancellation and then queue?  */
    744 		newly_queued = false;
    745 		break;
    746 
    747 	default:
    748 		panic("delayed work %p in bad state: %d", dw,
    749 		    (int)dw->work.w_state);
    750 		break;
    751 	}
    752 	linux_work_unlock(&dw->work);
    753 
    754 	return newly_queued;
    755 }
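/*
 * Illustrative sketch, not from the original file: arming a one-second
 * delayed tick on the system queue and tearing it down on detach.
 * sc_tick and mydrv_tick are hypothetical; mstohz() converts
 * milliseconds to callout ticks.
 *
 *	INIT_DELAYED_WORK(&sc->sc_tick, mydrv_tick);
 *	(void)schedule_delayed_work(&sc->sc_tick, mstohz(1000));
 *	...
 *	(void)cancel_delayed_work_sync(&sc->sc_tick);
 *
 * As noted above, scheduling again while the timer is already ticking
 * neither speeds it up nor postpones it; use mod_delayed_work() below
 * to move an armed deadline.
 */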
    756 
    757 bool
    758 mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    759     unsigned long ticks)
    760 {
    761 	bool timer_modified;
    762 
    763 	KASSERT(wq != NULL);
    764 
    765 	linux_work_lock(&dw->work);
    766 	switch (dw->work.w_state) {
    767 	case WORK_IDLE:
    768 	case WORK_INVOKED:
    769 		if (ticks == 0) {
    770 			/* Skip the delay and queue it now.  */
    771 			dw->work.w_state = WORK_PENDING;
    772 			dw->work.w_wq = wq;
    773 			workqueue_enqueue(wq->wq_workqueue, &dw->work.w_wk,
    774 			    NULL);
    775 		} else {
    776 			callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
    777 			callout_reset(&dw->dw_callout, ticks,
    778 			    &linux_worker_intr, dw);
    779 			dw->work.w_state = WORK_DELAYED;
    780 			dw->work.w_wq = wq;
    781 			mutex_enter(&wq->wq_lock);
    782 			TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
    783 			mutex_exit(&wq->wq_lock);
    784 		}
    785 		timer_modified = false;
    786 		break;
    787 
    788 	case WORK_DELAYED:
    789 		/*
    790 		 * Timer is already ticking.  Reschedule it.
    791 		 */
    792 		callout_schedule(&dw->dw_callout, ticks);
    793 		timer_modified = true;
    794 		break;
    795 
    796 	case WORK_PENDING:
    797 		KASSERT(dw->work.w_wq == wq);
    798 		timer_modified = false;
    799 		break;
    800 
    801 	case WORK_CANCELLED:
    802 	case WORK_DELAYED_CANCELLED:
    803 		/* XXX Wait for cancellation and then queue?  */
    804 		timer_modified = false;
    805 		break;
    806 
    807 	default:
    808 		panic("delayed work %p in bad state: %d", dw,
    809 		    (int)dw->work.w_state);
    810 		break;
    811 	}
    812 	linux_work_unlock(&dw->work);
    813 
    814 	return timer_modified;
    815 }
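/*
 * Illustrative sketch, not from the original file: a watchdog-style
 * kick using mod_delayed_work().  sc_watchdog is hypothetical.
 *
 *	static void
 *	mydrv_kick_watchdog(struct mydrv_softc *sc)
 *	{
 *		(void)mod_delayed_work(system_long_wq, &sc->sc_watchdog,
 *		    mstohz(5000));
 *	}
 *
 * If the timer is still ticking the deadline is moved to five seconds
 * from now; if the work is idle a fresh timer is started.  The return
 * value reports whether an existing timer was modified.
 */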
    816 
    817 bool
    818 cancel_delayed_work(struct delayed_work *dw)
    819 {
    820 	bool cancelled_p = false;
    821 
    822 	linux_work_lock(&dw->work);
    823 	switch (dw->work.w_state) {
    824 	case WORK_IDLE:		/* Nothing to do.  */
    825 		break;
    826 
    827 	case WORK_DELAYED:
    828 		dw->work.w_state = WORK_DELAYED_CANCELLED;
    829 		linux_cancel_delayed_work_callout(dw, false);
    830 		cancelled_p = true;
    831 		break;
    832 
    833 	case WORK_PENDING:
    834 		dw->work.w_state = WORK_CANCELLED;
    835 		cancelled_p = true;
    836 		break;
    837 
    838 	case WORK_INVOKED:	/* Don't wait!  */
    839 		break;
    840 
    841 	case WORK_CANCELLED:	/* Already done.  */
    842 	case WORK_DELAYED_CANCELLED:
    843 		break;
    844 
    845 	default:
    846 		panic("delayed work %p in bad state: %d", dw,
    847 		    (int)dw->work.w_state);
    848 		break;
    849 	}
    850 	linux_work_unlock(&dw->work);
    851 
    852 	return cancelled_p;
    853 }
    854 
    855 bool
    856 cancel_delayed_work_sync(struct delayed_work *dw)
    857 {
    858 	bool cancelled_p = false;
    859 
    860 	linux_work_lock(&dw->work);
    861 	switch (dw->work.w_state) {
    862 	case WORK_IDLE:		/* Nothing to do.  */
    863 		break;
    864 
    865 	case WORK_DELAYED:
    866 		dw->work.w_state = WORK_DELAYED_CANCELLED;
    867 		linux_cancel_delayed_work_callout(dw, true);
    868 		cancelled_p = true;
    869 		break;
    870 
    871 	case WORK_PENDING:
    872 		dw->work.w_state = WORK_CANCELLED;
    873 		linux_wait_for_cancelled_work(&dw->work);
    874 		cancelled_p = true;
    875 		break;
    876 
    877 	case WORK_INVOKED:
    878 		linux_wait_for_invoked_work(&dw->work);
    879 		break;
    880 
    881 	case WORK_CANCELLED:	/* Already done.  */
    882 		break;
    883 
    884 	case WORK_DELAYED_CANCELLED:
    885 		linux_wait_for_delayed_cancelled_work(dw);
    886 		break;
    887 
    888 	default:
    889 		panic("delayed work %p in bad state: %d", dw,
    890 		    (int)dw->work.w_state);
    891 		break;
    892 	}
    893 	linux_work_unlock(&dw->work);
    894 
    895 	return cancelled_p;
    896 }
    897 
    898 void
    899 flush_delayed_work(struct delayed_work *dw)
    900 {
    901 	struct workqueue_struct *wq = dw->work.w_wq;
    902 
    903 	if (wq != NULL)
    904 		flush_workqueue(wq);
    905 }
    906 
    907 static void
    908 linux_cancel_delayed_work_callout(struct delayed_work *dw, bool wait)
    909 {
    910 	bool fired_p;
    911 
    912 	KASSERT(linux_work_locked(&dw->work));
    913 	KASSERT(dw->work.w_state == WORK_DELAYED_CANCELLED);
    914 
    915 	if (wait) {
    916 		/*
    917 		 * We unlock, halt, and then relock, rather than
    918 		 * passing an interlock to callout_halt, for two
    919 		 * reasons:
    920 		 *
    921 		 * (1) The work lock is not a mutex(9), so we can't use it.
    922 		 * (2) The WORK_DELAYED_CANCELLED state serves as an interlock.
    923 		 */
    924 		linux_work_unlock(&dw->work);
    925 		fired_p = callout_halt(&dw->dw_callout, NULL);
    926 		linux_work_lock(&dw->work);
    927 	} else {
    928 		fired_p = callout_stop(&dw->dw_callout);
    929 	}
    930 
    931 	/*
    932 	 * fired_p means we didn't cancel the callout, so it must have
    933 	 * already begun and will clean up after itself.
    934 	 *
    935 	 * !fired_p means we cancelled it so we have to clean up after
    936 	 * it.  Nobody else should have changed the state in that case.
    937 	 */
    938 	if (!fired_p) {
    939 		struct workqueue_struct *wq;
    940 
    941 		KASSERT(linux_work_locked(&dw->work));
    942 		KASSERT(dw->work.w_state == WORK_DELAYED_CANCELLED);
    943 
    944 		wq = dw->work.w_wq;
    945 		mutex_enter(&wq->wq_lock);
    946 		TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
    947 		callout_destroy(&dw->dw_callout);
    948 		dw->work.w_state = WORK_IDLE;
    949 		dw->work.w_wq = NULL;
    950 		cv_broadcast(&wq->wq_cv);
    951 		mutex_exit(&wq->wq_lock);
    952 	}
    953 }
    954 
    955 static void
    956 linux_wait_for_delayed_cancelled_work(struct delayed_work *dw)
    957 {
    958 	struct workqueue_struct *wq;
    959 
    960 	KASSERT(linux_work_locked(&dw->work));
    961 	KASSERT(dw->work.w_state == WORK_DELAYED_CANCELLED);
    962 
    963 	wq = dw->work.w_wq;
    964 	do {
    965 		mutex_enter(&wq->wq_lock);
    966 		linux_work_unlock(&dw->work);
    967 		cv_wait(&wq->wq_cv, &wq->wq_lock);
    968 		mutex_exit(&wq->wq_lock);
    969 		linux_work_lock(&dw->work);
    970 	} while ((dw->work.w_state == WORK_DELAYED_CANCELLED) &&
    971 	    (dw->work.w_wq == wq));
    972 }
    973 
    974 static void
    975 linux_worker_intr(void *arg)
    976 {
    977 	struct delayed_work *dw = arg;
    978 	struct workqueue_struct *wq;
    979 
    980 	linux_work_lock(&dw->work);
    981 
    982 	KASSERT((dw->work.w_state == WORK_DELAYED) ||
    983 	    (dw->work.w_state == WORK_DELAYED_CANCELLED));
    984 
    985 	wq = dw->work.w_wq;
    986 	mutex_enter(&wq->wq_lock);
    987 
    988 	/* Queue the work, or return it to idle and alert any cancellers.  */
    989 	if (__predict_true(dw->work.w_state == WORK_DELAYED)) {
    990 		dw->work.w_state = WORK_PENDING;
    991 		workqueue_enqueue(dw->work.w_wq->wq_workqueue, &dw->work.w_wk,
    992 		    NULL);
    993 	} else {
    994 		KASSERT(dw->work.w_state == WORK_DELAYED_CANCELLED);
    995 		dw->work.w_state = WORK_IDLE;
    996 		dw->work.w_wq = NULL;
    997 		cv_broadcast(&wq->wq_cv);
    998 	}
    999 
   1000 	/* Either way, the callout is done.  */
   1001 	TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
   1002 	callout_destroy(&dw->dw_callout);
   1003 
   1004 	mutex_exit(&wq->wq_lock);
   1005 	linux_work_unlock(&dw->work);
   1006 }
   1007