      1 /*	$NetBSD: linux_work.c,v 1.34 2018/08/27 15:04:45 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2018 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Taylor R. Campbell.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: linux_work.c,v 1.34 2018/08/27 15:04:45 riastradh Exp $");
     34 
     35 #include <sys/types.h>
     36 #include <sys/atomic.h>
     37 #include <sys/callout.h>
     38 #include <sys/condvar.h>
     39 #include <sys/errno.h>
     40 #include <sys/kmem.h>
     41 #include <sys/kthread.h>
     42 #include <sys/lwp.h>
     43 #include <sys/mutex.h>
     44 #include <sys/queue.h>
     45 
     46 #include <linux/workqueue.h>
     47 
     48 struct workqueue_struct {
     49 	kmutex_t			wq_lock;
     50 	kcondvar_t			wq_cv;
     51 	TAILQ_HEAD(, delayed_work)	wq_delayed;
     52 	TAILQ_HEAD(, work_struct)	wq_queue;
     53 	struct work_struct		*wq_current_work;
     54 	int				wq_flags;
     55 	struct lwp			*wq_lwp;
     56 	uint64_t			wq_gen;
     57 	bool				wq_requeued:1;
     58 	bool				wq_dying:1;
     59 };
     60 
     61 static void __dead	linux_workqueue_thread(void *);
     62 static void		linux_workqueue_timeout(void *);
     63 static struct workqueue_struct *
     64 			acquire_work(struct work_struct *,
     65 			    struct workqueue_struct *);
     66 static void		release_work(struct work_struct *,
     67 			    struct workqueue_struct *);
     68 static void		wait_for_current_work(struct work_struct *,
     69 			    struct workqueue_struct *);
     70 static void		dw_callout_init(struct workqueue_struct *,
     71 			    struct delayed_work *);
     72 static void		dw_callout_destroy(struct workqueue_struct *,
     73 			    struct delayed_work *);
     74 static void		cancel_delayed_work_done(struct workqueue_struct *,
     75 			    struct delayed_work *);
     76 
     77 static specificdata_key_t workqueue_key __read_mostly;
     78 
     79 struct workqueue_struct	*system_wq __read_mostly;
     80 struct workqueue_struct	*system_long_wq __read_mostly;
     81 struct workqueue_struct	*system_power_efficient_wq __read_mostly;
     82 
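        /*
         * linux_workqueue_init()
         *
         *	Initialize the Linux workqueue subsystem: create the
         *	lwp-specific key and the system workqueues.  Return 0 on
         *	success, NetBSD error on failure.
         */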
     83 int
     84 linux_workqueue_init(void)
     85 {
     86 	int error;
     87 
     88 	error = lwp_specific_key_create(&workqueue_key, NULL);
     89 	if (error)
     90 		goto fail0;
     91 
     92 	system_wq = alloc_ordered_workqueue("lnxsyswq", 0);
     93 	if (system_wq == NULL) {
     94 		error = ENOMEM;
     95 		goto fail1;
     96 	}
     97 
     98 	system_long_wq = alloc_ordered_workqueue("lnxlngwq", 0);
     99 	if (system_long_wq == NULL) {
    100 		error = ENOMEM;
    101 		goto fail2;
    102 	}
    103 
    104 	system_power_efficient_wq = alloc_ordered_workqueue("lnxpwrwq", 0);
    105 	if (system_power_efficient_wq == NULL) {
    106 		error = ENOMEM;
    107 		goto fail3;
    108 	}
    109 
    110 	return 0;
    111 
    112 fail4: __unused
    113 	destroy_workqueue(system_power_efficient_wq);
    114 fail3:	destroy_workqueue(system_long_wq);
    115 fail2:	destroy_workqueue(system_wq);
    116 fail1:	lwp_specific_key_delete(workqueue_key);
    117 fail0:	KASSERT(error);
    118 	return error;
    119 }
    120 
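        /*
         * linux_workqueue_fini()
         *
         *	Tear down the Linux workqueue subsystem: destroy the
         *	system workqueues and the lwp-specific key.  Never fails.
         */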
    121 void
    122 linux_workqueue_fini(void)
    123 {
    124 
    125 	destroy_workqueue(system_power_efficient_wq);
    126 	destroy_workqueue(system_long_wq);
    127 	destroy_workqueue(system_wq);
    128 	lwp_specific_key_delete(workqueue_key);
    129 }
    130 
    131 /*
    133  * Workqueues
    134  */
    135 
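        /*
         * alloc_ordered_workqueue(name, flags)
         *
         *	Create a workqueue with a single worker thread named
         *	name.  No flags are currently supported, so flags must be
         *	zero.  Return NULL on failure, pointer to the new
         *	workqueue on success.
         */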
    136 struct workqueue_struct *
    137 alloc_ordered_workqueue(const char *name, int flags)
    138 {
    139 	struct workqueue_struct *wq;
    140 	int error;
    141 
    142 	KASSERT(flags == 0);
    143 
    144 	wq = kmem_zalloc(sizeof(*wq), KM_SLEEP);
    145 
    146 	mutex_init(&wq->wq_lock, MUTEX_DEFAULT, IPL_NONE);
    147 	cv_init(&wq->wq_cv, name);
    148 	TAILQ_INIT(&wq->wq_delayed);
    149 	TAILQ_INIT(&wq->wq_queue);
    150 	wq->wq_current_work = NULL;
    151 	wq->wq_flags = 0;
    152 	wq->wq_lwp = NULL;
    153 	wq->wq_gen = 0;
    154 	wq->wq_requeued = false;
    155 	wq->wq_dying = false;
    156 
    157 	error = kthread_create(PRI_NONE,
    158 	    KTHREAD_MPSAFE|KTHREAD_TS|KTHREAD_MUSTJOIN, NULL,
    159 	    &linux_workqueue_thread, wq, &wq->wq_lwp, "%s", name);
    160 	if (error)
    161 		goto fail0;
    162 
    163 	return wq;
    164 
    165 fail0:	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
    166 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
    167 	cv_destroy(&wq->wq_cv);
    168 	mutex_destroy(&wq->wq_lock);
    169 	kmem_free(wq, sizeof(*wq));
    170 	return NULL;
    171 }
    172 
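        /*
         * destroy_workqueue(wq)
         *
         *	Destroy a workqueue created with alloc_ordered_workqueue:
         *	cancel any delayed work still waiting on its callout,
         *	wait for the worker thread to finish the work already
         *	queued and exit, and free wq.  May sleep.
         */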
    173 void
    174 destroy_workqueue(struct workqueue_struct *wq)
    175 {
    176 
    177 	/*
    178 	 * Cancel all delayed work.  We do this first because any
    179 	 * delayed work that has already timed out, which we can't
    180 	 * cancel, may have queued new work.
    181 	 */
    182 	mutex_enter(&wq->wq_lock);
    183 	while (!TAILQ_EMPTY(&wq->wq_delayed)) {
    184 		struct delayed_work *const dw = TAILQ_FIRST(&wq->wq_delayed);
    185 
    186 		KASSERT(dw->work.work_queue == wq);
    187 		KASSERTMSG((dw->dw_state == DELAYED_WORK_SCHEDULED ||
    188 			dw->dw_state == DELAYED_WORK_RESCHEDULED ||
    189 			dw->dw_state == DELAYED_WORK_CANCELLED),
    190 		    "delayed work %p in bad state: %d",
    191 		    dw, dw->dw_state);
    192 
    193 		/*
    194 		 * Mark it cancelled and try to stop the callout before
    195 		 * it starts.
    196 		 *
    197 		 * If it's too late and the callout has already begun
    198 		 * to execute, then it will notice that we asked to
    199 		 * cancel it and remove itself from the queue before
    200 		 * returning.
    201 		 *
    202 		 * If we stopped the callout before it started,
    203 		 * however, then we can safely destroy the callout and
    204 		 * dissociate it from the workqueue ourselves.
    205 		 */
    206 		dw->dw_state = DELAYED_WORK_CANCELLED;
    207 		if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
    208 			cancel_delayed_work_done(wq, dw);
    209 	}
    210 	mutex_exit(&wq->wq_lock);
    211 
    212 	/*
    213 	 * At this point, no new work can be put on the queue.
    214 	 */
    215 
    216 	/* Tell the thread to exit.  */
    217 	mutex_enter(&wq->wq_lock);
    218 	wq->wq_dying = true;
    219 	cv_broadcast(&wq->wq_cv);
    220 	mutex_exit(&wq->wq_lock);
    221 
    222 	/* Wait for it to exit.  */
    223 	(void)kthread_join(wq->wq_lwp);
    224 
    225 	KASSERT(wq->wq_dying);
    226 	KASSERT(!wq->wq_requeued);
    227 	KASSERT(wq->wq_flags == 0);
    228 	KASSERT(wq->wq_current_work == NULL);
    229 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
    230 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
    231 	cv_destroy(&wq->wq_cv);
    232 	mutex_destroy(&wq->wq_lock);
    233 
    234 	kmem_free(wq, sizeof(*wq));
    235 }
    236 
    237 /*
    239  * Work thread and callout
    240  */
    241 
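        /*
         * linux_workqueue_thread(cookie)
         *
         *	Main function of a workqueue's worker thread.  Waits for
         *	work to be queued, grabs a batch off the queue, runs each
         *	work item in the batch, and repeats until told to exit by
         *	destroy_workqueue.
         */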
    242 static void __dead
    243 linux_workqueue_thread(void *cookie)
    244 {
    245 	struct workqueue_struct *const wq = cookie;
    246 	TAILQ_HEAD(, work_struct) tmp;
    247 
    248 	lwp_setspecific(workqueue_key, wq);
    249 
    250 	mutex_enter(&wq->wq_lock);
    251 	for (;;) {
    252 		/*
    253 		 * Wait until there's activity.  If there's no work and
    254 		 * we're dying, stop here.
    255 		 */
    256 		while (TAILQ_EMPTY(&wq->wq_queue) && !wq->wq_dying)
    257 			cv_wait(&wq->wq_cv, &wq->wq_lock);
    258 		if (TAILQ_EMPTY(&wq->wq_queue)) {
    259 			KASSERT(wq->wq_dying);
    260 			break;
    261 		}
    262 
    263 		/* Grab a batch of work off the queue.  */
    264 		KASSERT(!TAILQ_EMPTY(&wq->wq_queue));
    265 		TAILQ_INIT(&tmp);
    266 		TAILQ_CONCAT(&tmp, &wq->wq_queue, work_entry);
    267 
    268 		/* Process each work item in the batch.  */
    269 		while (!TAILQ_EMPTY(&tmp)) {
    270 			struct work_struct *const work = TAILQ_FIRST(&tmp);
    271 
    272 			KASSERT(work->work_queue == wq);
    273 			TAILQ_REMOVE(&tmp, work, work_entry);
    274 			KASSERT(wq->wq_current_work == NULL);
    275 			wq->wq_current_work = work;
    276 
    277 			mutex_exit(&wq->wq_lock);
    278 			(*work->func)(work);
    279 			mutex_enter(&wq->wq_lock);
    280 
    281 			KASSERT(wq->wq_current_work == work);
    282 			KASSERT(work->work_queue == wq);
    283 			if (wq->wq_requeued)
    284 				wq->wq_requeued = false;
    285 			else
    286 				release_work(work, wq);
    287 			wq->wq_current_work = NULL;
    288 			cv_broadcast(&wq->wq_cv);
    289 		}
    290 
    291 		/* Notify flush that we've completed a batch of work.  */
    292 		wq->wq_gen++;
    293 		cv_broadcast(&wq->wq_cv);
    294 	}
    295 	mutex_exit(&wq->wq_lock);
    296 
    297 	kthread_exit(0);
    298 }
    299 
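        /*
         * linux_workqueue_timeout(cookie)
         *
         *	Delayed work callout handler.  If the delayed work is
         *	still scheduled, put it on the workqueue's queue and wake
         *	the worker thread; if it has been rescheduled, let the
         *	re-armed callout fire again; if it has been cancelled,
         *	complete the cancellation.
         */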
    300 static void
    301 linux_workqueue_timeout(void *cookie)
    302 {
    303 	struct delayed_work *const dw = cookie;
    304 	struct workqueue_struct *const wq = dw->work.work_queue;
    305 
    306 	KASSERT(wq != NULL);
    307 
    308 	mutex_enter(&wq->wq_lock);
    309 	KASSERT(dw->work.work_queue == wq);
    310 	switch (dw->dw_state) {
    311 	case DELAYED_WORK_IDLE:
    312 		panic("delayed work callout uninitialized: %p", dw);
    313 	case DELAYED_WORK_SCHEDULED:
    314 		dw_callout_destroy(wq, dw);
    315 		TAILQ_INSERT_TAIL(&wq->wq_queue, &dw->work, work_entry);
    316 		cv_broadcast(&wq->wq_cv);
    317 		break;
    318 	case DELAYED_WORK_RESCHEDULED:
    319 		dw->dw_state = DELAYED_WORK_SCHEDULED;
    320 		break;
    321 	case DELAYED_WORK_CANCELLED:
    322 		cancel_delayed_work_done(wq, dw);
    323 		/* Can't touch dw any more.  */
    324 		goto out;
    325 	default:
    326 		panic("delayed work callout in bad state: %p", dw);
    327 	}
    328 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE ||
    329 	    dw->dw_state == DELAYED_WORK_SCHEDULED);
    330 out:	mutex_exit(&wq->wq_lock);
    331 }
    332 
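        /*
         * current_work()
         *
         *	If called from a workqueue's worker thread, return the
         *	work it is currently executing.  Otherwise return NULL.
         */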
    333 struct work_struct *
    334 current_work(void)
    335 {
    336 	struct workqueue_struct *wq = lwp_getspecific(workqueue_key);
    337 
    338 	/* If we're not a workqueue thread, then there's no work.  */
    339 	if (wq == NULL)
    340 		return NULL;
    341 
    342 	/*
    343 	 * Otherwise, this should be possible only while work is in
    344 	 * progress.  Return the current work item.
    345 	 */
    346 	KASSERT(wq->wq_current_work != NULL);
    347 	return wq->wq_current_work;
    348 }
    349 
    350 /*
    352  * Work
    353  */
    354 
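        /*
         * INIT_WORK(work, fn)
         *
         *	Initialize work for use with a workqueue to call fn in a
         *	worker thread.  There is no corresponding destruction
         *	operation.
         *
         *	Illustrative sketch of caller usage (hypothetical names,
         *	not part of this file):
         *
         *		INIT_WORK(&sc->sc_work, mydrv_task);
         *		queue_work(system_wq, &sc->sc_work);
         */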
    355 void
    356 INIT_WORK(struct work_struct *work, void (*fn)(struct work_struct *))
    357 {
    358 
    359 	work->work_queue = NULL;
    360 	work->func = fn;
    361 }
    362 
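        /*
         * acquire_work(work, wq)
         *
         *	Try to associate work with wq.  Return NULL if work was
         *	not on any workqueue and is now associated with wq;
         *	otherwise return the workqueue it is already on.  Caller
         *	must hold wq's lock.
         */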
    363 static struct workqueue_struct *
    364 acquire_work(struct work_struct *work, struct workqueue_struct *wq)
    365 {
    366 	struct workqueue_struct *wq0;
    367 
    368 	KASSERT(mutex_owned(&wq->wq_lock));
    369 
    370 	wq0 = atomic_cas_ptr(&work->work_queue, NULL, wq);
    371 	if (wq0 == NULL) {
    372 		membar_enter();
    373 		KASSERT(work->work_queue == wq);
    374 	}
    375 
    376 	return wq0;
    377 }
    378 
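        /*
         * release_work(work, wq)
         *
         *	Dissociate work from wq.  Caller must hold wq's lock and
         *	must be done with work on wq.
         */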
    379 static void
    380 release_work(struct work_struct *work, struct workqueue_struct *wq)
    381 {
    382 
    383 	KASSERT(work->work_queue == wq);
    384 	KASSERT(mutex_owned(&wq->wq_lock));
    385 
    386 	membar_exit();
    387 	work->work_queue = NULL;
    388 }
    389 
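        /*
         * schedule_work(work)
         *
         *	Queue work to run on the system workqueue (system_wq).
         *	True if it was newly queued, false if it was already on
         *	the queue.
         */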
    390 bool
    391 schedule_work(struct work_struct *work)
    392 {
    393 
    394 	return queue_work(system_wq, work);
    395 }
    396 
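        /*
         * queue_work(wq, work)
         *
         *	Queue work to run on wq.  True if it was newly put on the
         *	queue, false if it was already there.  If it had been
         *	taken off the queue to run and has not yet been requeued,
         *	put it back on the queue so it runs again.
         */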
    397 bool
    398 queue_work(struct workqueue_struct *wq, struct work_struct *work)
    399 {
    400 	struct workqueue_struct *wq0;
    401 	bool newly_queued;
    402 
    403 	KASSERT(wq != NULL);
    404 
    405 	mutex_enter(&wq->wq_lock);
    406 	if (__predict_true((wq0 = acquire_work(work, wq)) == NULL)) {
    407 		/*
    408 		 * It wasn't on any workqueue at all.  Put it on this
    409 		 * one, and signal the worker thread that there is work
    410 		 * to do.
    411 		 */
    412 		TAILQ_INSERT_TAIL(&wq->wq_queue, work, work_entry);
    413 		newly_queued = true;
    414 		cv_broadcast(&wq->wq_cv);
    415 	} else {
    416 		/*
    417 		 * It was on a workqueue, which had better be this one.
    418 		 * Requeue it if it has been taken off the queue to
    419 		 * execute and hasn't been requeued yet.  The worker
    420 		 * thread should already be running, so no need to
    421 		 * signal it.
    422 		 */
    423 		KASSERT(wq0 == wq);
    424 		if (wq->wq_current_work == work && !wq->wq_requeued) {
    425 			/*
    426 			 * It has been taken off the queue to execute,
    427 			 * and it hasn't been put back on the queue
    428 			 * again.  Put it back on the queue.  No need
    429 			 * to signal the worker thread because it will
    430 			 * notice when it reacquires the lock after
    431 			 * doing the work.
    432 			 */
    433 			TAILQ_INSERT_TAIL(&wq->wq_queue, work, work_entry);
    434 			wq->wq_requeued = true;
    435 			newly_queued = true;
    436 		} else {
    437 			/* It is still on the queue; nothing to do.  */
    438 			newly_queued = false;
    439 		}
    440 	}
    441 	mutex_exit(&wq->wq_lock);
    442 
    443 	return newly_queued;
    444 }
    445 
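        /*
         * cancel_work(work)
         *
         *	If work is on a queue but has not yet begun to run, take
         *	it off the queue and return true.  Return false if it has
         *	already begun to run, has already completed, or was never
         *	queued.  Does not wait for a running work item.
         */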
    446 bool
    447 cancel_work(struct work_struct *work)
    448 {
    449 	struct workqueue_struct *wq;
    450 	bool cancelled_p = false;
    451 
    452 	/* If there's no workqueue, nothing to cancel.   */
    453 	if ((wq = work->work_queue) == NULL)
    454 		goto out;
    455 
    456 	mutex_enter(&wq->wq_lock);
    457 	if (__predict_false(work->work_queue != wq)) {
    458 		/*
    459 		 * It has finished execution or been cancelled by
    460 		 * another thread, and has been moved off the
    461 		 * workqueue, so it's too late to cancel.
    462 		 */
    463 		cancelled_p = false;
    464 	} else if (wq->wq_current_work == work) {
    465 		/*
    466 		 * It has already begun execution, so it's too late to
    467 		 * cancel now.
    468 		 */
    469 		cancelled_p = false;
    470 	} else {
    471 		/*
    472 		 * It is still on the queue.  Take it off the queue and
    473 		 * report successful cancellation.
    474 		 */
    475 		TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
    476 		cancelled_p = true;
    477 	}
    478 	mutex_exit(&wq->wq_lock);
    479 
    480 out:	return cancelled_p;
    481 }
    482 
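        /*
         * cancel_work_sync(work)
         *
         *	Like cancel_work, but if work has already begun to run,
         *	also wait for it to complete.  May sleep.
         */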
    483 bool
    484 cancel_work_sync(struct work_struct *work)
    485 {
    486 	struct workqueue_struct *wq;
    487 	bool cancelled_p = false;
    488 
    489 	/* If there's no workqueue, nothing to cancel.   */
    490 	if ((wq = work->work_queue) == NULL)
    491 		goto out;
    492 
    493 	mutex_enter(&wq->wq_lock);
    494 	if (__predict_false(work->work_queue != wq)) {
    495 		/*
    496 		 * It has finished execution or been cancelled by
    497 		 * another thread, and has been moved off the
    498 		 * workqueue, so it's too late to cancel.
    499 		 */
    500 		cancelled_p = false;
    501 	} else if (wq->wq_current_work == work) {
    502 		/*
    503 		 * It has already begun execution, so it's too late to
    504 		 * cancel now.  Wait for it to complete.
    505 		 */
    506 		wait_for_current_work(work, wq);
    507 		cancelled_p = false;
    508 	} else {
    509 		/*
    510 		 * It is still on the queue.  Take it off the queue and
    511 		 * report successful cancellation.
    512 		 */
    513 		TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
    514 		cancelled_p = true;
    515 	}
    516 	mutex_exit(&wq->wq_lock);
    517 
    518 out:	return cancelled_p;
    519 }
    520 
    521 /*
    522  * wait_for_current_work(work, wq)
    523  *
    524  *	wq must be currently executing work.  Wait for it to finish.
    525  */
    526 static void
    527 wait_for_current_work(struct work_struct *work, struct workqueue_struct *wq)
    528 {
    529 	uint64_t gen;
    530 
    531 	KASSERT(mutex_owned(&wq->wq_lock));
    532 	KASSERT(work->work_queue == wq);
    533 	KASSERT(wq->wq_current_work == work);
    534 
    535 	/* Wait only one generation in case it gets requeued quickly.  */
    536 	gen = wq->wq_gen;
    537 	do {
    538 		cv_wait(&wq->wq_cv, &wq->wq_lock);
    539 	} while (wq->wq_current_work == work && wq->wq_gen == gen);
    540 }
    541 
    542 /*
    544  * Delayed work
    545  */
    546 
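        /*
         * INIT_DELAYED_WORK(dw, fn)
         *
         *	Initialize dw for use with a workqueue to call fn in a
         *	worker thread after a delay.  There is no corresponding
         *	destruction operation.
         *
         *	Illustrative sketch of caller usage (hypothetical names,
         *	not part of this file):
         *
         *		INIT_DELAYED_WORK(&sc->sc_dwork, mydrv_tick);
         *		schedule_delayed_work(&sc->sc_dwork, hz);
         */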
    547 void
    548 INIT_DELAYED_WORK(struct delayed_work *dw, void (*fn)(struct work_struct *))
    549 {
    550 
    551 	INIT_WORK(&dw->work, fn);
    552 	dw->dw_state = DELAYED_WORK_IDLE;
    553 
    554 	/*
    555 	 * Defer callout_init until we are going to schedule the
    556 	 * callout, which can then callout_destroy it: since there is
    557 	 * no DESTROY_DELAYED_WORK or anything like it, we would
    558 	 * otherwise have no opportunity to call callout_destroy.
    559 	 */
    560 }
    561 
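        /*
         * schedule_delayed_work(dw, ticks)
         *
         *	Schedule dw to run on the system workqueue (system_wq)
         *	after ticks.  True if it was newly scheduled, false if it
         *	was already scheduled or queued.
         */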
    562 bool
    563 schedule_delayed_work(struct delayed_work *dw, unsigned long ticks)
    564 {
    565 
    566 	return queue_delayed_work(system_wq, dw, ticks);
    567 }
    568 
    569 /*
    570  * dw_callout_init(wq, dw)
    571  *
    572  *	Initialize the callout of dw and transition to
    573  *	DELAYED_WORK_SCHEDULED.  Caller must use callout_schedule.
    574  */
    575 static void
    576 dw_callout_init(struct workqueue_struct *wq, struct delayed_work *dw)
    577 {
    578 
    579 	KASSERT(mutex_owned(&wq->wq_lock));
    580 	KASSERT(dw->work.work_queue == wq);
    581 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
    582 
    583 	callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
    584 	callout_setfunc(&dw->dw_callout, &linux_workqueue_timeout, dw);
    585 	TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
    586 	dw->dw_state = DELAYED_WORK_SCHEDULED;
    587 }
    588 
    589 /*
    590  * dw_callout_destroy(wq, dw)
    591  *
    592  *	Destroy the callout of dw and transition to DELAYED_WORK_IDLE.
    593  */
    594 static void
    595 dw_callout_destroy(struct workqueue_struct *wq, struct delayed_work *dw)
    596 {
    597 
    598 	KASSERT(mutex_owned(&wq->wq_lock));
    599 	KASSERT(dw->work.work_queue == wq);
    600 	KASSERT(dw->dw_state == DELAYED_WORK_SCHEDULED ||
    601 	    dw->dw_state == DELAYED_WORK_RESCHEDULED ||
    602 	    dw->dw_state == DELAYED_WORK_CANCELLED);
    603 
    604 	TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
    605 	callout_destroy(&dw->dw_callout);
    606 	dw->dw_state = DELAYED_WORK_IDLE;
    607 }
    608 
    609 /*
    610  * cancel_delayed_work_done(wq, dw)
    611  *
    612  *	Complete cancellation of a delayed work: transition from
    613  *	DELAYED_WORK_CANCELLED to DELAYED_WORK_IDLE and off the
    614  *	workqueue.  Caller must not touch dw after this returns.
    615  */
    616 static void
    617 cancel_delayed_work_done(struct workqueue_struct *wq, struct delayed_work *dw)
    618 {
    619 
    620 	KASSERT(mutex_owned(&wq->wq_lock));
    621 	KASSERT(dw->work.work_queue == wq);
    622 	KASSERT(dw->dw_state == DELAYED_WORK_CANCELLED);
    623 
    624 	dw_callout_destroy(wq, dw);
    625 	release_work(&dw->work, wq);
    626 	/* Can't touch dw after this point.  */
    627 }
    628 
    629 /*
    630  * queue_delayed_work(wq, dw, ticks)
    631  *
    632  *	If it is not currently scheduled, schedule dw to run after
    633  *	ticks.  If currently executing and not already rescheduled,
    634  *	reschedule it.  If ticks == 0, run without delay.
    635  */
    636 bool
    637 queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    638     unsigned long ticks)
    639 {
    640 	struct workqueue_struct *wq0;
    641 	bool newly_queued;
    642 
    643 	mutex_enter(&wq->wq_lock);
    644 	if (__predict_true((wq0 = acquire_work(&dw->work, wq)) == NULL)) {
    645 		/*
    646 		 * It wasn't on any workqueue at all.  Schedule it to
    647 		 * run on this one.
    648 		 */
    649 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
    650 		if (ticks == 0) {
    651 			TAILQ_INSERT_TAIL(&wq->wq_queue, &dw->work,
    652 			    work_entry);
    653 			cv_broadcast(&wq->wq_cv);
    654 		} else {
    655 			/*
    656 			 * Initialize a callout and schedule to run
    657 			 * after a delay.
    658 			 */
    659 			dw_callout_init(wq, dw);
    660 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
    661 		}
    662 		newly_queued = true;
    663 	} else {
    664 		/*
    665 		 * It was on a workqueue, which had better be this one.
    666 		 *
    667 		 * - If it has already begun to run, and it is not yet
    668 		 *   scheduled to run again, schedule it again.
    669 		 *
    670 		 * - If the callout is cancelled, reschedule it.
    671 		 *
    672 		 * - Otherwise, leave it alone.
    673 		 */
    674 		KASSERT(wq0 == wq);
    675 		if (wq->wq_current_work != &dw->work || !wq->wq_requeued) {
    676 			/*
    677 			 * It is either scheduled, on the queue but not
    678 			 * in progress, or in progress but not on the
    679 			 * queue.
    680 			 */
    681 			switch (dw->dw_state) {
    682 			case DELAYED_WORK_IDLE:
    683 				/*
    684 				 * It is not scheduled to run, and it
    685 				 * is not on the queue if it is
    686 				 * running.
    687 				 */
    688 				if (ticks == 0) {
    689 					/*
    690 					 * If it's in progress, put it
    691 					 * on the queue to run as soon
    692 					 * as the worker thread gets to
    693 					 * it.  No need for a wakeup
    694 					 * because either the worker
    695 					 * thread already knows it is
    696 					 * on the queue, or will check
    697 					 * once it is done executing.
    698 					 */
    699 					if (wq->wq_current_work == &dw->work) {
    700 						KASSERT(!wq->wq_requeued);
    701 						TAILQ_INSERT_TAIL(&wq->wq_queue,
    702 						    &dw->work, work_entry);
    703 						wq->wq_requeued = true;
    704 					}
    705 				} else {
    706 					/*
    707 					 * Initialize a callout and
    708 					 * schedule it to run after the
    709 					 * specified delay.
    710 					 */
    711 					dw_callout_init(wq, dw);
    712 					callout_schedule(&dw->dw_callout,
    713 					    MIN(INT_MAX, ticks));
    714 				}
    715 				break;
    716 			case DELAYED_WORK_SCHEDULED:
    717 			case DELAYED_WORK_RESCHEDULED:
    718 				/*
    719 				 * It is already scheduled to run after
    720 				 * a delay.  Leave it be.
    721 				 */
    722 				break;
    723 			case DELAYED_WORK_CANCELLED:
    724 				/*
    725 				 * It was scheduled and the callout has
    726 				 * begun to execute, but it was
    727 				 * cancelled.  Reschedule it.
    728 				 */
    729 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
    730 				callout_schedule(&dw->dw_callout,
    731 				    MIN(INT_MAX, ticks));
    732 				break;
    733 			default:
    734 				panic("invalid delayed work state: %d",
    735 				    dw->dw_state);
    736 			}
    737 		} else {
    738 			/*
    739 			 * It is in progress and it has been requeued.
    740 			 * It cannot be scheduled to run after a delay
    741 			 * at this point.  We just leave it be.
    742 			 */
    743 			KASSERTMSG((dw->dw_state == DELAYED_WORK_IDLE),
    744 			    "delayed work %p in wrong state: %d",
    745 			    dw, dw->dw_state);
    746 		}
    747 	}
    748 	mutex_exit(&wq->wq_lock);
    749 
    750 	return newly_queued;
    751 }
    752 
    753 /*
    754  * mod_delayed_work(wq, dw, ticks)
    755  *
    756  *	Schedule dw to run after ticks.  If currently scheduled,
    757  *	reschedule it.  If currently executing, reschedule it.  If
    758  *	ticks == 0, run without delay.
    759  */
    760 bool
    761 mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    762     unsigned long ticks)
    763 {
    764 	struct workqueue_struct *wq0;
    765 	bool timer_modified;
    766 
    767 	mutex_enter(&wq->wq_lock);
    768 	if ((wq0 = acquire_work(&dw->work, wq)) == NULL) {
    769 		/*
    770 		 * It wasn't on any workqueue at all.  Schedule it to
    771 		 * run on this one.
    772 		 */
    773 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
    774 		if (ticks == 0) {
    775 			/*
    776 			 * Run immediately: put it on the queue and
    777 			 * signal the worker thread.
    778 			 */
    779 			TAILQ_INSERT_TAIL(&wq->wq_queue, &dw->work,
    780 			    work_entry);
    781 			cv_broadcast(&wq->wq_cv);
    782 		} else {
    783 			/*
    784 			 * Initialize a callout and schedule to run
    785 			 * after a delay.
    786 			 */
    787 			dw_callout_init(wq, dw);
    788 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
    789 		}
    790 		timer_modified = false;
    791 	} else {
    792 		/* It was on a workqueue, which had better be this one.  */
    793 		KASSERT(wq0 == wq);
    794 		switch (dw->dw_state) {
    795 		case DELAYED_WORK_IDLE:
    796 			/*
    797 			 * It is not scheduled: it is on the queue or
    798 			 * it is running or both.
    799 			 */
    800 			if (wq->wq_current_work != &dw->work ||
    801 			    wq->wq_requeued) {
    802 				/*
    803 				 * It is on the queue, and it may or
    804 				 * may not be running.
    805 				 */
    806 				if (ticks == 0) {
    807 					/*
    808 					 * We ask it to run
    809 					 * immediately.  Leave it on
    810 					 * the queue.
    811 					 */
    812 				} else {
    813 					/*
    814 					 * Take it off the queue and
    815 					 * schedule a callout to run it
    816 					 * after a delay.
    817 					 */
    818 					if (wq->wq_requeued) {
    819 						wq->wq_requeued = false;
    820 					} else {
    821 						KASSERT(wq->wq_current_work !=
    822 						    &dw->work);
    823 					}
    824 					TAILQ_REMOVE(&wq->wq_queue, &dw->work,
    825 					    work_entry);
    826 					dw_callout_init(wq, dw);
    827 					callout_schedule(&dw->dw_callout,
    828 					    MIN(INT_MAX, ticks));
    829 				}
    830 				timer_modified = true;
    831 			} else {
    832 				/*
    833 				 * It is currently running and has not
    834 				 * been requeued.
    835 				 */
    836 				if (ticks == 0) {
    837 					/*
    838 					 * We ask it to run
    839 					 * immediately.  Put it on the
    840 					 * queue again.
    841 					 */
    842 					wq->wq_requeued = true;
    843 					TAILQ_INSERT_TAIL(&wq->wq_queue,
    844 					    &dw->work, work_entry);
    845 				} else {
    846 					/*
    847 					 * Schedule a callout to run it
    848 					 * after a delay.
    849 					 */
    850 					dw_callout_init(wq, dw);
    851 					callout_schedule(&dw->dw_callout,
    852 					    MIN(INT_MAX, ticks));
    853 				}
    854 				timer_modified = false;
    855 			}
    856 			break;
    857 		case DELAYED_WORK_SCHEDULED:
    858 			/*
    859 			 * It is scheduled to run after a delay.  Try
    860 			 * to stop it and reschedule it; if we can't,
    861 			 * either reschedule it or cancel it to put it
    862 			 * on the queue, and inform the callout.
    863 			 */
    864 			if (callout_stop(&dw->dw_callout)) {
    865 				/* Can't stop, callout has begun.  */
    866 				if (ticks == 0) {
    867 					/*
    868 					 * We don't actually need to do
    869 					 * anything.  The callout will
    870 					 * queue it as soon as it gets
    871 					 * the lock.
    872 					 */
    873 				} else {
    874 					/*
    875 					 * Schedule callout and tell
    876 					 * the instance that's running
    877 					 * now that it's been
    878 					 * rescheduled.
    879 					 */
    880 					dw->dw_state = DELAYED_WORK_RESCHEDULED;
    881 					callout_schedule(&dw->dw_callout,
    882 					    MIN(INT_MAX, ticks));
    883 				}
    884 			} else {
    885 				if (ticks == 0) {
    886 					/*
    887 					 * Run immediately: destroy the
    888 					 * callout, put it on the
    889 					 * queue, and signal the worker
    890 					 * thread.
    891 					 */
    892 					dw_callout_destroy(wq, dw);
    893 					TAILQ_INSERT_TAIL(&wq->wq_queue,
    894 					    &dw->work, work_entry);
    895 					cv_broadcast(&wq->wq_cv);
    896 				} else {
    897 					/*
    898 					 * Reschedule the callout.  No
    899 					 * state change.
    900 					 */
    901 					callout_schedule(&dw->dw_callout,
    902 					    MIN(INT_MAX, ticks));
    903 				}
    904 			}
    905 			timer_modified = true;
    906 			break;
    907 		case DELAYED_WORK_RESCHEDULED:
    908 		case DELAYED_WORK_CANCELLED:
    909 			/*
    910 			 * Someone modified the timer _again_, or
    911 			 * cancelled it, after the callout started but
    912 			 * before the poor thing even had a chance to
    913 			 * acquire the lock.
    914 			 */
    915 			if (ticks == 0) {
    916 				/*
    917 				 * We can just switch back to
    918 				 * DELAYED_WORK_SCHEDULED so that the
    919 				 * callout will queue the work as soon
    920 				 * as it gets the lock.
    921 				 */
    922 				dw->dw_state = DELAYED_WORK_SCHEDULED;
    923 			} else {
    924 				/* Reschedule it.  */
    925 				callout_schedule(&dw->dw_callout,
    926 				    MIN(INT_MAX, ticks));
    927 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
    928 			}
    929 			timer_modified = true;
    930 			break;
    931 		default:
    932 			panic("invalid delayed work state: %d", dw->dw_state);
    933 		}
    934 	}
    935 	mutex_exit(&wq->wq_lock);
    936 
    937 	return timer_modified;
    938 }
    939 
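        /*
         * cancel_delayed_work(dw)
         *
         *	If dw is scheduled or queued but has not yet begun to
         *	run, cancel it and return true.  Return false if it is
         *	already running, already cancelled, already done, or was
         *	never scheduled.  Does not wait for the callout or a
         *	running work item to complete.
         */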
    940 bool
    941 cancel_delayed_work(struct delayed_work *dw)
    942 {
    943 	struct workqueue_struct *wq;
    944 	bool cancelled_p;
    945 
    946 	/* If there's no workqueue, nothing to cancel.   */
    947 	if ((wq = dw->work.work_queue) == NULL)
    948 		return false;
    949 
    950 	mutex_enter(&wq->wq_lock);
    951 	if (__predict_false(dw->work.work_queue != wq)) {
    952 		cancelled_p = false;
    953 	} else {
    954 		switch (dw->dw_state) {
    955 		case DELAYED_WORK_IDLE:
    956 			if (wq->wq_current_work == &dw->work) {
    957 				/*
    958 				 * Too late, it's already running.  If
    959 				 * it's been requeued, tough -- it'll
    960 				 * run again.
    961 				 */
    962 				cancelled_p = false;
    963 			} else {
    964 				/* Got in before it started.  Remove it.  */
    965 				TAILQ_REMOVE(&wq->wq_queue, &dw->work,
    966 				    work_entry);
    967 				cancelled_p = true;
    968 			}
    969 			break;
    970 		case DELAYED_WORK_SCHEDULED:
    971 			/*
    972 			 * If it is scheduled, mark it cancelled and
    973 			 * try to stop the callout before it starts.
    974 			 *
    975 			 * If it's too late and the callout has already
    976 			 * begun to execute, tough.
    977 			 *
    978 			 * If we stopped the callout before it started,
    979 			 * however, then destroy the callout and
    980 			 * dissociate it from the workqueue ourselves.
    981 			 */
    982 			dw->dw_state = DELAYED_WORK_CANCELLED;
    983 			cancelled_p = true;
    984 			if (!callout_stop(&dw->dw_callout))
    985 				cancel_delayed_work_done(wq, dw);
    986 			break;
    987 		case DELAYED_WORK_RESCHEDULED:
    988 			/*
    989 			 * If it is being rescheduled, the callout has
    990 			 * already fired.  We must ask it to cancel.
    991 			 */
    992 			dw->dw_state = DELAYED_WORK_CANCELLED;
    993 			cancelled_p = true;
    994 			break;
    995 		case DELAYED_WORK_CANCELLED:
    996 			/*
    997 			 * If it is being cancelled, the callout has
    998 			 * already fired.  There is nothing more for us
    999 			 * to do.  Someone else claims credit for
   1000 			 * cancelling it.
   1001 			 */
   1002 			cancelled_p = false;
   1003 			break;
   1004 		default:
   1005 			panic("invalid delayed work state: %d",
   1006 			    dw->dw_state);
   1007 		}
   1008 	}
   1009 	mutex_exit(&wq->wq_lock);
   1010 
   1011 	return cancelled_p;
   1012 }
   1013 
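        /*
         * cancel_delayed_work_sync(dw)
         *
         *	Like cancel_delayed_work, but if the callout or the work
         *	item has already begun to run, also wait for it to
         *	complete.  May sleep.
         */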
   1014 bool
   1015 cancel_delayed_work_sync(struct delayed_work *dw)
   1016 {
   1017 	struct workqueue_struct *wq;
   1018 	bool cancelled_p;
   1019 
   1020 	/* If there's no workqueue, nothing to cancel.  */
   1021 	if ((wq = dw->work.work_queue) == NULL)
   1022 		return false;
   1023 
   1024 	mutex_enter(&wq->wq_lock);
   1025 	if (__predict_false(dw->work.work_queue != wq)) {
   1026 		cancelled_p = false;
   1027 	} else {
   1028 		switch (dw->dw_state) {
   1029 		case DELAYED_WORK_IDLE:
   1030 			if (wq->wq_current_work == &dw->work) {
   1031 				/*
   1032 				 * Too late, it's already running.
   1033 				 * First, make sure it's not requeued.
   1034 				 * Then wait for it to complete.
   1035 				 */
   1036 				if (wq->wq_requeued) {
   1037 					TAILQ_REMOVE(&wq->wq_queue, &dw->work,
   1038 					    work_entry);
   1039 					wq->wq_requeued = false;
   1040 				}
   1041 				wait_for_current_work(&dw->work, wq);
   1042 				cancelled_p = false;
   1043 			} else {
   1044 				/* Got in before it started.  Remove it.  */
   1045 				TAILQ_REMOVE(&wq->wq_queue, &dw->work,
   1046 				    work_entry);
   1047 				cancelled_p = true;
   1048 			}
   1049 			break;
   1050 		case DELAYED_WORK_SCHEDULED:
   1051 			/*
   1052 			 * If it is scheduled, mark it cancelled and
   1053 			 * try to stop the callout before it starts.
   1054 			 *
   1055 			 * If it's too late and the callout has already
   1056 			 * begun to execute, we must wait for it to
   1057 			 * complete.  But we got in soon enough to ask
   1058 			 * the callout not to run, so we successfully
   1059 			 * cancelled it in that case.
   1060 			 *
   1061 			 * If we stopped the callout before it started,
   1062 			 * however, then destroy the callout and
   1063 			 * dissociate it from the workqueue ourselves.
   1064 			 */
   1065 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1066 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
   1067 				cancel_delayed_work_done(wq, dw);
   1068 			cancelled_p = true;
   1069 			break;
   1070 		case DELAYED_WORK_RESCHEDULED:
   1071 			/*
   1072 			 * If it is being rescheduled, the callout has
   1073 			 * already fired.  We must ask it to cancel and
   1074 			 * wait for it to complete.
   1075 			 */
   1076 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1077 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
   1078 			cancelled_p = true;
   1079 			break;
   1080 		case DELAYED_WORK_CANCELLED:
   1081 			/*
   1082 			 * If it is being cancelled, the callout has
   1083 			 * already fired.  We need only wait for it to
   1084 			 * complete.  Someone else, however, claims
   1085 			 * credit for cancelling it.
   1086 			 */
   1087 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
   1088 			cancelled_p = false;
   1089 			break;
   1090 		default:
   1091 			panic("invalid delayed work state: %d",
   1092 			    dw->dw_state);
   1093 		}
   1094 	}
   1095 	mutex_exit(&wq->wq_lock);
   1096 
   1097 	return cancelled_p;
   1098 }
   1099 
   1100 /*
   1102  * Flush
   1103  */
   1104 
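        /*
         * flush_scheduled_work()
         *
         *	Wait for all work queued on the system workqueue
         *	(system_wq) so far to complete.  May sleep.
         */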
   1105 void
   1106 flush_scheduled_work(void)
   1107 {
   1108 
   1109 	flush_workqueue(system_wq);
   1110 }
   1111 
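        /*
         * flush_workqueue_locked(wq)
         *
         *	Wait for the worker thread to finish the batch of work in
         *	progress, if any, plus one more batch if work is still on
         *	the queue.  Caller must hold wq's lock.
         */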
   1112 static void
   1113 flush_workqueue_locked(struct workqueue_struct *wq)
   1114 {
   1115 	uint64_t gen;
   1116 
   1117 	KASSERT(mutex_owned(&wq->wq_lock));
   1118 
   1119 	/* Get the current generation number.  */
   1120 	gen = wq->wq_gen;
   1121 
   1122 	/*
   1123 	 * If there's a batch of work in progress, we must wait for the
   1124 	 * worker thread to finish that batch.
   1125 	 */
   1126 	if (wq->wq_current_work != NULL)
   1127 		gen++;
   1128 
   1129 	/*
   1130 	 * If there's any work yet to be claimed from the queue by the
   1131 	 * worker thread, we must wait for it to finish one more batch
   1132 	 * too.
   1133 	 */
   1134 	if (!TAILQ_EMPTY(&wq->wq_queue))
   1135 		gen++;
   1136 
   1137 	/* Wait until the generation number has caught up.  */
   1138 	while (wq->wq_gen < gen)
   1139 		cv_wait(&wq->wq_cv, &wq->wq_lock);
   1140 }
   1141 
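        /*
         * flush_workqueue(wq)
         *
         *	Wait for all work queued on wq so far to complete.  May
         *	sleep.
         */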
   1142 void
   1143 flush_workqueue(struct workqueue_struct *wq)
   1144 {
   1145 
   1146 	mutex_enter(&wq->wq_lock);
   1147 	flush_workqueue_locked(wq);
   1148 	mutex_exit(&wq->wq_lock);
   1149 }
   1150 
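        /*
         * flush_work(work)
         *
         *	If work is queued or in progress on some workqueue, wait
         *	for it to complete by flushing that workqueue.  May
         *	sleep.
         */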
   1151 void
   1152 flush_work(struct work_struct *work)
   1153 {
   1154 	struct workqueue_struct *wq;
   1155 
   1156 	/* If there's no workqueue, nothing to flush.  */
   1157 	if ((wq = work->work_queue) == NULL)
   1158 		return;
   1159 
   1160 	flush_workqueue(wq);
   1161 }
   1162 
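        /*
         * flush_delayed_work(dw)
         *
         *	If dw is queued or in progress, wait for it to complete.
         *	If it is still waiting on its callout, cancel the callout
         *	and dissociate dw from the workqueue without running it.
         *	May sleep.
         */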
   1163 void
   1164 flush_delayed_work(struct delayed_work *dw)
   1165 {
   1166 	struct workqueue_struct *wq;
   1167 
   1168 	/* If there's no workqueue, nothing to flush.  */
   1169 	if ((wq = dw->work.work_queue) == NULL)
   1170 		return;
   1171 
   1172 	mutex_enter(&wq->wq_lock);
   1173 	if (__predict_true(dw->work.work_queue == wq)) {
   1174 		switch (dw->dw_state) {
   1175 		case DELAYED_WORK_IDLE:
   1176 			/*
   1177 			 * It has a workqueue assigned and the callout
   1178 			 * is idle, so it must be in progress or on the
   1179 			 * queue.  In that case, wait for it to
   1180 			 * complete.  Waiting for the whole queue to
   1181 			 * flush is overkill, but doesn't hurt.
   1182 			 */
   1183 			flush_workqueue_locked(wq);
   1184 			break;
   1185 		case DELAYED_WORK_SCHEDULED:
   1186 		case DELAYED_WORK_RESCHEDULED:
   1187 		case DELAYED_WORK_CANCELLED:
   1188 			/*
   1189 			 * The callout is still scheduled to run.
   1190 			 * Notify it that we are cancelling, and try to
   1191 			 * stop the callout before it runs.
   1192 			 *
   1193 			 * If we do stop the callout, we are now
   1194 			 * responsible for dissociating the work from
   1195 			 * the queue.
   1196 			 *
   1197 			 * Otherwise, wait for it to complete and
   1198 			 * dissociate itself -- it will not put itself
   1199 			 * on the workqueue once it is cancelled.
   1200 			 */
   1201 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1202 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
   1203 				cancel_delayed_work_done(wq, dw);
   1204 		default:
   1205 			panic("invalid delayed work state: %d",
   1206 			    dw->dw_state);
   1207 		}
   1208 	}
   1209 	mutex_exit(&wq->wq_lock);
   1210 }
   1211