      1 /*	$NetBSD: linux_work.c,v 1.1.26.1 2019/06/10 22:07:45 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2018 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Taylor R. Campbell.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: linux_work.c,v 1.1.26.1 2019/06/10 22:07:45 christos Exp $");
     34 
     35 #include <sys/types.h>
     36 #include <sys/atomic.h>
     37 #include <sys/callout.h>
     38 #include <sys/condvar.h>
     39 #include <sys/errno.h>
     40 #include <sys/kmem.h>
     41 #include <sys/kthread.h>
     42 #include <sys/lwp.h>
     43 #include <sys/mutex.h>
     44 #ifndef _MODULE
     45 #include <sys/once.h>
     46 #endif
     47 #include <sys/queue.h>
     48 #include <sys/sdt.h>
     49 
     50 #include <linux/workqueue.h>
     51 
     52 TAILQ_HEAD(work_head, work_struct);
     53 TAILQ_HEAD(dwork_head, delayed_work);
     54 
     55 struct workqueue_struct {
     56 	kmutex_t		wq_lock;
     57 	kcondvar_t		wq_cv;
     58 	struct dwork_head	wq_delayed; /* delayed work scheduled */
     59 	struct work_head	wq_queue;   /* work to run */
     60 	struct work_head	wq_dqueue;  /* delayed work to run now */
     61 	struct work_struct	*wq_current_work;
     62 	int			wq_flags;
     63 	bool			wq_dying;
     64 	uint64_t		wq_gen;
     65 	struct lwp		*wq_lwp;
     66 };
     67 
     68 static void __dead	linux_workqueue_thread(void *);
     69 static void		linux_workqueue_timeout(void *);
     70 static bool		work_claimed(struct work_struct *,
     71 			    struct workqueue_struct *);
     72 static struct workqueue_struct *
     73 			work_queue(struct work_struct *);
     74 static bool		acquire_work(struct work_struct *,
     75 			    struct workqueue_struct *);
     76 static void		release_work(struct work_struct *,
     77 			    struct workqueue_struct *);
     78 static void		wait_for_current_work(struct work_struct *,
     79 			    struct workqueue_struct *);
     80 static void		dw_callout_init(struct workqueue_struct *,
     81 			    struct delayed_work *);
     82 static void		dw_callout_destroy(struct workqueue_struct *,
     83 			    struct delayed_work *);
     84 static void		cancel_delayed_work_done(struct workqueue_struct *,
     85 			    struct delayed_work *);
     86 
     87 SDT_PROBE_DEFINE2(sdt, linux, work, acquire,
     88     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
     89 SDT_PROBE_DEFINE2(sdt, linux, work, release,
     90     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
     91 SDT_PROBE_DEFINE2(sdt, linux, work, queue,
     92     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
     93 SDT_PROBE_DEFINE2(sdt, linux, work, cancel,
     94     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
     95 SDT_PROBE_DEFINE3(sdt, linux, work, schedule,
     96     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/,
     97     "unsigned long"/*ticks*/);
     98 SDT_PROBE_DEFINE2(sdt, linux, work, timer,
     99     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
    100 SDT_PROBE_DEFINE2(sdt, linux, work, wait__start,
    101     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
    102 SDT_PROBE_DEFINE2(sdt, linux, work, wait__done,
    103     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
    104 SDT_PROBE_DEFINE2(sdt, linux, work, run,
    105     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
    106 SDT_PROBE_DEFINE2(sdt, linux, work, done,
    107     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
    108 SDT_PROBE_DEFINE1(sdt, linux, work, batch__start,
    109     "struct workqueue_struct *"/*wq*/);
    110 SDT_PROBE_DEFINE1(sdt, linux, work, batch__done,
    111     "struct workqueue_struct *"/*wq*/);
    112 SDT_PROBE_DEFINE1(sdt, linux, work, flush__start,
    113     "struct workqueue_struct *"/*wq*/);
    114 SDT_PROBE_DEFINE1(sdt, linux, work, flush__done,
    115     "struct workqueue_struct *"/*wq*/);
    116 
    117 static specificdata_key_t workqueue_key __read_mostly;
    118 
    119 struct workqueue_struct	*system_wq __read_mostly;
    120 struct workqueue_struct	*system_long_wq __read_mostly;
    121 struct workqueue_struct	*system_power_efficient_wq __read_mostly;
    122 
    123 static inline uintptr_t
    124 atomic_cas_uintptr(volatile uintptr_t *p, uintptr_t old, uintptr_t new)
    125 {
    126 
    127 	return (uintptr_t)atomic_cas_ptr(p, (void *)old, (void *)new);
    128 }
    129 
    130 /*
    131  * linux_workqueue_init()
    132  *
    133  *	Initialize the Linux workqueue subsystem.  Return 0 on success,
    134  *	NetBSD error on failure.
    135  */
    136 static int
    137 linux_workqueue_init0(void)
    138 {
    139 	int error;
    140 
    141 	error = lwp_specific_key_create(&workqueue_key, NULL);
    142 	if (error)
    143 		goto fail0;
    144 
    145 	system_wq = alloc_ordered_workqueue("lnxsyswq", 0);
    146 	if (system_wq == NULL) {
    147 		error = ENOMEM;
    148 		goto fail1;
    149 	}
    150 
    151 	system_long_wq = alloc_ordered_workqueue("lnxlngwq", 0);
    152 	if (system_long_wq == NULL) {
    153 		error = ENOMEM;
    154 		goto fail2;
    155 	}
    156 
    157 	system_power_efficient_wq = alloc_ordered_workqueue("lnxpwrwq", 0);
     158 	if (system_power_efficient_wq == NULL) {
    159 		error = ENOMEM;
    160 		goto fail3;
    161 	}
    162 
    163 	return 0;
    164 
    165 fail4: __unused
    166 	destroy_workqueue(system_power_efficient_wq);
    167 fail3:	destroy_workqueue(system_long_wq);
    168 fail2:	destroy_workqueue(system_wq);
    169 fail1:	lwp_specific_key_delete(workqueue_key);
    170 fail0:	KASSERT(error);
    171 	return error;
    172 }
    173 
    174 /*
    175  * linux_workqueue_fini()
    176  *
    177  *	Destroy the Linux workqueue subsystem.  Never fails.
    178  */
    179 static void
    180 linux_workqueue_fini0(void)
    181 {
    182 
    183 	destroy_workqueue(system_power_efficient_wq);
    184 	destroy_workqueue(system_long_wq);
    185 	destroy_workqueue(system_wq);
    186 	lwp_specific_key_delete(workqueue_key);
    187 }
    188 
    189 #ifndef _MODULE
    190 static ONCE_DECL(linux_workqueue_init_once);
    191 #endif
    192 
    193 int
    194 linux_workqueue_init(void)
    195 {
    196 #ifdef _MODULE
    197 	return linux_workqueue_init0();
    198 #else
    199 	return INIT_ONCE(&linux_workqueue_init_once, &linux_workqueue_init0);
    200 #endif
    201 }
    202 
    203 void
    204 linux_workqueue_fini(void)
    205 {
    206 #ifdef _MODULE
    207 	return linux_workqueue_fini0();
    208 #else
    209 	return FINI_ONCE(&linux_workqueue_init_once, &linux_workqueue_fini0);
    210 #endif
    211 }
    212 
    213 /*
    215  * Workqueues
    216  */
    217 
    218 /*
    219  * alloc_ordered_workqueue(name, flags)
    220  *
    221  *	Create a workqueue of the given name.  No flags are currently
    222  *	defined.  Return NULL on failure, pointer to struct
    223  *	workqueue_struct object on success.
    224  */
    225 struct workqueue_struct *
    226 alloc_ordered_workqueue(const char *name, int flags)
    227 {
    228 	struct workqueue_struct *wq;
    229 	int error;
    230 
    231 	KASSERT(flags == 0);
    232 
    233 	wq = kmem_zalloc(sizeof(*wq), KM_SLEEP);
    234 
    235 	mutex_init(&wq->wq_lock, MUTEX_DEFAULT, IPL_VM);
    236 	cv_init(&wq->wq_cv, name);
    237 	TAILQ_INIT(&wq->wq_delayed);
    238 	TAILQ_INIT(&wq->wq_queue);
    239 	TAILQ_INIT(&wq->wq_dqueue);
    240 	wq->wq_current_work = NULL;
    241 	wq->wq_flags = 0;
    242 	wq->wq_dying = false;
    243 	wq->wq_gen = 0;
    244 	wq->wq_lwp = NULL;
    245 
    246 	error = kthread_create(PRI_NONE,
    247 	    KTHREAD_MPSAFE|KTHREAD_TS|KTHREAD_MUSTJOIN, NULL,
    248 	    &linux_workqueue_thread, wq, &wq->wq_lwp, "%s", name);
    249 	if (error)
    250 		goto fail0;
    251 
    252 	return wq;
    253 
    254 fail0:	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
    255 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
    256 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
    257 	cv_destroy(&wq->wq_cv);
    258 	mutex_destroy(&wq->wq_lock);
    259 	kmem_free(wq, sizeof(*wq));
    260 	return NULL;
    261 }
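
/*
 * Example usage (illustrative sketch, not part of the original file):
 * a hypothetical driver might create a private ordered workqueue at
 * attach time and tear it down at detach time.  The names foo_softc,
 * foo_attach, and foo_detach are made up for illustration.
 *
 *	struct foo_softc {
 *		struct workqueue_struct	*sc_wq;
 *	};
 *
 *	static int
 *	foo_attach(struct foo_softc *sc)
 *	{
 *
 *		sc->sc_wq = alloc_ordered_workqueue("foowq", 0);
 *		if (sc->sc_wq == NULL)
 *			return ENOMEM;
 *		return 0;
 *	}
 *
 *	static void
 *	foo_detach(struct foo_softc *sc)
 *	{
 *
 *		destroy_workqueue(sc->sc_wq);
 *		sc->sc_wq = NULL;
 *	}
 *
 * destroy_workqueue cancels pending delayed work and waits for queued
 * work to finish, so memory referenced by outstanding work must not be
 * freed until it returns.
 */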
    262 
    263 /*
    264  * destroy_workqueue(wq)
    265  *
     266  *	Destroy the workqueue wq.  Cancel any pending
    267  *	delayed work.  Wait for all queued work to complete.
    268  *
    269  *	May sleep.
    270  */
    271 void
    272 destroy_workqueue(struct workqueue_struct *wq)
    273 {
    274 
    275 	/*
    276 	 * Cancel all delayed work.  We do this first because any
     277 	 * delayed work that has already timed out, which we can't
    278 	 * cancel, may have queued new work.
    279 	 */
    280 	mutex_enter(&wq->wq_lock);
    281 	while (!TAILQ_EMPTY(&wq->wq_delayed)) {
    282 		struct delayed_work *const dw = TAILQ_FIRST(&wq->wq_delayed);
    283 
    284 		KASSERT(work_queue(&dw->work) == wq);
    285 		KASSERTMSG((dw->dw_state == DELAYED_WORK_SCHEDULED ||
    286 			dw->dw_state == DELAYED_WORK_RESCHEDULED ||
    287 			dw->dw_state == DELAYED_WORK_CANCELLED),
    288 		    "delayed work %p in bad state: %d",
    289 		    dw, dw->dw_state);
    290 
    291 		/*
    292 		 * Mark it cancelled and try to stop the callout before
    293 		 * it starts.
    294 		 *
    295 		 * If it's too late and the callout has already begun
    296 		 * to execute, then it will notice that we asked to
    297 		 * cancel it and remove itself from the queue before
    298 		 * returning.
    299 		 *
    300 		 * If we stopped the callout before it started,
    301 		 * however, then we can safely destroy the callout and
    302 		 * dissociate it from the workqueue ourselves.
    303 		 */
    304 		SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
    305 		dw->dw_state = DELAYED_WORK_CANCELLED;
    306 		if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
    307 			cancel_delayed_work_done(wq, dw);
    308 	}
    309 	mutex_exit(&wq->wq_lock);
    310 
    311 	/*
    312 	 * At this point, no new work can be put on the queue.
    313 	 */
    314 
    315 	/* Tell the thread to exit.  */
    316 	mutex_enter(&wq->wq_lock);
    317 	wq->wq_dying = true;
    318 	cv_broadcast(&wq->wq_cv);
    319 	mutex_exit(&wq->wq_lock);
    320 
    321 	/* Wait for it to exit.  */
    322 	(void)kthread_join(wq->wq_lwp);
    323 
    324 	KASSERT(wq->wq_dying);
    325 	KASSERT(wq->wq_flags == 0);
    326 	KASSERT(wq->wq_current_work == NULL);
    327 	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
    328 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
    329 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
    330 	cv_destroy(&wq->wq_cv);
    331 	mutex_destroy(&wq->wq_lock);
    332 
    333 	kmem_free(wq, sizeof(*wq));
    334 }
    335 
    336 /*
    338  * Work thread and callout
    339  */
    340 
    341 /*
    342  * linux_workqueue_thread(cookie)
    343  *
    344  *	Main function for a workqueue's worker thread.  Waits until
    345  *	there is work queued, grabs a batch of work off the queue,
    346  *	executes it all, bumps the generation number, and repeats,
    347  *	until dying.
    348  */
    349 static void __dead
    350 linux_workqueue_thread(void *cookie)
    351 {
    352 	struct workqueue_struct *const wq = cookie;
    353 	struct work_head queue, dqueue;
    354 	struct work_head *const q[2] = { &queue, &dqueue };
    355 	unsigned i;
    356 
    357 	lwp_setspecific(workqueue_key, wq);
    358 
    359 	mutex_enter(&wq->wq_lock);
    360 	for (;;) {
    361 		/*
    362 		 * Wait until there's activity.  If there's no work and
    363 		 * we're dying, stop here.
    364 		 */
    365 		if (TAILQ_EMPTY(&wq->wq_queue) &&
    366 		    TAILQ_EMPTY(&wq->wq_dqueue)) {
    367 			if (wq->wq_dying)
    368 				break;
    369 			cv_wait(&wq->wq_cv, &wq->wq_lock);
    370 			continue;
    371 		}
    372 
    373 		/* Grab a batch of work off the queue.  */
    374 		SDT_PROBE1(sdt, linux, work, batch__start,  wq);
    375 		TAILQ_INIT(&queue);
    376 		TAILQ_INIT(&dqueue);
    377 		TAILQ_CONCAT(&queue, &wq->wq_queue, work_entry);
    378 		TAILQ_CONCAT(&dqueue, &wq->wq_dqueue, work_entry);
    379 
    380 		/* Process each work item in the batch.  */
    381 		for (i = 0; i < 2; i++) {
    382 			while (!TAILQ_EMPTY(q[i])) {
    383 				struct work_struct *work = TAILQ_FIRST(q[i]);
    384 				void (*func)(struct work_struct *);
    385 
    386 				KASSERT(work_queue(work) == wq);
    387 				KASSERT(work_claimed(work, wq));
    388 				KASSERTMSG((q[i] != &dqueue ||
    389 					container_of(work, struct delayed_work,
    390 					    work)->dw_state ==
    391 					DELAYED_WORK_IDLE),
    392 				    "delayed work %p queued and scheduled",
    393 				    work);
    394 
    395 				TAILQ_REMOVE(q[i], work, work_entry);
    396 				KASSERT(wq->wq_current_work == NULL);
    397 				wq->wq_current_work = work;
    398 				func = work->func;
    399 				release_work(work, wq);
    400 				/* Can't dereference work after this point.  */
    401 
    402 				mutex_exit(&wq->wq_lock);
    403 				SDT_PROBE2(sdt, linux, work, run,  work, wq);
    404 				(*func)(work);
    405 				SDT_PROBE2(sdt, linux, work, done,  work, wq);
    406 				mutex_enter(&wq->wq_lock);
    407 
    408 				KASSERT(wq->wq_current_work == work);
    409 				wq->wq_current_work = NULL;
    410 				cv_broadcast(&wq->wq_cv);
    411 			}
    412 		}
    413 
    414 		/* Notify flush that we've completed a batch of work.  */
    415 		wq->wq_gen++;
    416 		cv_broadcast(&wq->wq_cv);
    417 		SDT_PROBE1(sdt, linux, work, batch__done,  wq);
    418 	}
    419 	mutex_exit(&wq->wq_lock);
    420 
    421 	kthread_exit(0);
    422 }
    423 
    424 /*
    425  * linux_workqueue_timeout(cookie)
    426  *
    427  *	Delayed work timeout callback.
    428  *
    429  *	- If scheduled, queue it.
    430  *	- If rescheduled, callout_schedule ourselves again.
    431  *	- If cancelled, destroy the callout and release the work from
    432  *        the workqueue.
    433  */
    434 static void
    435 linux_workqueue_timeout(void *cookie)
    436 {
    437 	struct delayed_work *const dw = cookie;
    438 	struct workqueue_struct *const wq = work_queue(&dw->work);
    439 
    440 	KASSERTMSG(wq != NULL,
    441 	    "delayed work %p state %d resched %d",
    442 	    dw, dw->dw_state, dw->dw_resched);
    443 
    444 	SDT_PROBE2(sdt, linux, work, timer,  dw, wq);
    445 
    446 	mutex_enter(&wq->wq_lock);
    447 	KASSERT(work_queue(&dw->work) == wq);
    448 	switch (dw->dw_state) {
    449 	case DELAYED_WORK_IDLE:
    450 		panic("delayed work callout uninitialized: %p", dw);
    451 	case DELAYED_WORK_SCHEDULED:
    452 		dw_callout_destroy(wq, dw);
    453 		TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work, work_entry);
    454 		cv_broadcast(&wq->wq_cv);
    455 		SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
    456 		break;
    457 	case DELAYED_WORK_RESCHEDULED:
    458 		KASSERT(dw->dw_resched >= 0);
    459 		callout_schedule(&dw->dw_callout, dw->dw_resched);
    460 		dw->dw_state = DELAYED_WORK_SCHEDULED;
    461 		dw->dw_resched = -1;
    462 		break;
    463 	case DELAYED_WORK_CANCELLED:
    464 		cancel_delayed_work_done(wq, dw);
    465 		/* Can't dereference dw after this point.  */
    466 		goto out;
    467 	default:
    468 		panic("delayed work callout in bad state: %p", dw);
    469 	}
    470 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE ||
    471 	    dw->dw_state == DELAYED_WORK_SCHEDULED);
    472 out:	mutex_exit(&wq->wq_lock);
    473 }
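
/*
 * Summary of the delayed-work state machine driven by this callout and
 * by the scheduling/cancellation paths below (an illustrative, not
 * exhaustive, list of transitions):
 *
 *	IDLE        -> SCHEDULED	dw_callout_init(), callout armed
 *	SCHEDULED   -> IDLE		callout fired; work queued to run
 *	SCHEDULED   -> RESCHEDULED	asked a running callout to re-arm
 *	SCHEDULED   -> CANCELLED	cancellation requested
 *	RESCHEDULED -> SCHEDULED	callout fired; re-armed itself
 *	CANCELLED   -> IDLE		callout fired; callout destroyed
 *					and work released
 */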
    474 
    475 /*
    476  * current_work()
    477  *
    478  *	If in a workqueue worker thread, return the work it is
    479  *	currently executing.  Otherwise return NULL.
    480  */
    481 struct work_struct *
    482 current_work(void)
    483 {
    484 	struct workqueue_struct *wq = lwp_getspecific(workqueue_key);
    485 
    486 	/* If we're not a workqueue thread, then there's no work.  */
    487 	if (wq == NULL)
    488 		return NULL;
    489 
    490 	/*
    491 	 * Otherwise, this should be possible only while work is in
    492 	 * progress.  Return the current work item.
    493 	 */
    494 	KASSERT(wq->wq_current_work != NULL);
    495 	return wq->wq_current_work;
    496 }
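
/*
 * Illustrative sketch (not from the original source): a work callback
 * can use current_work() to assert that it is running in its worker
 * thread.  The name example_work_fn is hypothetical.
 *
 *	static void
 *	example_work_fn(struct work_struct *work)
 *	{
 *
 *		KASSERT(current_work() == work);
 *		... deferred processing, may sleep ...
 *	}
 */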
    497 
    498 /*
    500  * Work
    501  */
    502 
    503 /*
    504  * INIT_WORK(work, fn)
    505  *
    506  *	Initialize work for use with a workqueue to call fn in a worker
    507  *	thread.  There is no corresponding destruction operation.
    508  */
    509 void
    510 INIT_WORK(struct work_struct *work, void (*fn)(struct work_struct *))
    511 {
    512 
    513 	work->work_owner = 0;
    514 	work->func = fn;
    515 }
    516 
    517 /*
    518  * work_claimed(work, wq)
    519  *
    520  *	True if work is currently claimed by a workqueue, meaning it is
    521  *	either on the queue or scheduled in a callout.  The workqueue
    522  *	must be wq, and caller must hold wq's lock.
    523  */
    524 static bool
    525 work_claimed(struct work_struct *work, struct workqueue_struct *wq)
    526 {
    527 
    528 	KASSERT(work_queue(work) == wq);
    529 	KASSERT(mutex_owned(&wq->wq_lock));
    530 
    531 	return work->work_owner & 1;
    532 }
    533 
    534 /*
    535  * work_queue(work)
    536  *
    537  *	Return the last queue that work was queued on, or NULL if it
    538  *	was never queued.
    539  */
    540 static struct workqueue_struct *
    541 work_queue(struct work_struct *work)
    542 {
    543 
    544 	return (struct workqueue_struct *)(work->work_owner & ~(uintptr_t)1);
    545 }
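
/*
 * For illustration, the encoding of work->work_owner used by
 * work_claimed(), work_queue(), acquire_work(), and release_work() is
 * a tagged pointer:
 *
 *	work_owner == 0			never queued anywhere
 *	work_owner == (uintptr_t)wq	last associated with wq, not
 *					currently claimed
 *	work_owner == (uintptr_t)wq | 1	claimed by wq: on a queue or
 *					held by a scheduled callout
 *
 * This relies on struct workqueue_struct being at least 2-byte
 * aligned, so bit 0 of its address is free (asserted in acquire_work).
 */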
    546 
    547 /*
    548  * acquire_work(work, wq)
    549  *
    550  *	Try to claim work for wq.  If work is already claimed, it must
    551  *	be claimed by wq; return false.  If work is not already
    552  *	claimed, claim it, issue a memory barrier to match any prior
    553  *	release_work, and return true.
    554  *
    555  *	Caller must hold wq's lock.
    556  */
    557 static bool
    558 acquire_work(struct work_struct *work, struct workqueue_struct *wq)
    559 {
    560 	uintptr_t owner0, owner;
    561 
    562 	KASSERT(mutex_owned(&wq->wq_lock));
    563 	KASSERT(((uintptr_t)wq & 1) == 0);
    564 
    565 	owner = (uintptr_t)wq | 1;
    566 	do {
    567 		owner0 = work->work_owner;
    568 		if (owner0 & 1) {
    569 			KASSERT((owner0 & ~(uintptr_t)1) == (uintptr_t)wq);
    570 			return false;
    571 		}
    572 		KASSERT(owner0 == (uintptr_t)NULL || owner0 == (uintptr_t)wq);
    573 	} while (atomic_cas_uintptr(&work->work_owner, owner0, owner) !=
    574 	    owner0);
    575 
    576 	KASSERT(work_queue(work) == wq);
    577 	membar_enter();
    578 	SDT_PROBE2(sdt, linux, work, acquire,  work, wq);
    579 	return true;
    580 }
    581 
    582 /*
    583  * release_work(work, wq)
    584  *
    585  *	Issue a memory barrier to match any subsequent acquire_work and
    586  *	dissociate work from wq.
    587  *
    588  *	Caller must hold wq's lock and work must be associated with wq.
    589  */
    590 static void
    591 release_work(struct work_struct *work, struct workqueue_struct *wq)
    592 {
    593 
    594 	KASSERT(work_queue(work) == wq);
    595 	KASSERT(mutex_owned(&wq->wq_lock));
    596 
    597 	SDT_PROBE2(sdt, linux, work, release,  work, wq);
    598 	membar_exit();
    599 
    600 	/*
    601 	 * Non-interlocked r/m/w is safe here because nobody else can
     602 	 * write to this while the claimed bit is set and the workqueue
    603 	 * lock is held.
    604 	 */
    605 	work->work_owner &= ~(uintptr_t)1;
    606 }
    607 
    608 /*
    609  * schedule_work(work)
    610  *
    611  *	If work is not already queued on system_wq, queue it to be run
    612  *	by system_wq's worker thread when it next can.  True if it was
    613  *	newly queued, false if it was already queued.  If the work was
    614  *	already running, queue it to run again.
    615  *
    616  *	Caller must ensure work is not queued to run on a different
    617  *	workqueue.
    618  */
    619 bool
    620 schedule_work(struct work_struct *work)
    621 {
    622 
    623 	return queue_work(system_wq, work);
    624 }
    625 
    626 /*
    627  * queue_work(wq, work)
    628  *
    629  *	If work is not already queued on wq, queue it to be run by wq's
    630  *	worker thread when it next can.  True if it was newly queued,
    631  *	false if it was already queued.  If the work was already
    632  *	running, queue it to run again.
    633  *
    634  *	Caller must ensure work is not queued to run on a different
    635  *	workqueue.
    636  */
    637 bool
    638 queue_work(struct workqueue_struct *wq, struct work_struct *work)
    639 {
    640 	bool newly_queued;
    641 
    642 	KASSERT(wq != NULL);
    643 
    644 	mutex_enter(&wq->wq_lock);
    645 	if (__predict_true(acquire_work(work, wq))) {
    646 		/*
    647 		 * It wasn't on any workqueue at all.  Put it on this
    648 		 * one, and signal the worker thread that there is work
    649 		 * to do.
    650 		 */
    651 		TAILQ_INSERT_TAIL(&wq->wq_queue, work, work_entry);
    652 		cv_broadcast(&wq->wq_cv);
    653 		SDT_PROBE2(sdt, linux, work, queue,  work, wq);
    654 		newly_queued = true;
    655 	} else {
    656 		/*
    657 		 * It was already on this workqueue.  Nothing to do
    658 		 * since it is already queued.
    659 		 */
    660 		newly_queued = false;
    661 	}
    662 	mutex_exit(&wq->wq_lock);
    663 
    664 	return newly_queued;
    665 }
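
/*
 * Example usage (illustrative sketch, not part of the original file):
 * deferring work from an interrupt handler to a worker thread.  The
 * names foo_softc, foo_task, and foo_intr are hypothetical; sc_wq is a
 * workqueue created as shown earlier.
 *
 *	struct foo_softc {
 *		struct workqueue_struct	*sc_wq;
 *		struct work_struct	sc_work;
 *	};
 *
 *	static void
 *	foo_task(struct work_struct *work)
 *	{
 *		struct foo_softc *sc = container_of(work,
 *		    struct foo_softc, sc_work);
 *
 *		... runs in the worker thread and may sleep ...
 *	}
 *
 *	At attach time:	INIT_WORK(&sc->sc_work, &foo_task);
 *
 *	static int
 *	foo_intr(void *cookie)
 *	{
 *		struct foo_softc *sc = cookie;
 *
 *		(void)queue_work(sc->sc_wq, &sc->sc_work);
 *		return 1;
 *	}
 *
 * Queueing is idempotent: if the work is already on the queue,
 * queue_work returns false and leaves it there, so it is safe to call
 * from every interrupt.
 */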
    666 
    667 /*
    668  * cancel_work(work)
    669  *
    670  *	If work was queued, remove it from the queue and return true.
    671  *	If work was not queued, return false.  Work may still be
    672  *	running when this returns.
    673  */
    674 bool
    675 cancel_work(struct work_struct *work)
    676 {
    677 	struct workqueue_struct *wq;
    678 	bool cancelled_p = false;
    679 
    680 	/* If there's no workqueue, nothing to cancel.   */
    681 	if ((wq = work_queue(work)) == NULL)
    682 		goto out;
    683 
    684 	mutex_enter(&wq->wq_lock);
    685 	if (__predict_false(work_queue(work) != wq)) {
    686 		/*
    687 		 * It has finished execution or been cancelled by
    688 		 * another thread, and has been moved off the
     689 		 * workqueue, so it's too late to cancel.
    690 		 */
    691 		cancelled_p = false;
    692 	} else {
    693 		/* Check whether it's on the queue.  */
    694 		if (work_claimed(work, wq)) {
    695 			/*
    696 			 * It is still on the queue.  Take it off the
    697 			 * queue and report successful cancellation.
    698 			 */
    699 			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
    700 			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
    701 			release_work(work, wq);
    702 			/* Can't dereference work after this point.  */
    703 			cancelled_p = true;
    704 		} else {
    705 			/* Not on the queue.  Couldn't cancel it.  */
    706 			cancelled_p = false;
    707 		}
    708 	}
    709 	mutex_exit(&wq->wq_lock);
    710 
    711 out:	return cancelled_p;
    712 }
    713 
    714 /*
    715  * cancel_work_sync(work)
    716  *
    717  *	If work was queued, remove it from the queue and return true.
    718  *	If work was not queued, return false.  Either way, if work is
    719  *	currently running, wait for it to complete.
    720  *
    721  *	May sleep.
    722  */
    723 bool
    724 cancel_work_sync(struct work_struct *work)
    725 {
    726 	struct workqueue_struct *wq;
    727 	bool cancelled_p = false;
    728 
    729 	/* If there's no workqueue, nothing to cancel.   */
    730 	if ((wq = work_queue(work)) == NULL)
    731 		goto out;
    732 
    733 	mutex_enter(&wq->wq_lock);
    734 	if (__predict_false(work_queue(work) != wq)) {
    735 		/*
    736 		 * It has finished execution or been cancelled by
    737 		 * another thread, and has been moved off the
    738 		 * workqueue, so it's too late to cancel.
    739 		 */
    740 		cancelled_p = false;
    741 	} else {
    742 		/* Check whether it's on the queue.  */
    743 		if (work_claimed(work, wq)) {
    744 			/*
    745 			 * It is still on the queue.  Take it off the
    746 			 * queue and report successful cancellation.
    747 			 */
    748 			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
    749 			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
    750 			release_work(work, wq);
    751 			/* Can't dereference work after this point.  */
    752 			cancelled_p = true;
    753 		} else {
    754 			/* Not on the queue.  Couldn't cancel it.  */
    755 			cancelled_p = false;
    756 		}
    757 		/* If it's still running, wait for it to complete.  */
    758 		if (wq->wq_current_work == work)
    759 			wait_for_current_work(work, wq);
    760 	}
    761 	mutex_exit(&wq->wq_lock);
    762 
    763 out:	return cancelled_p;
    764 }
    765 
    766 /*
    767  * wait_for_current_work(work, wq)
    768  *
    769  *	wq must be currently executing work.  Wait for it to finish.
    770  *
    771  *	Does not dereference work.
    772  */
    773 static void
    774 wait_for_current_work(struct work_struct *work, struct workqueue_struct *wq)
    775 {
    776 	uint64_t gen;
    777 
    778 	KASSERT(mutex_owned(&wq->wq_lock));
    779 	KASSERT(wq->wq_current_work == work);
    780 
    781 	/* Wait only one generation in case it gets requeued quickly.  */
    782 	SDT_PROBE2(sdt, linux, work, wait__start,  work, wq);
    783 	gen = wq->wq_gen;
    784 	do {
    785 		cv_wait(&wq->wq_cv, &wq->wq_lock);
    786 	} while (wq->wq_current_work == work && wq->wq_gen == gen);
    787 	SDT_PROBE2(sdt, linux, work, wait__done,  work, wq);
    788 }
    789 
    790 /*
    792  * Delayed work
    793  */
    794 
    795 /*
    796  * INIT_DELAYED_WORK(dw, fn)
    797  *
    798  *	Initialize dw for use with a workqueue to call fn in a worker
    799  *	thread after a delay.  There is no corresponding destruction
    800  *	operation.
    801  */
    802 void
    803 INIT_DELAYED_WORK(struct delayed_work *dw, void (*fn)(struct work_struct *))
    804 {
    805 
    806 	INIT_WORK(&dw->work, fn);
    807 	dw->dw_state = DELAYED_WORK_IDLE;
    808 	dw->dw_resched = -1;
    809 
    810 	/*
     811 	 * Defer callout_init until we are going to schedule the
     812 	 * callout, which can then callout_destroy it: since there's
     813 	 * no DESTROY_DELAYED_WORK or anything like it, we would
     814 	 * otherwise have no opportunity to call callout_destroy.
    815 	 */
    816 }
    817 
    818 /*
    819  * schedule_delayed_work(dw, ticks)
    820  *
    821  *	If it is not currently scheduled, schedule dw to run after
    822  *	ticks on system_wq.  If currently executing and not already
    823  *	rescheduled, reschedule it.  True if it was newly scheduled,
    824  *	false if it was already scheduled.
    825  *
    826  *	If ticks == 0, queue it to run as soon as the worker can,
    827  *	without waiting for the next callout tick to run.
    828  */
    829 bool
    830 schedule_delayed_work(struct delayed_work *dw, unsigned long ticks)
    831 {
    832 
    833 	return queue_delayed_work(system_wq, dw, ticks);
    834 }
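
/*
 * Example usage (illustrative sketch, not part of the original file):
 * a self-rearming periodic tick on system_wq.  The names foo_softc and
 * foo_tick are hypothetical; mstohz() converts milliseconds to the
 * callout ticks this implementation uses for the delay.
 *
 *	struct foo_softc {
 *		struct delayed_work	sc_tick;
 *	};
 *
 *	static void
 *	foo_tick(struct work_struct *work)
 *	{
 *		struct delayed_work *dw = container_of(work,
 *		    struct delayed_work, work);
 *		struct foo_softc *sc = container_of(dw,
 *		    struct foo_softc, sc_tick);
 *
 *		... periodic housekeeping on sc ...
 *
 *		(void)schedule_delayed_work(&sc->sc_tick, mstohz(1000));
 *	}
 *
 *	At attach time:
 *		INIT_DELAYED_WORK(&sc->sc_tick, &foo_tick);
 *		(void)schedule_delayed_work(&sc->sc_tick, mstohz(1000));
 */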
    835 
    836 /*
    837  * dw_callout_init(wq, dw)
    838  *
    839  *	Initialize the callout of dw and transition to
    840  *	DELAYED_WORK_SCHEDULED.  Caller must use callout_schedule.
    841  */
    842 static void
    843 dw_callout_init(struct workqueue_struct *wq, struct delayed_work *dw)
    844 {
    845 
    846 	KASSERT(mutex_owned(&wq->wq_lock));
    847 	KASSERT(work_queue(&dw->work) == wq);
    848 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
    849 
    850 	callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
    851 	callout_setfunc(&dw->dw_callout, &linux_workqueue_timeout, dw);
    852 	TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
    853 	dw->dw_state = DELAYED_WORK_SCHEDULED;
    854 }
    855 
    856 /*
    857  * dw_callout_destroy(wq, dw)
    858  *
    859  *	Destroy the callout of dw and transition to DELAYED_WORK_IDLE.
    860  */
    861 static void
    862 dw_callout_destroy(struct workqueue_struct *wq, struct delayed_work *dw)
    863 {
    864 
    865 	KASSERT(mutex_owned(&wq->wq_lock));
    866 	KASSERT(work_queue(&dw->work) == wq);
    867 	KASSERT(dw->dw_state == DELAYED_WORK_SCHEDULED ||
    868 	    dw->dw_state == DELAYED_WORK_RESCHEDULED ||
    869 	    dw->dw_state == DELAYED_WORK_CANCELLED);
    870 
    871 	TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
    872 	callout_destroy(&dw->dw_callout);
    873 	dw->dw_resched = -1;
    874 	dw->dw_state = DELAYED_WORK_IDLE;
    875 }
    876 
    877 /*
    878  * cancel_delayed_work_done(wq, dw)
    879  *
    880  *	Complete cancellation of a delayed work: transition from
    881  *	DELAYED_WORK_CANCELLED to DELAYED_WORK_IDLE and off the
    882  *	workqueue.  Caller must not dereference dw after this returns.
    883  */
    884 static void
    885 cancel_delayed_work_done(struct workqueue_struct *wq, struct delayed_work *dw)
    886 {
    887 
    888 	KASSERT(mutex_owned(&wq->wq_lock));
    889 	KASSERT(work_queue(&dw->work) == wq);
    890 	KASSERT(dw->dw_state == DELAYED_WORK_CANCELLED);
    891 
    892 	dw_callout_destroy(wq, dw);
    893 	release_work(&dw->work, wq);
    894 	/* Can't dereference dw after this point.  */
    895 }
    896 
    897 /*
    898  * queue_delayed_work(wq, dw, ticks)
    899  *
    900  *	If it is not currently scheduled, schedule dw to run after
    901  *	ticks on wq.  If currently queued, remove it from the queue
    902  *	first.
    903  *
    904  *	If ticks == 0, queue it to run as soon as the worker can,
    905  *	without waiting for the next callout tick to run.
    906  */
    907 bool
    908 queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    909     unsigned long ticks)
    910 {
    911 	bool newly_queued;
    912 
    913 	mutex_enter(&wq->wq_lock);
    914 	if (__predict_true(acquire_work(&dw->work, wq))) {
    915 		/*
    916 		 * It wasn't on any workqueue at all.  Schedule it to
    917 		 * run on this one.
    918 		 */
    919 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
    920 		if (ticks == 0) {
    921 			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
    922 			    work_entry);
    923 			cv_broadcast(&wq->wq_cv);
    924 			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
    925 		} else {
    926 			/*
    927 			 * Initialize a callout and schedule to run
    928 			 * after a delay.
    929 			 */
    930 			dw_callout_init(wq, dw);
    931 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
    932 			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
    933 		}
    934 		newly_queued = true;
    935 	} else {
    936 		/* It was already on this workqueue.  */
    937 		switch (dw->dw_state) {
    938 		case DELAYED_WORK_IDLE:
    939 		case DELAYED_WORK_SCHEDULED:
    940 		case DELAYED_WORK_RESCHEDULED:
    941 			/* On the queue or already scheduled.  Leave it.  */
    942 			newly_queued = false;
    943 			break;
    944 		case DELAYED_WORK_CANCELLED:
    945 			/*
    946 			 * Scheduled and the callout began, but it was
    947 			 * cancelled.  Reschedule it.
    948 			 */
    949 			if (ticks == 0) {
    950 				dw->dw_state = DELAYED_WORK_SCHEDULED;
    951 				SDT_PROBE2(sdt, linux, work, queue,
    952 				    &dw->work, wq);
    953 			} else {
    954 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
    955 				dw->dw_resched = MIN(INT_MAX, ticks);
    956 				SDT_PROBE3(sdt, linux, work, schedule,
    957 				    dw, wq, ticks);
    958 			}
    959 			newly_queued = true;
    960 			break;
    961 		default:
    962 			panic("invalid delayed work state: %d",
    963 			    dw->dw_state);
    964 		}
    965 	}
    966 	mutex_exit(&wq->wq_lock);
    967 
    968 	return newly_queued;
    969 }
    970 
    971 /*
    972  * mod_delayed_work(wq, dw, ticks)
    973  *
    974  *	Schedule dw to run after ticks.  If scheduled or queued,
    975  *	reschedule.  If ticks == 0, run without delay.
    976  *
    977  *	True if it modified the timer of an already scheduled work,
    978  *	false if it newly scheduled the work.
    979  */
    980 bool
    981 mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    982     unsigned long ticks)
    983 {
    984 	bool timer_modified;
    985 
    986 	mutex_enter(&wq->wq_lock);
    987 	if (acquire_work(&dw->work, wq)) {
    988 		/*
    989 		 * It wasn't on any workqueue at all.  Schedule it to
    990 		 * run on this one.
    991 		 */
    992 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
    993 		if (ticks == 0) {
    994 			/*
    995 			 * Run immediately: put it on the queue and
    996 			 * signal the worker thread.
    997 			 */
    998 			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
    999 			    work_entry);
   1000 			cv_broadcast(&wq->wq_cv);
   1001 			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
   1002 		} else {
   1003 			/*
   1004 			 * Initialize a callout and schedule to run
   1005 			 * after a delay.
   1006 			 */
   1007 			dw_callout_init(wq, dw);
   1008 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
   1009 			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
   1010 		}
   1011 		timer_modified = false;
   1012 	} else {
   1013 		/* It was already on this workqueue.  */
   1014 		switch (dw->dw_state) {
   1015 		case DELAYED_WORK_IDLE:
   1016 			/* On the queue.  */
   1017 			if (ticks == 0) {
   1018 				/* Leave it be.  */
   1019 				SDT_PROBE2(sdt, linux, work, cancel,
   1020 				    &dw->work, wq);
   1021 				SDT_PROBE2(sdt, linux, work, queue,
   1022 				    &dw->work, wq);
   1023 			} else {
   1024 				/* Remove from the queue and schedule.  */
   1025 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
   1026 				    work_entry);
   1027 				dw_callout_init(wq, dw);
   1028 				callout_schedule(&dw->dw_callout,
   1029 				    MIN(INT_MAX, ticks));
   1030 				SDT_PROBE2(sdt, linux, work, cancel,
   1031 				    &dw->work, wq);
   1032 				SDT_PROBE3(sdt, linux, work, schedule,
   1033 				    dw, wq, ticks);
   1034 			}
   1035 			timer_modified = true;
   1036 			break;
   1037 		case DELAYED_WORK_SCHEDULED:
   1038 			/*
   1039 			 * It is scheduled to run after a delay.  Try
   1040 			 * to stop it and reschedule it; if we can't,
   1041 			 * either reschedule it or cancel it to put it
   1042 			 * on the queue, and inform the callout.
   1043 			 */
   1044 			if (callout_stop(&dw->dw_callout)) {
   1045 				/* Can't stop, callout has begun.  */
   1046 				if (ticks == 0) {
   1047 					/*
   1048 					 * We don't actually need to do
   1049 					 * anything.  The callout will
   1050 					 * queue it as soon as it gets
   1051 					 * the lock.
   1052 					 */
   1053 					SDT_PROBE2(sdt, linux, work, cancel,
   1054 					    &dw->work, wq);
   1055 					SDT_PROBE2(sdt, linux, work, queue,
   1056 					    &dw->work, wq);
   1057 				} else {
   1058 					/* Ask the callout to reschedule.  */
   1059 					dw->dw_state = DELAYED_WORK_RESCHEDULED;
   1060 					dw->dw_resched = MIN(INT_MAX, ticks);
   1061 					SDT_PROBE2(sdt, linux, work, cancel,
   1062 					    &dw->work, wq);
   1063 					SDT_PROBE3(sdt, linux, work, schedule,
   1064 					    dw, wq, ticks);
   1065 				}
   1066 			} else {
   1067 				/* We stopped the callout before it began.  */
   1068 				if (ticks == 0) {
   1069 					/*
   1070 					 * Run immediately: destroy the
   1071 					 * callout, put it on the
   1072 					 * queue, and signal the worker
   1073 					 * thread.
   1074 					 */
   1075 					dw_callout_destroy(wq, dw);
   1076 					TAILQ_INSERT_TAIL(&wq->wq_dqueue,
   1077 					    &dw->work, work_entry);
   1078 					cv_broadcast(&wq->wq_cv);
   1079 					SDT_PROBE2(sdt, linux, work, cancel,
   1080 					    &dw->work, wq);
   1081 					SDT_PROBE2(sdt, linux, work, queue,
   1082 					    &dw->work, wq);
   1083 				} else {
   1084 					/*
   1085 					 * Reschedule the callout.  No
   1086 					 * state change.
   1087 					 */
   1088 					callout_schedule(&dw->dw_callout,
   1089 					    MIN(INT_MAX, ticks));
   1090 					SDT_PROBE2(sdt, linux, work, cancel,
   1091 					    &dw->work, wq);
   1092 					SDT_PROBE3(sdt, linux, work, schedule,
   1093 					    dw, wq, ticks);
   1094 				}
   1095 			}
   1096 			timer_modified = true;
   1097 			break;
   1098 		case DELAYED_WORK_RESCHEDULED:
   1099 			/*
   1100 			 * Someone rescheduled it after the callout
   1101 			 * started but before the poor thing even had a
   1102 			 * chance to acquire the lock.
   1103 			 */
   1104 			if (ticks == 0) {
   1105 				/*
   1106 				 * We can just switch back to
   1107 				 * DELAYED_WORK_SCHEDULED so that the
   1108 				 * callout will queue the work as soon
   1109 				 * as it gets the lock.
   1110 				 */
   1111 				dw->dw_state = DELAYED_WORK_SCHEDULED;
   1112 				dw->dw_resched = -1;
   1113 				SDT_PROBE2(sdt, linux, work, cancel,
   1114 				    &dw->work, wq);
   1115 				SDT_PROBE2(sdt, linux, work, queue,
   1116 				    &dw->work, wq);
   1117 			} else {
   1118 				/* Change the rescheduled time.  */
    1119 				dw->dw_resched = MIN(INT_MAX, ticks);
   1120 				SDT_PROBE2(sdt, linux, work, cancel,
   1121 				    &dw->work, wq);
   1122 				SDT_PROBE3(sdt, linux, work, schedule,
   1123 				    dw, wq, ticks);
   1124 			}
   1125 			timer_modified = true;
   1126 			break;
   1127 		case DELAYED_WORK_CANCELLED:
   1128 			/*
   1129 			 * Someone cancelled it after the callout
   1130 			 * started but before the poor thing even had a
   1131 			 * chance to acquire the lock.
   1132 			 */
   1133 			if (ticks == 0) {
   1134 				/*
   1135 				 * We can just switch back to
   1136 				 * DELAYED_WORK_SCHEDULED so that the
   1137 				 * callout will queue the work as soon
   1138 				 * as it gets the lock.
   1139 				 */
   1140 				dw->dw_state = DELAYED_WORK_SCHEDULED;
   1141 				SDT_PROBE2(sdt, linux, work, queue,
   1142 				    &dw->work, wq);
   1143 			} else {
   1144 				/* Ask it to reschedule.  */
   1145 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
   1146 				dw->dw_resched = MIN(INT_MAX, ticks);
   1147 				SDT_PROBE3(sdt, linux, work, schedule,
   1148 				    dw, wq, ticks);
   1149 			}
   1150 			timer_modified = false;
   1151 			break;
   1152 		default:
   1153 			panic("invalid delayed work state: %d", dw->dw_state);
   1154 		}
   1155 	}
   1156 	mutex_exit(&wq->wq_lock);
   1157 
   1158 	return timer_modified;
   1159 }
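
/*
 * Illustrative sketch (not from the original source): mod_delayed_work
 * is the natural primitive for a resettable timeout, such as a
 * watchdog that should only fire after a period of inactivity.  The
 * names foo_softc, sc_watchdog, and foo_kick are hypothetical.
 *
 *	static void
 *	foo_kick(struct foo_softc *sc)
 *	{
 *
 *		(void)mod_delayed_work(system_wq, &sc->sc_watchdog,
 *		    mstohz(5000));
 *	}
 *
 * Every call pushes the watchdog five seconds into the future whether
 * or not it was already scheduled; the return value only reports
 * whether an existing timer was modified.
 */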
   1160 
   1161 /*
   1162  * cancel_delayed_work(dw)
   1163  *
   1164  *	If work was scheduled or queued, remove it from the schedule or
   1165  *	queue and return true.  If work was not scheduled or queued,
   1166  *	return false.  Note that work may already be running; if it
   1167  *	hasn't been rescheduled or requeued, then cancel_delayed_work
   1168  *	will return false, and either way, cancel_delayed_work will NOT
   1169  *	wait for the work to complete.
   1170  */
   1171 bool
   1172 cancel_delayed_work(struct delayed_work *dw)
   1173 {
   1174 	struct workqueue_struct *wq;
   1175 	bool cancelled_p;
   1176 
   1177 	/* If there's no workqueue, nothing to cancel.   */
   1178 	if ((wq = work_queue(&dw->work)) == NULL)
   1179 		return false;
   1180 
   1181 	mutex_enter(&wq->wq_lock);
   1182 	if (__predict_false(work_queue(&dw->work) != wq)) {
   1183 		cancelled_p = false;
   1184 	} else {
   1185 		switch (dw->dw_state) {
   1186 		case DELAYED_WORK_IDLE:
   1187 			/*
   1188 			 * It is either on the queue or already running
   1189 			 * or both.
   1190 			 */
   1191 			if (work_claimed(&dw->work, wq)) {
   1192 				/* On the queue.  Remove and release.  */
   1193 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
   1194 				    work_entry);
   1195 				SDT_PROBE2(sdt, linux, work, cancel,
   1196 				    &dw->work, wq);
   1197 				release_work(&dw->work, wq);
   1198 				/* Can't dereference dw after this point.  */
   1199 				cancelled_p = true;
   1200 			} else {
   1201 				/* Not on the queue, so didn't cancel.  */
   1202 				cancelled_p = false;
   1203 			}
   1204 			break;
   1205 		case DELAYED_WORK_SCHEDULED:
   1206 			/*
   1207 			 * If it is scheduled, mark it cancelled and
   1208 			 * try to stop the callout before it starts.
   1209 			 *
   1210 			 * If it's too late and the callout has already
   1211 			 * begun to execute, tough.
   1212 			 *
   1213 			 * If we stopped the callout before it started,
   1214 			 * however, then destroy the callout and
   1215 			 * dissociate it from the workqueue ourselves.
   1216 			 */
   1217 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1218 			cancelled_p = true;
   1219 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
   1220 			if (!callout_stop(&dw->dw_callout))
   1221 				cancel_delayed_work_done(wq, dw);
   1222 			break;
   1223 		case DELAYED_WORK_RESCHEDULED:
   1224 			/*
   1225 			 * If it is being rescheduled, the callout has
   1226 			 * already fired.  We must ask it to cancel.
   1227 			 */
   1228 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1229 			dw->dw_resched = -1;
   1230 			cancelled_p = true;
   1231 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
   1232 			break;
   1233 		case DELAYED_WORK_CANCELLED:
   1234 			/*
   1235 			 * If it is being cancelled, the callout has
   1236 			 * already fired.  There is nothing more for us
   1237 			 * to do.  Someone else claims credit for
   1238 			 * cancelling it.
   1239 			 */
   1240 			cancelled_p = false;
   1241 			break;
   1242 		default:
   1243 			panic("invalid delayed work state: %d",
   1244 			    dw->dw_state);
   1245 		}
   1246 	}
   1247 	mutex_exit(&wq->wq_lock);
   1248 
   1249 	return cancelled_p;
   1250 }
   1251 
   1252 /*
   1253  * cancel_delayed_work_sync(dw)
   1254  *
   1255  *	If work was scheduled or queued, remove it from the schedule or
   1256  *	queue and return true.  If work was not scheduled or queued,
   1257  *	return false.  Note that work may already be running; if it
   1258  *	hasn't been rescheduled or requeued, then cancel_delayed_work
   1259  *	will return false; either way, wait for it to complete.
   1260  */
   1261 bool
   1262 cancel_delayed_work_sync(struct delayed_work *dw)
   1263 {
   1264 	struct workqueue_struct *wq;
   1265 	bool cancelled_p;
   1266 
   1267 	/* If there's no workqueue, nothing to cancel.  */
   1268 	if ((wq = work_queue(&dw->work)) == NULL)
   1269 		return false;
   1270 
   1271 	mutex_enter(&wq->wq_lock);
   1272 	if (__predict_false(work_queue(&dw->work) != wq)) {
   1273 		cancelled_p = false;
   1274 	} else {
   1275 		switch (dw->dw_state) {
   1276 		case DELAYED_WORK_IDLE:
   1277 			/*
   1278 			 * It is either on the queue or already running
   1279 			 * or both.
   1280 			 */
   1281 			if (work_claimed(&dw->work, wq)) {
   1282 				/* On the queue.  Remove and release.  */
   1283 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
   1284 				    work_entry);
   1285 				SDT_PROBE2(sdt, linux, work, cancel,
   1286 				    &dw->work, wq);
   1287 				release_work(&dw->work, wq);
   1288 				/* Can't dereference dw after this point.  */
   1289 				cancelled_p = true;
   1290 			} else {
   1291 				/* Not on the queue, so didn't cancel. */
   1292 				cancelled_p = false;
   1293 			}
   1294 			/* If it's still running, wait for it to complete.  */
   1295 			if (wq->wq_current_work == &dw->work)
   1296 				wait_for_current_work(&dw->work, wq);
   1297 			break;
   1298 		case DELAYED_WORK_SCHEDULED:
   1299 			/*
   1300 			 * If it is scheduled, mark it cancelled and
   1301 			 * try to stop the callout before it starts.
   1302 			 *
   1303 			 * If it's too late and the callout has already
   1304 			 * begun to execute, we must wait for it to
   1305 			 * complete.  But we got in soon enough to ask
   1306 			 * the callout not to run, so we successfully
   1307 			 * cancelled it in that case.
   1308 			 *
   1309 			 * If we stopped the callout before it started,
   1310 			 * then we must destroy the callout and
   1311 			 * dissociate it from the workqueue ourselves.
   1312 			 */
   1313 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1314 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
   1315 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
   1316 				cancel_delayed_work_done(wq, dw);
   1317 			cancelled_p = true;
   1318 			break;
   1319 		case DELAYED_WORK_RESCHEDULED:
   1320 			/*
   1321 			 * If it is being rescheduled, the callout has
   1322 			 * already fired.  We must ask it to cancel and
   1323 			 * wait for it to complete.
   1324 			 */
   1325 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1326 			dw->dw_resched = -1;
   1327 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
   1328 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
   1329 			cancelled_p = true;
   1330 			break;
   1331 		case DELAYED_WORK_CANCELLED:
   1332 			/*
   1333 			 * If it is being cancelled, the callout has
   1334 			 * already fired.  We need only wait for it to
   1335 			 * complete.  Someone else, however, claims
   1336 			 * credit for cancelling it.
   1337 			 */
   1338 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
   1339 			cancelled_p = false;
   1340 			break;
   1341 		default:
   1342 			panic("invalid delayed work state: %d",
   1343 			    dw->dw_state);
   1344 		}
   1345 	}
   1346 	mutex_exit(&wq->wq_lock);
   1347 
   1348 	return cancelled_p;
   1349 }
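
/*
 * Illustrative sketch (not from the original source): the usual
 * teardown pattern before freeing a structure that embeds work items
 * is to cancel synchronously, so that neither the callout nor the
 * worker thread can touch the memory afterwards.  The names foo_detach,
 * sc_tick, and sc_work are hypothetical.
 *
 *	static void
 *	foo_detach(struct foo_softc *sc)
 *	{
 *
 *		(void)cancel_delayed_work_sync(&sc->sc_tick);
 *		(void)cancel_work_sync(&sc->sc_work);
 *		... now safe to free sc ...
 *	}
 *
 * Note that if the work functions themselves requeue the work, the
 * caller must also prevent requeueing before cancelling, or the work
 * may be queued again after the _sync call returns.
 */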
   1350 
   1351 /*
   1353  * Flush
   1354  */
   1355 
   1356 /*
   1357  * flush_scheduled_work()
   1358  *
   1359  *	Wait for all work queued on system_wq to complete.  This does
   1360  *	not include delayed work.
   1361  */
   1362 void
   1363 flush_scheduled_work(void)
   1364 {
   1365 
   1366 	flush_workqueue(system_wq);
   1367 }
   1368 
   1369 /*
   1370  * flush_workqueue_locked(wq)
   1371  *
   1372  *	Wait for all work queued on wq to complete.  This does not
   1373  *	include delayed work.
   1374  *
   1375  *	Caller must hold wq's lock.
   1376  */
   1377 static void
   1378 flush_workqueue_locked(struct workqueue_struct *wq)
   1379 {
   1380 	uint64_t gen;
   1381 
   1382 	KASSERT(mutex_owned(&wq->wq_lock));
   1383 
   1384 	/* Get the current generation number.  */
   1385 	gen = wq->wq_gen;
   1386 
   1387 	/*
   1388 	 * If there's a batch of work in progress, we must wait for the
   1389 	 * worker thread to finish that batch.
   1390 	 */
   1391 	if (wq->wq_current_work != NULL)
   1392 		gen++;
   1393 
   1394 	/*
   1395 	 * If there's any work yet to be claimed from the queue by the
   1396 	 * worker thread, we must wait for it to finish one more batch
   1397 	 * too.
   1398 	 */
   1399 	if (!TAILQ_EMPTY(&wq->wq_queue) || !TAILQ_EMPTY(&wq->wq_dqueue))
   1400 		gen++;
   1401 
   1402 	/* Wait until the generation number has caught up.  */
   1403 	SDT_PROBE1(sdt, linux, work, flush__start,  wq);
   1404 	while (wq->wq_gen < gen)
   1405 		cv_wait(&wq->wq_cv, &wq->wq_lock);
   1406 	SDT_PROBE1(sdt, linux, work, flush__done,  wq);
   1407 }
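
/*
 * Worked example of the generation arithmetic above (illustrative):
 * suppose wq_gen == 5, the worker thread is in the middle of a batch
 * (wq_current_work != NULL), and more work is sitting on wq_queue.
 * Then gen is bumped twice, to 7: the batch in progress will advance
 * wq_gen to 6 when it finishes, and the batch that consumes the
 * still-queued work will advance it to 7.  flush_workqueue_locked
 * therefore sleeps until wq_gen reaches 7, by which point everything
 * that was queued at the time of the call has run.
 */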
   1408 
   1409 /*
   1410  * flush_workqueue(wq)
   1411  *
   1412  *	Wait for all work queued on wq to complete.  This does not
   1413  *	include delayed work.
   1414  */
   1415 void
   1416 flush_workqueue(struct workqueue_struct *wq)
   1417 {
   1418 
   1419 	mutex_enter(&wq->wq_lock);
   1420 	flush_workqueue_locked(wq);
   1421 	mutex_exit(&wq->wq_lock);
   1422 }
   1423 
   1424 /*
   1425  * flush_work(work)
   1426  *
   1427  *	If work is queued or currently executing, wait for it to
   1428  *	complete.
   1429  */
   1430 void
   1431 flush_work(struct work_struct *work)
   1432 {
   1433 	struct workqueue_struct *wq;
   1434 
   1435 	/* If there's no workqueue, nothing to flush.  */
   1436 	if ((wq = work_queue(work)) == NULL)
   1437 		return;
   1438 
   1439 	flush_workqueue(wq);
   1440 }
   1441 
   1442 /*
   1443  * flush_delayed_work(dw)
   1444  *
   1445  *	If dw is scheduled to run after a delay, queue it immediately
   1446  *	instead.  Then, if dw is queued or currently executing, wait
   1447  *	for it to complete.
   1448  */
   1449 void
   1450 flush_delayed_work(struct delayed_work *dw)
   1451 {
   1452 	struct workqueue_struct *wq;
   1453 
   1454 	/* If there's no workqueue, nothing to flush.  */
   1455 	if ((wq = work_queue(&dw->work)) == NULL)
   1456 		return;
   1457 
   1458 	mutex_enter(&wq->wq_lock);
   1459 	if (__predict_false(work_queue(&dw->work) != wq)) {
   1460 		/*
   1461 		 * Moved off the queue already (and possibly to another
   1462 		 * queue, though that would be ill-advised), so it must
   1463 		 * have completed, and we have nothing more to do.
   1464 		 */
   1465 	} else {
   1466 		switch (dw->dw_state) {
   1467 		case DELAYED_WORK_IDLE:
   1468 			/*
   1469 			 * It has a workqueue assigned and the callout
   1470 			 * is idle, so it must be in progress or on the
   1471 			 * queue.  In that case, we'll wait for it to
   1472 			 * complete.
   1473 			 */
   1474 			break;
   1475 		case DELAYED_WORK_SCHEDULED:
   1476 		case DELAYED_WORK_RESCHEDULED:
   1477 		case DELAYED_WORK_CANCELLED:
   1478 			/*
   1479 			 * The callout is scheduled, and may have even
   1480 			 * started.  Mark it as scheduled so that if
   1481 			 * the callout has fired it will queue the work
   1482 			 * itself.  Try to stop the callout -- if we
   1483 			 * can, queue the work now; if we can't, wait
   1484 			 * for the callout to complete, which entails
   1485 			 * queueing it.
   1486 			 */
   1487 			dw->dw_state = DELAYED_WORK_SCHEDULED;
   1488 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock)) {
   1489 				/*
   1490 				 * We stopped it before it ran.  No
   1491 				 * state change in the interim is
   1492 				 * possible.  Destroy the callout and
   1493 				 * queue it ourselves.
   1494 				 */
   1495 				KASSERT(dw->dw_state ==
   1496 				    DELAYED_WORK_SCHEDULED);
   1497 				dw_callout_destroy(wq, dw);
   1498 				TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
   1499 				    work_entry);
   1500 				cv_broadcast(&wq->wq_cv);
   1501 				SDT_PROBE2(sdt, linux, work, queue,
   1502 				    &dw->work, wq);
   1503 			}
   1504 			break;
   1505 		default:
   1506 			panic("invalid delayed work state: %d", dw->dw_state);
   1507 		}
   1508 		/*
   1509 		 * Waiting for the whole queue to flush is overkill,
   1510 		 * but doesn't hurt.
   1511 		 */
   1512 		flush_workqueue_locked(wq);
   1513 	}
   1514 	mutex_exit(&wq->wq_lock);
   1515 }
   1516