linux_work.c revision 1.44.4.1
      1 /*	$NetBSD: linux_work.c,v 1.44.4.1 2020/02/12 20:02:19 martin Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2018 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Taylor R. Campbell.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: linux_work.c,v 1.44.4.1 2020/02/12 20:02:19 martin Exp $");
     34 
     35 #include <sys/types.h>
     36 #include <sys/atomic.h>
     37 #include <sys/callout.h>
     38 #include <sys/condvar.h>
     39 #include <sys/errno.h>
     40 #include <sys/kmem.h>
     41 #include <sys/kthread.h>
     42 #include <sys/lwp.h>
     43 #include <sys/mutex.h>
     44 #ifndef _MODULE
     45 #include <sys/once.h>
     46 #endif
     47 #include <sys/queue.h>
     48 #include <sys/sdt.h>
     49 
     50 #include <linux/workqueue.h>
     51 
     52 TAILQ_HEAD(work_head, work_struct);
     53 TAILQ_HEAD(dwork_head, delayed_work);
     54 
     55 struct workqueue_struct {
     56 	kmutex_t		wq_lock;
     57 	kcondvar_t		wq_cv;
     58 	struct dwork_head	wq_delayed; /* delayed work scheduled */
     59 	struct work_head	wq_queue;   /* work to run */
     60 	struct work_head	wq_dqueue;  /* delayed work to run now */
     61 	struct work_struct	*wq_current_work;
     62 	int			wq_flags;
     63 	bool			wq_dying;
     64 	uint64_t		wq_gen;
     65 	struct lwp		*wq_lwp;
     66 };
     67 
     68 static void __dead	linux_workqueue_thread(void *);
     69 static void		linux_workqueue_timeout(void *);
     70 static bool		work_claimed(struct work_struct *,
     71 			    struct workqueue_struct *);
     72 static struct workqueue_struct *
     73 			work_queue(struct work_struct *);
     74 static bool		acquire_work(struct work_struct *,
     75 			    struct workqueue_struct *);
     76 static void		release_work(struct work_struct *,
     77 			    struct workqueue_struct *);
     78 static void		wait_for_current_work(struct work_struct *,
     79 			    struct workqueue_struct *);
     80 static void		dw_callout_init(struct workqueue_struct *,
     81 			    struct delayed_work *);
     82 static void		dw_callout_destroy(struct workqueue_struct *,
     83 			    struct delayed_work *);
     84 static void		cancel_delayed_work_done(struct workqueue_struct *,
     85 			    struct delayed_work *);
     86 
     87 SDT_PROBE_DEFINE2(sdt, linux, work, acquire,
     88     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
     89 SDT_PROBE_DEFINE2(sdt, linux, work, release,
     90     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
     91 SDT_PROBE_DEFINE2(sdt, linux, work, queue,
     92     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
     93 SDT_PROBE_DEFINE2(sdt, linux, work, cancel,
     94     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
     95 SDT_PROBE_DEFINE3(sdt, linux, work, schedule,
     96     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/,
     97     "unsigned long"/*ticks*/);
     98 SDT_PROBE_DEFINE2(sdt, linux, work, timer,
     99     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
    100 SDT_PROBE_DEFINE2(sdt, linux, work, wait__start,
    101     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
    102 SDT_PROBE_DEFINE2(sdt, linux, work, wait__done,
    103     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
    104 SDT_PROBE_DEFINE2(sdt, linux, work, run,
    105     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
    106 SDT_PROBE_DEFINE2(sdt, linux, work, done,
    107     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
    108 SDT_PROBE_DEFINE1(sdt, linux, work, batch__start,
    109     "struct workqueue_struct *"/*wq*/);
    110 SDT_PROBE_DEFINE1(sdt, linux, work, batch__done,
    111     "struct workqueue_struct *"/*wq*/);
    112 SDT_PROBE_DEFINE1(sdt, linux, work, flush__start,
    113     "struct workqueue_struct *"/*wq*/);
    114 SDT_PROBE_DEFINE1(sdt, linux, work, flush__done,
    115     "struct workqueue_struct *"/*wq*/);
    116 
    117 static specificdata_key_t workqueue_key __read_mostly;
    118 
    119 struct workqueue_struct	*system_wq __read_mostly;
    120 struct workqueue_struct	*system_long_wq __read_mostly;
    121 struct workqueue_struct	*system_power_efficient_wq __read_mostly;
    122 
    123 static inline uintptr_t
    124 atomic_cas_uintptr(volatile uintptr_t *p, uintptr_t old, uintptr_t new)
    125 {
    126 
    127 	return (uintptr_t)atomic_cas_ptr(p, (void *)old, (void *)new);
    128 }
    129 
    130 /*
    131  * linux_workqueue_init()
    132  *
    133  *	Initialize the Linux workqueue subsystem.  Return 0 on success,
    134  *	NetBSD error on failure.
    135  */
    136 static int
    137 linux_workqueue_init0(void)
    138 {
    139 	int error;
    140 
    141 	error = lwp_specific_key_create(&workqueue_key, NULL);
    142 	if (error)
    143 		goto fail0;
    144 
    145 	system_wq = alloc_ordered_workqueue("lnxsyswq", 0);
    146 	if (system_wq == NULL) {
    147 		error = ENOMEM;
    148 		goto fail1;
    149 	}
    150 
    151 	system_long_wq = alloc_ordered_workqueue("lnxlngwq", 0);
    152 	if (system_long_wq == NULL) {
    153 		error = ENOMEM;
    154 		goto fail2;
    155 	}
    156 
    157 	system_power_efficient_wq = alloc_ordered_workqueue("lnxpwrwq", 0);
    158 	if (system_power_efficient_wq == NULL) {
    159 		error = ENOMEM;
    160 		goto fail3;
    161 	}
    162 
    163 	return 0;
    164 
    165 fail4: __unused
    166 	destroy_workqueue(system_power_efficient_wq);
    167 fail3:	destroy_workqueue(system_long_wq);
    168 fail2:	destroy_workqueue(system_wq);
    169 fail1:	lwp_specific_key_delete(workqueue_key);
    170 fail0:	KASSERT(error);
    171 	return error;
    172 }
    173 
    174 /*
    175  * linux_workqueue_fini()
    176  *
    177  *	Destroy the Linux workqueue subsystem.  Never fails.
    178  */
    179 static void
    180 linux_workqueue_fini0(void)
    181 {
    182 
    183 	destroy_workqueue(system_power_efficient_wq);
    184 	destroy_workqueue(system_long_wq);
    185 	destroy_workqueue(system_wq);
    186 	lwp_specific_key_delete(workqueue_key);
    187 }
    188 
    189 #ifndef _MODULE
    190 static ONCE_DECL(linux_workqueue_init_once);
    191 #endif
    192 
    193 int
    194 linux_workqueue_init(void)
    195 {
    196 #ifdef _MODULE
    197 	return linux_workqueue_init0();
    198 #else
    199 	return INIT_ONCE(&linux_workqueue_init_once, &linux_workqueue_init0);
    200 #endif
    201 }
    202 
    203 void
    204 linux_workqueue_fini(void)
    205 {
    206 #ifdef _MODULE
    207 	return linux_workqueue_fini0();
    208 #else
    209 	return FINI_ONCE(&linux_workqueue_init_once, &linux_workqueue_fini0);
    210 #endif
    211 }
    212 
    213 /*
    215  * Workqueues
    216  */
    217 
    218 /*
    219  * alloc_ordered_workqueue(name, flags)
    220  *
    221  *	Create a workqueue of the given name.  No flags are currently
    222  *	defined.  Return NULL on failure, pointer to struct
    223  *	workqueue_struct object on success.
    224  */
    225 struct workqueue_struct *
    226 alloc_ordered_workqueue(const char *name, int flags)
    227 {
    228 	struct workqueue_struct *wq;
    229 	int error;
    230 
    231 	KASSERT(flags == 0);
    232 
    233 	wq = kmem_zalloc(sizeof(*wq), KM_SLEEP);
    234 
    235 	mutex_init(&wq->wq_lock, MUTEX_DEFAULT, IPL_VM);
    236 	cv_init(&wq->wq_cv, name);
    237 	TAILQ_INIT(&wq->wq_delayed);
    238 	TAILQ_INIT(&wq->wq_queue);
    239 	TAILQ_INIT(&wq->wq_dqueue);
    240 	wq->wq_current_work = NULL;
    241 	wq->wq_flags = 0;
    242 	wq->wq_dying = false;
    243 	wq->wq_gen = 0;
    244 	wq->wq_lwp = NULL;
    245 
    246 	error = kthread_create(PRI_NONE,
    247 	    KTHREAD_MPSAFE|KTHREAD_TS|KTHREAD_MUSTJOIN, NULL,
    248 	    &linux_workqueue_thread, wq, &wq->wq_lwp, "%s", name);
    249 	if (error)
    250 		goto fail0;
    251 
    252 	return wq;
    253 
    254 fail0:	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
    255 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
    256 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
    257 	cv_destroy(&wq->wq_cv);
    258 	mutex_destroy(&wq->wq_lock);
    259 	kmem_free(wq, sizeof(*wq));
    260 	return NULL;
    261 }
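
        /*
         * Example (illustrative sketch; the foo_softc structure and its
         * members are hypothetical): a driver that wants its own
         * serialized queue rather than system_wq can allocate one at
         * attach time and destroy it at detach time, once nothing can
         * queue new work any more:
         *
         *	sc->sc_wq = alloc_ordered_workqueue("foowq", 0);
         *	if (sc->sc_wq == NULL)
         *		return ENOMEM;
         *
         * Work is then queued with queue_work(sc->sc_wq, ...); teardown
         * is simply:
         *
         *	destroy_workqueue(sc->sc_wq);
         */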
    262 
    263 /*
    264  * destroy_workqueue(wq)
    265  *
    266  *	Destroy the workqueue wq.  Cancel any pending
    267  *	delayed work.  Wait for all queued work to complete.
    268  *
    269  *	May sleep.
    270  */
    271 void
    272 destroy_workqueue(struct workqueue_struct *wq)
    273 {
    274 
    275 	/*
    276 	 * Cancel all delayed work.  We do this first because any
    277 	 * delayed work that has already timed out, which we can't
    278 	 * cancel, may have queued new work.
    279 	 */
    280 	mutex_enter(&wq->wq_lock);
    281 	while (!TAILQ_EMPTY(&wq->wq_delayed)) {
    282 		struct delayed_work *const dw = TAILQ_FIRST(&wq->wq_delayed);
    283 
    284 		KASSERT(work_queue(&dw->work) == wq);
    285 		KASSERTMSG((dw->dw_state == DELAYED_WORK_SCHEDULED ||
    286 			dw->dw_state == DELAYED_WORK_RESCHEDULED ||
    287 			dw->dw_state == DELAYED_WORK_CANCELLED),
    288 		    "delayed work %p in bad state: %d",
    289 		    dw, dw->dw_state);
    290 
    291 		/*
    292 		 * Mark it cancelled and try to stop the callout before
    293 		 * it starts.
    294 		 *
    295 		 * If it's too late and the callout has already begun
    296 		 * to execute, then it will notice that we asked to
    297 		 * cancel it and remove itself from the queue before
    298 		 * returning.
    299 		 *
    300 		 * If we stopped the callout before it started,
    301 		 * however, then we can safely destroy the callout and
    302 		 * dissociate it from the workqueue ourselves.
    303 		 */
    304 		SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
    305 		dw->dw_state = DELAYED_WORK_CANCELLED;
    306 		if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
    307 			cancel_delayed_work_done(wq, dw);
    308 	}
    309 	mutex_exit(&wq->wq_lock);
    310 
    311 	/*
    312 	 * At this point, no new work can be put on the queue.
    313 	 */
    314 
    315 	/* Tell the thread to exit.  */
    316 	mutex_enter(&wq->wq_lock);
    317 	wq->wq_dying = true;
    318 	cv_broadcast(&wq->wq_cv);
    319 	mutex_exit(&wq->wq_lock);
    320 
    321 	/* Wait for it to exit.  */
    322 	(void)kthread_join(wq->wq_lwp);
    323 
    324 	KASSERT(wq->wq_dying);
    325 	KASSERT(wq->wq_flags == 0);
    326 	KASSERT(wq->wq_current_work == NULL);
    327 	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
    328 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
    329 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
    330 	cv_destroy(&wq->wq_cv);
    331 	mutex_destroy(&wq->wq_lock);
    332 
    333 	kmem_free(wq, sizeof(*wq));
    334 }
    335 
    336 /*
    338  * Work thread and callout
    339  */
    340 
    341 /*
    342  * linux_workqueue_thread(cookie)
    343  *
    344  *	Main function for a workqueue's worker thread.  Waits until
    345  *	there is work queued, grabs a batch of work off the queue,
    346  *	executes it all, bumps the generation number, and repeats,
    347  *	until dying.
    348  */
    349 static void __dead
    350 linux_workqueue_thread(void *cookie)
    351 {
    352 	struct workqueue_struct *const wq = cookie;
    353 	struct work_head *const q[2] = { &wq->wq_queue, &wq->wq_dqueue };
    354 	struct work_struct marker, *work;
    355 	unsigned i;
    356 
    357 	lwp_setspecific(workqueue_key, wq);
    358 
    359 	mutex_enter(&wq->wq_lock);
    360 	for (;;) {
    361 		/*
    362 		 * Wait until there's activity.  If there's no work and
    363 		 * we're dying, stop here.
    364 		 */
    365 		if (TAILQ_EMPTY(&wq->wq_queue) &&
    366 		    TAILQ_EMPTY(&wq->wq_dqueue)) {
    367 			if (wq->wq_dying)
    368 				break;
    369 			cv_wait(&wq->wq_cv, &wq->wq_lock);
    370 			continue;
    371 		}
    372 
    373 		/*
    374 		 * Start a batch of work.  Use a marker to delimit when
    375 		 * the batch ends so we can advance the generation
    376 		 * after the batch.
    377 		 */
    378 		SDT_PROBE1(sdt, linux, work, batch__start,  wq);
    379 		for (i = 0; i < 2; i++) {
    380 			if (TAILQ_EMPTY(q[i]))
    381 				continue;
    382 			TAILQ_INSERT_TAIL(q[i], &marker, work_entry);
    383 			while ((work = TAILQ_FIRST(q[i])) != &marker) {
    384 				void (*func)(struct work_struct *);
    385 
    386 				KASSERT(work_queue(work) == wq);
    387 				KASSERT(work_claimed(work, wq));
    388 				KASSERTMSG((q[i] != &wq->wq_dqueue ||
    389 					container_of(work, struct delayed_work,
    390 					    work)->dw_state ==
    391 					DELAYED_WORK_IDLE),
    392 				    "delayed work %p queued and scheduled",
    393 				    work);
    394 
    395 				TAILQ_REMOVE(q[i], work, work_entry);
    396 				KASSERT(wq->wq_current_work == NULL);
    397 				wq->wq_current_work = work;
    398 				func = work->func;
    399 				release_work(work, wq);
    400 				/* Can't dereference work after this point.  */
    401 
    402 				mutex_exit(&wq->wq_lock);
    403 				SDT_PROBE2(sdt, linux, work, run,  work, wq);
    404 				(*func)(work);
    405 				SDT_PROBE2(sdt, linux, work, done,  work, wq);
    406 				mutex_enter(&wq->wq_lock);
    407 
    408 				KASSERT(wq->wq_current_work == work);
    409 				wq->wq_current_work = NULL;
    410 				cv_broadcast(&wq->wq_cv);
    411 			}
    412 			TAILQ_REMOVE(q[i], &marker, work_entry);
    413 		}
    414 
    415 		/* Notify flush that we've completed a batch of work.  */
    416 		wq->wq_gen++;
    417 		cv_broadcast(&wq->wq_cv);
    418 		SDT_PROBE1(sdt, linux, work, batch__done,  wq);
    419 	}
    420 	mutex_exit(&wq->wq_lock);
    421 
    422 	kthread_exit(0);
    423 }
    424 
    425 /*
    426  * linux_workqueue_timeout(cookie)
    427  *
    428  *	Delayed work timeout callback.
    429  *
    430  *	- If scheduled, queue it.
    431  *	- If rescheduled, callout_schedule ourselves again.
    432  *	- If cancelled, destroy the callout and release the work from
    433  *        the workqueue.
    434  */
    435 static void
    436 linux_workqueue_timeout(void *cookie)
    437 {
    438 	struct delayed_work *const dw = cookie;
    439 	struct workqueue_struct *const wq = work_queue(&dw->work);
    440 
    441 	KASSERTMSG(wq != NULL,
    442 	    "delayed work %p state %d resched %d",
    443 	    dw, dw->dw_state, dw->dw_resched);
    444 
    445 	SDT_PROBE2(sdt, linux, work, timer,  dw, wq);
    446 
    447 	mutex_enter(&wq->wq_lock);
    448 	KASSERT(work_queue(&dw->work) == wq);
    449 	switch (dw->dw_state) {
    450 	case DELAYED_WORK_IDLE:
    451 		panic("delayed work callout uninitialized: %p", dw);
    452 	case DELAYED_WORK_SCHEDULED:
    453 		dw_callout_destroy(wq, dw);
    454 		TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work, work_entry);
    455 		cv_broadcast(&wq->wq_cv);
    456 		SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
    457 		break;
    458 	case DELAYED_WORK_RESCHEDULED:
    459 		KASSERT(dw->dw_resched >= 0);
    460 		callout_schedule(&dw->dw_callout, dw->dw_resched);
    461 		dw->dw_state = DELAYED_WORK_SCHEDULED;
    462 		dw->dw_resched = -1;
    463 		break;
    464 	case DELAYED_WORK_CANCELLED:
    465 		cancel_delayed_work_done(wq, dw);
    466 		/* Can't dereference dw after this point.  */
    467 		goto out;
    468 	default:
    469 		panic("delayed work callout in bad state: %p", dw);
    470 	}
    471 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE ||
    472 	    dw->dw_state == DELAYED_WORK_SCHEDULED);
    473 out:	mutex_exit(&wq->wq_lock);
    474 }
    475 
    476 /*
    477  * current_work()
    478  *
    479  *	If in a workqueue worker thread, return the work it is
    480  *	currently executing.  Otherwise return NULL.
    481  */
    482 struct work_struct *
    483 current_work(void)
    484 {
    485 	struct workqueue_struct *wq = lwp_getspecific(workqueue_key);
    486 
    487 	/* If we're not a workqueue thread, then there's no work.  */
    488 	if (wq == NULL)
    489 		return NULL;
    490 
    491 	/*
    492 	 * Otherwise, this should be possible only while work is in
    493 	 * progress.  Return the current work item.
    494 	 */
    495 	KASSERT(wq->wq_current_work != NULL);
    496 	return wq->wq_current_work;
    497 }
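
        /*
         * Example (illustrative sketch; sc_work is hypothetical): code
         * that can be reached both from the work handler and from other
         * threads can use current_work() to avoid deadlocking by waiting
         * for itself:
         *
         *	if (current_work() != &sc->sc_work)
         *		cancel_work_sync(&sc->sc_work);
         */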
    498 
    499 /*
    501  * Work
    502  */
    503 
    504 /*
    505  * INIT_WORK(work, fn)
    506  *
    507  *	Initialize work for use with a workqueue to call fn in a worker
    508  *	thread.  There is no corresponding destruction operation.
    509  */
    510 void
    511 INIT_WORK(struct work_struct *work, void (*fn)(struct work_struct *))
    512 {
    513 
    514 	work->work_owner = 0;
    515 	work->func = fn;
    516 }
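
        /*
         * Example (illustrative sketch; foo_softc, foo_task, and sc_work
         * are hypothetical):
         *
         *	static void
         *	foo_task(struct work_struct *work)
         *	{
         *		struct foo_softc *sc = container_of(work,
         *		    struct foo_softc, sc_work);
         *
         *		(deferred processing for sc goes here)
         *	}
         *
         *	INIT_WORK(&sc->sc_work, foo_task);
         *
         * and later, e.g. from an interrupt handler:
         *
         *	schedule_work(&sc->sc_work);
         */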
    517 
    518 /*
    519  * work_claimed(work, wq)
    520  *
    521  *	True if work is currently claimed by a workqueue, meaning it is
    522  *	either on the queue or scheduled in a callout.  The workqueue
    523  *	must be wq, and caller must hold wq's lock.
    524  */
    525 static bool
    526 work_claimed(struct work_struct *work, struct workqueue_struct *wq)
    527 {
    528 
    529 	KASSERT(work_queue(work) == wq);
    530 	KASSERT(mutex_owned(&wq->wq_lock));
    531 
    532 	return work->work_owner & 1;
    533 }
    534 
    535 /*
    536  * work_queue(work)
    537  *
    538  *	Return the last queue that work was queued on, or NULL if it
    539  *	was never queued.
    540  */
    541 static struct workqueue_struct *
    542 work_queue(struct work_struct *work)
    543 {
    544 
    545 	return (struct workqueue_struct *)(work->work_owner & ~(uintptr_t)1);
    546 }
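
        /*
         * The work_owner word is a tagged pointer: the low bit is the
         * `claimed' flag and the remaining bits are the workqueue
         * pointer, so at any time it holds one of:
         *
         *	0			never queued
         *	(uintptr_t)wq		last queued on wq, not claimed
         *	(uintptr_t)wq | 1	claimed by wq (queued or scheduled)
         *
         * acquire_work and release_work below set and clear the low bit;
         * work_queue above masks it off to recover the pointer.
         */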
    547 
    548 /*
    549  * acquire_work(work, wq)
    550  *
    551  *	Try to claim work for wq.  If work is already claimed, it must
    552  *	be claimed by wq; return false.  If work is not already
    553  *	claimed, claim it, issue a memory barrier to match any prior
    554  *	release_work, and return true.
    555  *
    556  *	Caller must hold wq's lock.
    557  */
    558 static bool
    559 acquire_work(struct work_struct *work, struct workqueue_struct *wq)
    560 {
    561 	uintptr_t owner0, owner;
    562 
    563 	KASSERT(mutex_owned(&wq->wq_lock));
    564 	KASSERT(((uintptr_t)wq & 1) == 0);
    565 
    566 	owner = (uintptr_t)wq | 1;
    567 	do {
    568 		owner0 = work->work_owner;
    569 		if (owner0 & 1) {
    570 			KASSERT((owner0 & ~(uintptr_t)1) == (uintptr_t)wq);
    571 			return false;
    572 		}
    573 		KASSERT(owner0 == (uintptr_t)NULL || owner0 == (uintptr_t)wq);
    574 	} while (atomic_cas_uintptr(&work->work_owner, owner0, owner) !=
    575 	    owner0);
    576 
    577 	KASSERT(work_queue(work) == wq);
    578 	membar_enter();
    579 	SDT_PROBE2(sdt, linux, work, acquire,  work, wq);
    580 	return true;
    581 }
    582 
    583 /*
    584  * release_work(work, wq)
    585  *
    586  *	Issue a memory barrier to match any subsequent acquire_work and
    587  *	dissociate work from wq.
    588  *
    589  *	Caller must hold wq's lock and work must be associated with wq.
    590  */
    591 static void
    592 release_work(struct work_struct *work, struct workqueue_struct *wq)
    593 {
    594 
    595 	KASSERT(work_queue(work) == wq);
    596 	KASSERT(mutex_owned(&wq->wq_lock));
    597 
    598 	SDT_PROBE2(sdt, linux, work, release,  work, wq);
    599 	membar_exit();
    600 
    601 	/*
    602 	 * Non-interlocked r/m/w is safe here because nobody else can
    603 	 * write to this while the claimed bit is set and the workqueue
    604 	 * lock is held.
    605 	 */
    606 	work->work_owner &= ~(uintptr_t)1;
    607 }
    608 
    609 /*
    610  * schedule_work(work)
    611  *
    612  *	If work is not already queued on system_wq, queue it to be run
    613  *	by system_wq's worker thread when it next can.  True if it was
    614  *	newly queued, false if it was already queued.  If the work was
    615  *	already running, queue it to run again.
    616  *
    617  *	Caller must ensure work is not queued to run on a different
    618  *	workqueue.
    619  */
    620 bool
    621 schedule_work(struct work_struct *work)
    622 {
    623 
    624 	return queue_work(system_wq, work);
    625 }
    626 
    627 /*
    628  * queue_work(wq, work)
    629  *
    630  *	If work is not already queued on wq, queue it to be run by wq's
    631  *	worker thread when it next can.  True if it was newly queued,
    632  *	false if it was already queued.  If the work was already
    633  *	running, queue it to run again.
    634  *
    635  *	Caller must ensure work is not queued to run on a different
    636  *	workqueue.
    637  */
    638 bool
    639 queue_work(struct workqueue_struct *wq, struct work_struct *work)
    640 {
    641 	bool newly_queued;
    642 
    643 	KASSERT(wq != NULL);
    644 
    645 	mutex_enter(&wq->wq_lock);
    646 	if (__predict_true(acquire_work(work, wq))) {
    647 		/*
    648 		 * It wasn't on any workqueue at all.  Put it on this
    649 		 * one, and signal the worker thread that there is work
    650 		 * to do.
    651 		 */
    652 		TAILQ_INSERT_TAIL(&wq->wq_queue, work, work_entry);
    653 		cv_broadcast(&wq->wq_cv);
    654 		SDT_PROBE2(sdt, linux, work, queue,  work, wq);
    655 		newly_queued = true;
    656 	} else {
    657 		/*
    658 		 * It was already on this workqueue.  Nothing to do
    659 		 * since it is already queued.
    660 		 */
    661 		newly_queued = false;
    662 	}
    663 	mutex_exit(&wq->wq_lock);
    664 
    665 	return newly_queued;
    666 }
    667 
    668 /*
    669  * cancel_work(work)
    670  *
    671  *	If work was queued, remove it from the queue and return true.
    672  *	If work was not queued, return false.  Work may still be
    673  *	running when this returns.
    674  */
    675 bool
    676 cancel_work(struct work_struct *work)
    677 {
    678 	struct workqueue_struct *wq;
    679 	bool cancelled_p = false;
    680 
    681 	/* If there's no workqueue, nothing to cancel.   */
    682 	if ((wq = work_queue(work)) == NULL)
    683 		goto out;
    684 
    685 	mutex_enter(&wq->wq_lock);
    686 	if (__predict_false(work_queue(work) != wq)) {
    687 		/*
    688 		 * It has finished execution or been cancelled by
    689 		 * another thread, and has been moved off the
    690 		 * workqueue, so it's too late to cancel.
    691 		 */
    692 		cancelled_p = false;
    693 	} else {
    694 		/* Check whether it's on the queue.  */
    695 		if (work_claimed(work, wq)) {
    696 			/*
    697 			 * It is still on the queue.  Take it off the
    698 			 * queue and report successful cancellation.
    699 			 */
    700 			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
    701 			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
    702 			release_work(work, wq);
    703 			/* Can't dereference work after this point.  */
    704 			cancelled_p = true;
    705 		} else {
    706 			/* Not on the queue.  Couldn't cancel it.  */
    707 			cancelled_p = false;
    708 		}
    709 	}
    710 	mutex_exit(&wq->wq_lock);
    711 
    712 out:	return cancelled_p;
    713 }
    714 
    715 /*
    716  * cancel_work_sync(work)
    717  *
    718  *	If work was queued, remove it from the queue and return true.
    719  *	If work was not queued, return false.  Either way, if work is
    720  *	currently running, wait for it to complete.
    721  *
    722  *	May sleep.
    723  */
    724 bool
    725 cancel_work_sync(struct work_struct *work)
    726 {
    727 	struct workqueue_struct *wq;
    728 	bool cancelled_p = false;
    729 
    730 	/* If there's no workqueue, nothing to cancel.   */
    731 	if ((wq = work_queue(work)) == NULL)
    732 		goto out;
    733 
    734 	mutex_enter(&wq->wq_lock);
    735 	if (__predict_false(work_queue(work) != wq)) {
    736 		/*
    737 		 * It has finished execution or been cancelled by
    738 		 * another thread, and has been moved off the
    739 		 * workqueue, so it's too late to cancel.
    740 		 */
    741 		cancelled_p = false;
    742 	} else {
    743 		/* Check whether it's on the queue.  */
    744 		if (work_claimed(work, wq)) {
    745 			/*
    746 			 * It is still on the queue.  Take it off the
    747 			 * queue and report successful cancellation.
    748 			 */
    749 			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
    750 			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
    751 			release_work(work, wq);
    752 			/* Can't dereference work after this point.  */
    753 			cancelled_p = true;
    754 		} else {
    755 			/* Not on the queue.  Couldn't cancel it.  */
    756 			cancelled_p = false;
    757 		}
    758 		/* If it's still running, wait for it to complete.  */
    759 		if (wq->wq_current_work == work)
    760 			wait_for_current_work(work, wq);
    761 	}
    762 	mutex_exit(&wq->wq_lock);
    763 
    764 out:	return cancelled_p;
    765 }
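
        /*
         * Example (illustrative sketch; sc_work is hypothetical): a
         * typical detach path cancels the work and waits for any
         * in-flight handler before freeing the softc, provided nothing
         * can requeue it afterwards:
         *
         *	cancel_work_sync(&sc->sc_work);
         *	kmem_free(sc, sizeof(*sc));
         */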
    766 
    767 /*
    768  * wait_for_current_work(work, wq)
    769  *
    770  *	wq must be currently executing work.  Wait for it to finish.
    771  *
    772  *	Does not dereference work.
    773  */
    774 static void
    775 wait_for_current_work(struct work_struct *work, struct workqueue_struct *wq)
    776 {
    777 	uint64_t gen;
    778 
    779 	KASSERT(mutex_owned(&wq->wq_lock));
    780 	KASSERT(wq->wq_current_work == work);
    781 
    782 	/* Wait only one generation in case it gets requeued quickly.  */
    783 	SDT_PROBE2(sdt, linux, work, wait__start,  work, wq);
    784 	gen = wq->wq_gen;
    785 	do {
    786 		cv_wait(&wq->wq_cv, &wq->wq_lock);
    787 	} while (wq->wq_current_work == work && wq->wq_gen == gen);
    788 	SDT_PROBE2(sdt, linux, work, wait__done,  work, wq);
    789 }
    790 
    791 /*
    793  * Delayed work
    794  */
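
        /*
         * Informal summary of the dw_state machine implemented below
         * (the individual functions are authoritative):
         *
         *	IDLE		no callout; the work is unqueued, sitting
         *			on wq_dqueue (ticks == 0), or running.
         *	SCHEDULED	callout armed; when it fires it queues the
         *			work and goes back to IDLE.
         *	RESCHEDULED	callout already fired but not yet run to
         *			completion; it will re-arm itself with
         *			dw_resched and return to SCHEDULED.
         *	CANCELLED	callout already fired; it will destroy
         *			itself and release the work, unless
         *			queue/mod re-arm it first.
         *
         * Cancelling or modifying a SCHEDULED work whose callout has not
         * begun yet skips the intermediate state: the caller destroys or
         * reschedules the callout directly.
         */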
    795 
    796 /*
    797  * INIT_DELAYED_WORK(dw, fn)
    798  *
    799  *	Initialize dw for use with a workqueue to call fn in a worker
    800  *	thread after a delay.  There is no corresponding destruction
    801  *	operation.
    802  */
    803 void
    804 INIT_DELAYED_WORK(struct delayed_work *dw, void (*fn)(struct work_struct *))
    805 {
    806 
    807 	INIT_WORK(&dw->work, fn);
    808 	dw->dw_state = DELAYED_WORK_IDLE;
    809 	dw->dw_resched = -1;
    810 
    811 	/*
    812 	 * Defer callout_init until we are going to schedule the
    813 	 * callout, which can then callout_destroy it, because
    814 	 * otherwise since there's no DESTROY_DELAYED_WORK or anything
    815 	 * we have no opportunity to call callout_destroy.
    816 	 */
    817 }
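
        /*
         * Example (illustrative sketch; foo_softc, foo_timeout, and
         * sc_dwork are hypothetical; mstohz converts milliseconds to
         * callout ticks):
         *
         *	static void
         *	foo_timeout(struct work_struct *work)
         *	{
         *		struct delayed_work *dw = container_of(work,
         *		    struct delayed_work, work);
         *		struct foo_softc *sc = container_of(dw,
         *		    struct foo_softc, sc_dwork);
         *
         *		(handle the timeout for sc here)
         *	}
         *
         *	INIT_DELAYED_WORK(&sc->sc_dwork, foo_timeout);
         *	schedule_delayed_work(&sc->sc_dwork, mstohz(1000));
         */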
    818 
    819 /*
    820  * schedule_delayed_work(dw, ticks)
    821  *
    822  *	If it is not currently scheduled, schedule dw to run after
    823  *	ticks on system_wq.  If currently executing and not already
    824  *	rescheduled, reschedule it.  True if it was newly scheduled,
    825  *	false if it was already scheduled.
    826  *
    827  *	If ticks == 0, queue it to run as soon as the worker can,
    828  *	without waiting for the next callout tick to run.
    829  */
    830 bool
    831 schedule_delayed_work(struct delayed_work *dw, unsigned long ticks)
    832 {
    833 
    834 	return queue_delayed_work(system_wq, dw, ticks);
    835 }
    836 
    837 /*
    838  * dw_callout_init(wq, dw)
    839  *
    840  *	Initialize the callout of dw and transition to
    841  *	DELAYED_WORK_SCHEDULED.  Caller must use callout_schedule.
    842  */
    843 static void
    844 dw_callout_init(struct workqueue_struct *wq, struct delayed_work *dw)
    845 {
    846 
    847 	KASSERT(mutex_owned(&wq->wq_lock));
    848 	KASSERT(work_queue(&dw->work) == wq);
    849 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
    850 
    851 	callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
    852 	callout_setfunc(&dw->dw_callout, &linux_workqueue_timeout, dw);
    853 	TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
    854 	dw->dw_state = DELAYED_WORK_SCHEDULED;
    855 }
    856 
    857 /*
    858  * dw_callout_destroy(wq, dw)
    859  *
    860  *	Destroy the callout of dw and transition to DELAYED_WORK_IDLE.
    861  */
    862 static void
    863 dw_callout_destroy(struct workqueue_struct *wq, struct delayed_work *dw)
    864 {
    865 
    866 	KASSERT(mutex_owned(&wq->wq_lock));
    867 	KASSERT(work_queue(&dw->work) == wq);
    868 	KASSERT(dw->dw_state == DELAYED_WORK_SCHEDULED ||
    869 	    dw->dw_state == DELAYED_WORK_RESCHEDULED ||
    870 	    dw->dw_state == DELAYED_WORK_CANCELLED);
    871 
    872 	TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
    873 	callout_destroy(&dw->dw_callout);
    874 	dw->dw_resched = -1;
    875 	dw->dw_state = DELAYED_WORK_IDLE;
    876 }
    877 
    878 /*
    879  * cancel_delayed_work_done(wq, dw)
    880  *
    881  *	Complete cancellation of a delayed work: transition from
    882  *	DELAYED_WORK_CANCELLED to DELAYED_WORK_IDLE and off the
    883  *	workqueue.  Caller must not dereference dw after this returns.
    884  */
    885 static void
    886 cancel_delayed_work_done(struct workqueue_struct *wq, struct delayed_work *dw)
    887 {
    888 
    889 	KASSERT(mutex_owned(&wq->wq_lock));
    890 	KASSERT(work_queue(&dw->work) == wq);
    891 	KASSERT(dw->dw_state == DELAYED_WORK_CANCELLED);
    892 
    893 	dw_callout_destroy(wq, dw);
    894 	release_work(&dw->work, wq);
    895 	/* Can't dereference dw after this point.  */
    896 }
    897 
    898 /*
    899  * queue_delayed_work(wq, dw, ticks)
    900  *
    901  *	If it is not currently scheduled, schedule dw to run after
    902  *	ticks on wq.  True if it was newly scheduled, false if it was
    903  *	already scheduled or queued.
    904  *
    905  *	If ticks == 0, queue it to run as soon as the worker can,
    906  *	without waiting for the next callout tick to run.
    907  */
    908 bool
    909 queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    910     unsigned long ticks)
    911 {
    912 	bool newly_queued;
    913 
    914 	mutex_enter(&wq->wq_lock);
    915 	if (__predict_true(acquire_work(&dw->work, wq))) {
    916 		/*
    917 		 * It wasn't on any workqueue at all.  Schedule it to
    918 		 * run on this one.
    919 		 */
    920 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
    921 		if (ticks == 0) {
    922 			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
    923 			    work_entry);
    924 			cv_broadcast(&wq->wq_cv);
    925 			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
    926 		} else {
    927 			/*
    928 			 * Initialize a callout and schedule to run
    929 			 * after a delay.
    930 			 */
    931 			dw_callout_init(wq, dw);
    932 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
    933 			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
    934 		}
    935 		newly_queued = true;
    936 	} else {
    937 		/* It was already on this workqueue.  */
    938 		switch (dw->dw_state) {
    939 		case DELAYED_WORK_IDLE:
    940 		case DELAYED_WORK_SCHEDULED:
    941 		case DELAYED_WORK_RESCHEDULED:
    942 			/* On the queue or already scheduled.  Leave it.  */
    943 			newly_queued = false;
    944 			break;
    945 		case DELAYED_WORK_CANCELLED:
    946 			/*
    947 			 * Scheduled and the callout began, but it was
    948 			 * cancelled.  Reschedule it.
    949 			 */
    950 			if (ticks == 0) {
    951 				dw->dw_state = DELAYED_WORK_SCHEDULED;
    952 				SDT_PROBE2(sdt, linux, work, queue,
    953 				    &dw->work, wq);
    954 			} else {
    955 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
    956 				dw->dw_resched = MIN(INT_MAX, ticks);
    957 				SDT_PROBE3(sdt, linux, work, schedule,
    958 				    dw, wq, ticks);
    959 			}
    960 			newly_queued = true;
    961 			break;
    962 		default:
    963 			panic("invalid delayed work state: %d",
    964 			    dw->dw_state);
    965 		}
    966 	}
    967 	mutex_exit(&wq->wq_lock);
    968 
    969 	return newly_queued;
    970 }
    971 
    972 /*
    973  * mod_delayed_work(wq, dw, ticks)
    974  *
    975  *	Schedule dw to run after ticks.  If scheduled or queued,
    976  *	reschedule.  If ticks == 0, run without delay.
    977  *
    978  *	True if it modified the timer of an already scheduled work,
    979  *	false if it newly scheduled the work.
    980  */
    981 bool
    982 mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    983     unsigned long ticks)
    984 {
    985 	bool timer_modified;
    986 
    987 	mutex_enter(&wq->wq_lock);
    988 	if (acquire_work(&dw->work, wq)) {
    989 		/*
    990 		 * It wasn't on any workqueue at all.  Schedule it to
    991 		 * run on this one.
    992 		 */
    993 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
    994 		if (ticks == 0) {
    995 			/*
    996 			 * Run immediately: put it on the queue and
    997 			 * signal the worker thread.
    998 			 */
    999 			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
   1000 			    work_entry);
   1001 			cv_broadcast(&wq->wq_cv);
   1002 			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
   1003 		} else {
   1004 			/*
   1005 			 * Initialize a callout and schedule to run
   1006 			 * after a delay.
   1007 			 */
   1008 			dw_callout_init(wq, dw);
   1009 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
   1010 			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
   1011 		}
   1012 		timer_modified = false;
   1013 	} else {
   1014 		/* It was already on this workqueue.  */
   1015 		switch (dw->dw_state) {
   1016 		case DELAYED_WORK_IDLE:
   1017 			/* On the queue.  */
   1018 			if (ticks == 0) {
   1019 				/* Leave it be.  */
   1020 				SDT_PROBE2(sdt, linux, work, cancel,
   1021 				    &dw->work, wq);
   1022 				SDT_PROBE2(sdt, linux, work, queue,
   1023 				    &dw->work, wq);
   1024 			} else {
   1025 				/* Remove from the queue and schedule.  */
   1026 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
   1027 				    work_entry);
   1028 				dw_callout_init(wq, dw);
   1029 				callout_schedule(&dw->dw_callout,
   1030 				    MIN(INT_MAX, ticks));
   1031 				SDT_PROBE2(sdt, linux, work, cancel,
   1032 				    &dw->work, wq);
   1033 				SDT_PROBE3(sdt, linux, work, schedule,
   1034 				    dw, wq, ticks);
   1035 			}
   1036 			timer_modified = true;
   1037 			break;
   1038 		case DELAYED_WORK_SCHEDULED:
   1039 			/*
   1040 			 * It is scheduled to run after a delay.  Try
   1041 			 * to stop it and reschedule it; if we can't,
   1042 			 * either reschedule it or cancel it to put it
   1043 			 * on the queue, and inform the callout.
   1044 			 */
   1045 			if (callout_stop(&dw->dw_callout)) {
   1046 				/* Can't stop, callout has begun.  */
   1047 				if (ticks == 0) {
   1048 					/*
   1049 					 * We don't actually need to do
   1050 					 * anything.  The callout will
   1051 					 * queue it as soon as it gets
   1052 					 * the lock.
   1053 					 */
   1054 					SDT_PROBE2(sdt, linux, work, cancel,
   1055 					    &dw->work, wq);
   1056 					SDT_PROBE2(sdt, linux, work, queue,
   1057 					    &dw->work, wq);
   1058 				} else {
   1059 					/* Ask the callout to reschedule.  */
   1060 					dw->dw_state = DELAYED_WORK_RESCHEDULED;
   1061 					dw->dw_resched = MIN(INT_MAX, ticks);
   1062 					SDT_PROBE2(sdt, linux, work, cancel,
   1063 					    &dw->work, wq);
   1064 					SDT_PROBE3(sdt, linux, work, schedule,
   1065 					    dw, wq, ticks);
   1066 				}
   1067 			} else {
   1068 				/* We stopped the callout before it began.  */
   1069 				if (ticks == 0) {
   1070 					/*
   1071 					 * Run immediately: destroy the
   1072 					 * callout, put it on the
   1073 					 * queue, and signal the worker
   1074 					 * thread.
   1075 					 */
   1076 					dw_callout_destroy(wq, dw);
   1077 					TAILQ_INSERT_TAIL(&wq->wq_dqueue,
   1078 					    &dw->work, work_entry);
   1079 					cv_broadcast(&wq->wq_cv);
   1080 					SDT_PROBE2(sdt, linux, work, cancel,
   1081 					    &dw->work, wq);
   1082 					SDT_PROBE2(sdt, linux, work, queue,
   1083 					    &dw->work, wq);
   1084 				} else {
   1085 					/*
   1086 					 * Reschedule the callout.  No
   1087 					 * state change.
   1088 					 */
   1089 					callout_schedule(&dw->dw_callout,
   1090 					    MIN(INT_MAX, ticks));
   1091 					SDT_PROBE2(sdt, linux, work, cancel,
   1092 					    &dw->work, wq);
   1093 					SDT_PROBE3(sdt, linux, work, schedule,
   1094 					    dw, wq, ticks);
   1095 				}
   1096 			}
   1097 			timer_modified = true;
   1098 			break;
   1099 		case DELAYED_WORK_RESCHEDULED:
   1100 			/*
   1101 			 * Someone rescheduled it after the callout
   1102 			 * started but before the poor thing even had a
   1103 			 * chance to acquire the lock.
   1104 			 */
   1105 			if (ticks == 0) {
   1106 				/*
   1107 				 * We can just switch back to
   1108 				 * DELAYED_WORK_SCHEDULED so that the
   1109 				 * callout will queue the work as soon
   1110 				 * as it gets the lock.
   1111 				 */
   1112 				dw->dw_state = DELAYED_WORK_SCHEDULED;
   1113 				dw->dw_resched = -1;
   1114 				SDT_PROBE2(sdt, linux, work, cancel,
   1115 				    &dw->work, wq);
   1116 				SDT_PROBE2(sdt, linux, work, queue,
   1117 				    &dw->work, wq);
   1118 			} else {
   1119 				/* Change the rescheduled time.  */
   1120 				dw->dw_resched = ticks;
   1121 				SDT_PROBE2(sdt, linux, work, cancel,
   1122 				    &dw->work, wq);
   1123 				SDT_PROBE3(sdt, linux, work, schedule,
   1124 				    dw, wq, ticks);
   1125 			}
   1126 			timer_modified = true;
   1127 			break;
   1128 		case DELAYED_WORK_CANCELLED:
   1129 			/*
   1130 			 * Someone cancelled it after the callout
   1131 			 * started but before the poor thing even had a
   1132 			 * chance to acquire the lock.
   1133 			 */
   1134 			if (ticks == 0) {
   1135 				/*
   1136 				 * We can just switch back to
   1137 				 * DELAYED_WORK_SCHEDULED so that the
   1138 				 * callout will queue the work as soon
   1139 				 * as it gets the lock.
   1140 				 */
   1141 				dw->dw_state = DELAYED_WORK_SCHEDULED;
   1142 				SDT_PROBE2(sdt, linux, work, queue,
   1143 				    &dw->work, wq);
   1144 			} else {
   1145 				/* Ask it to reschedule.  */
   1146 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
   1147 				dw->dw_resched = MIN(INT_MAX, ticks);
   1148 				SDT_PROBE3(sdt, linux, work, schedule,
   1149 				    dw, wq, ticks);
   1150 			}
   1151 			timer_modified = false;
   1152 			break;
   1153 		default:
   1154 			panic("invalid delayed work state: %d", dw->dw_state);
   1155 		}
   1156 	}
   1157 	mutex_exit(&wq->wq_lock);
   1158 
   1159 	return timer_modified;
   1160 }
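
        /*
         * Example (illustrative sketch; sc_watchdog is hypothetical): a
         * common use is a watchdog that is pushed back every time
         * activity is seen, so the handler only runs after the device
         * has been idle for the whole interval:
         *
         *	mod_delayed_work(system_wq, &sc->sc_watchdog, mstohz(5000));
         */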
   1161 
   1162 /*
   1163  * cancel_delayed_work(dw)
   1164  *
   1165  *	If work was scheduled or queued, remove it from the schedule or
   1166  *	queue and return true.  If work was not scheduled or queued,
   1167  *	return false.  Note that work may already be running; if it
   1168  *	hasn't been rescheduled or requeued, then cancel_delayed_work
   1169  *	will return false, and either way, cancel_delayed_work will NOT
   1170  *	wait for the work to complete.
   1171  */
   1172 bool
   1173 cancel_delayed_work(struct delayed_work *dw)
   1174 {
   1175 	struct workqueue_struct *wq;
   1176 	bool cancelled_p;
   1177 
   1178 	/* If there's no workqueue, nothing to cancel.   */
   1179 	if ((wq = work_queue(&dw->work)) == NULL)
   1180 		return false;
   1181 
   1182 	mutex_enter(&wq->wq_lock);
   1183 	if (__predict_false(work_queue(&dw->work) != wq)) {
   1184 		cancelled_p = false;
   1185 	} else {
   1186 		switch (dw->dw_state) {
   1187 		case DELAYED_WORK_IDLE:
   1188 			/*
   1189 			 * It is either on the queue or already running
   1190 			 * or both.
   1191 			 */
   1192 			if (work_claimed(&dw->work, wq)) {
   1193 				/* On the queue.  Remove and release.  */
   1194 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
   1195 				    work_entry);
   1196 				SDT_PROBE2(sdt, linux, work, cancel,
   1197 				    &dw->work, wq);
   1198 				release_work(&dw->work, wq);
   1199 				/* Can't dereference dw after this point.  */
   1200 				cancelled_p = true;
   1201 			} else {
   1202 				/* Not on the queue, so didn't cancel.  */
   1203 				cancelled_p = false;
   1204 			}
   1205 			break;
   1206 		case DELAYED_WORK_SCHEDULED:
   1207 			/*
   1208 			 * If it is scheduled, mark it cancelled and
   1209 			 * try to stop the callout before it starts.
   1210 			 *
   1211 			 * If it's too late and the callout has already
   1212 			 * begun to execute, tough.
   1213 			 *
   1214 			 * If we stopped the callout before it started,
   1215 			 * however, then destroy the callout and
   1216 			 * dissociate it from the workqueue ourselves.
   1217 			 */
   1218 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1219 			cancelled_p = true;
   1220 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
   1221 			if (!callout_stop(&dw->dw_callout))
   1222 				cancel_delayed_work_done(wq, dw);
   1223 			break;
   1224 		case DELAYED_WORK_RESCHEDULED:
   1225 			/*
   1226 			 * If it is being rescheduled, the callout has
   1227 			 * already fired.  We must ask it to cancel.
   1228 			 */
   1229 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1230 			dw->dw_resched = -1;
   1231 			cancelled_p = true;
   1232 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
   1233 			break;
   1234 		case DELAYED_WORK_CANCELLED:
   1235 			/*
   1236 			 * If it is being cancelled, the callout has
   1237 			 * already fired.  There is nothing more for us
   1238 			 * to do.  Someone else claims credit for
   1239 			 * cancelling it.
   1240 			 */
   1241 			cancelled_p = false;
   1242 			break;
   1243 		default:
   1244 			panic("invalid delayed work state: %d",
   1245 			    dw->dw_state);
   1246 		}
   1247 	}
   1248 	mutex_exit(&wq->wq_lock);
   1249 
   1250 	return cancelled_p;
   1251 }
   1252 
   1253 /*
   1254  * cancel_delayed_work_sync(dw)
   1255  *
   1256  *	If work was scheduled or queued, remove it from the schedule or
   1257  *	queue and return true.  If work was not scheduled or queued,
   1258  *	return false.  Note that work may already be running; if it
   1259  *	hasn't been rescheduled or requeued, then cancel_delayed_work
   1260  *	will return false; either way, wait for it to complete.
   1261  */
   1262 bool
   1263 cancel_delayed_work_sync(struct delayed_work *dw)
   1264 {
   1265 	struct workqueue_struct *wq;
   1266 	bool cancelled_p;
   1267 
   1268 	/* If there's no workqueue, nothing to cancel.  */
   1269 	if ((wq = work_queue(&dw->work)) == NULL)
   1270 		return false;
   1271 
   1272 	mutex_enter(&wq->wq_lock);
   1273 	if (__predict_false(work_queue(&dw->work) != wq)) {
   1274 		cancelled_p = false;
   1275 	} else {
   1276 		switch (dw->dw_state) {
   1277 		case DELAYED_WORK_IDLE:
   1278 			/*
   1279 			 * It is either on the queue or already running
   1280 			 * or both.
   1281 			 */
   1282 			if (work_claimed(&dw->work, wq)) {
   1283 				/* On the queue.  Remove and release.  */
   1284 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
   1285 				    work_entry);
   1286 				SDT_PROBE2(sdt, linux, work, cancel,
   1287 				    &dw->work, wq);
   1288 				release_work(&dw->work, wq);
   1289 				/* Can't dereference dw after this point.  */
   1290 				cancelled_p = true;
   1291 			} else {
   1292 				/* Not on the queue, so didn't cancel. */
   1293 				cancelled_p = false;
   1294 			}
   1295 			/* If it's still running, wait for it to complete.  */
   1296 			if (wq->wq_current_work == &dw->work)
   1297 				wait_for_current_work(&dw->work, wq);
   1298 			break;
   1299 		case DELAYED_WORK_SCHEDULED:
   1300 			/*
   1301 			 * If it is scheduled, mark it cancelled and
   1302 			 * try to stop the callout before it starts.
   1303 			 *
   1304 			 * If it's too late and the callout has already
   1305 			 * begun to execute, we must wait for it to
   1306 			 * complete.  But we got in soon enough to ask
   1307 			 * the callout not to run, so we successfully
   1308 			 * cancelled it in that case.
   1309 			 *
   1310 			 * If we stopped the callout before it started,
   1311 			 * then we must destroy the callout and
   1312 			 * dissociate it from the workqueue ourselves.
   1313 			 */
   1314 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1315 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
   1316 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
   1317 				cancel_delayed_work_done(wq, dw);
   1318 			cancelled_p = true;
   1319 			break;
   1320 		case DELAYED_WORK_RESCHEDULED:
   1321 			/*
   1322 			 * If it is being rescheduled, the callout has
   1323 			 * already fired.  We must ask it to cancel and
   1324 			 * wait for it to complete.
   1325 			 */
   1326 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1327 			dw->dw_resched = -1;
   1328 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
   1329 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
   1330 			cancelled_p = true;
   1331 			break;
   1332 		case DELAYED_WORK_CANCELLED:
   1333 			/*
   1334 			 * If it is being cancelled, the callout has
   1335 			 * already fired.  We need only wait for it to
   1336 			 * complete.  Someone else, however, claims
   1337 			 * credit for cancelling it.
   1338 			 */
   1339 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
   1340 			cancelled_p = false;
   1341 			break;
   1342 		default:
   1343 			panic("invalid delayed work state: %d",
   1344 			    dw->dw_state);
   1345 		}
   1346 	}
   1347 	mutex_exit(&wq->wq_lock);
   1348 
   1349 	return cancelled_p;
   1350 }
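
        /*
         * Example (illustrative sketch; sc_dwork and sc_wq are
         * hypothetical): a detach path normally cancels its delayed work
         * synchronously before destroying the workqueue it runs on,
         * mirroring the cancellation loop in destroy_workqueue:
         *
         *	cancel_delayed_work_sync(&sc->sc_dwork);
         *	destroy_workqueue(sc->sc_wq);
         */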
   1351 
   1352 /*
   1354  * Flush
   1355  */
   1356 
   1357 /*
   1358  * flush_scheduled_work()
   1359  *
   1360  *	Wait for all work queued on system_wq to complete.  This does
   1361  *	not include delayed work.
   1362  */
   1363 void
   1364 flush_scheduled_work(void)
   1365 {
   1366 
   1367 	flush_workqueue(system_wq);
   1368 }
   1369 
   1370 /*
   1371  * flush_workqueue_locked(wq)
   1372  *
   1373  *	Wait for all work queued on wq to complete.  This does not
   1374  *	include delayed work.
   1375  *
   1376  *	Caller must hold wq's lock.
   1377  */
   1378 static void
   1379 flush_workqueue_locked(struct workqueue_struct *wq)
   1380 {
   1381 	uint64_t gen;
   1382 
   1383 	KASSERT(mutex_owned(&wq->wq_lock));
   1384 
   1385 	/* Get the current generation number.  */
   1386 	gen = wq->wq_gen;
   1387 
   1388 	/*
   1389 	 * If there's a batch of work in progress, we must wait for the
   1390 	 * worker thread to finish that batch.
   1391 	 */
   1392 	if (wq->wq_current_work != NULL)
   1393 		gen++;
   1394 
   1395 	/*
   1396 	 * If there's any work yet to be claimed from the queue by the
   1397 	 * worker thread, we must wait for it to finish one more batch
   1398 	 * too.
   1399 	 */
   1400 	if (!TAILQ_EMPTY(&wq->wq_queue) || !TAILQ_EMPTY(&wq->wq_dqueue))
   1401 		gen++;
   1402 
   1403 	/* Wait until the generation number has caught up.  */
   1404 	SDT_PROBE1(sdt, linux, work, flush__start,  wq);
   1405 	while (wq->wq_gen < gen)
   1406 		cv_wait(&wq->wq_cv, &wq->wq_lock);
   1407 	SDT_PROBE1(sdt, linux, work, flush__done,  wq);
   1408 }
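
        /*
         * Worked example of the generation arithmetic above: if wq_gen
         * is 10, the worker is in the middle of a batch, and more work
         * is sitting on wq_queue, then gen becomes 12, and the flush
         * returns only once the worker has finished the current batch
         * (wq_gen 11) and one further batch that picks up the queued
         * work (wq_gen 12).
         */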
   1409 
   1410 /*
   1411  * flush_workqueue(wq)
   1412  *
   1413  *	Wait for all work queued on wq to complete.  This does not
   1414  *	include delayed work.
   1415  */
   1416 void
   1417 flush_workqueue(struct workqueue_struct *wq)
   1418 {
   1419 
   1420 	mutex_enter(&wq->wq_lock);
   1421 	flush_workqueue_locked(wq);
   1422 	mutex_exit(&wq->wq_lock);
   1423 }
   1424 
   1425 /*
   1426  * flush_work(work)
   1427  *
   1428  *	If work is queued or currently executing, wait for it to
   1429  *	complete.
   1430  */
   1431 void
   1432 flush_work(struct work_struct *work)
   1433 {
   1434 	struct workqueue_struct *wq;
   1435 
   1436 	/* If there's no workqueue, nothing to flush.  */
   1437 	if ((wq = work_queue(work)) == NULL)
   1438 		return;
   1439 
   1440 	flush_workqueue(wq);
   1441 }
   1442 
   1443 /*
   1444  * flush_delayed_work(dw)
   1445  *
   1446  *	If dw is scheduled to run after a delay, queue it immediately
   1447  *	instead.  Then, if dw is queued or currently executing, wait
   1448  *	for it to complete.
   1449  */
   1450 void
   1451 flush_delayed_work(struct delayed_work *dw)
   1452 {
   1453 	struct workqueue_struct *wq;
   1454 
   1455 	/* If there's no workqueue, nothing to flush.  */
   1456 	if ((wq = work_queue(&dw->work)) == NULL)
   1457 		return;
   1458 
   1459 	mutex_enter(&wq->wq_lock);
   1460 	if (__predict_false(work_queue(&dw->work) != wq)) {
   1461 		/*
   1462 		 * Moved off the queue already (and possibly to another
   1463 		 * queue, though that would be ill-advised), so it must
   1464 		 * have completed, and we have nothing more to do.
   1465 		 */
   1466 	} else {
   1467 		switch (dw->dw_state) {
   1468 		case DELAYED_WORK_IDLE:
   1469 			/*
   1470 			 * It has a workqueue assigned and the callout
   1471 			 * is idle, so it must be in progress or on the
   1472 			 * queue.  In that case, we'll wait for it to
   1473 			 * complete.
   1474 			 */
   1475 			break;
   1476 		case DELAYED_WORK_SCHEDULED:
   1477 		case DELAYED_WORK_RESCHEDULED:
   1478 		case DELAYED_WORK_CANCELLED:
   1479 			/*
   1480 			 * The callout is scheduled, and may have even
   1481 			 * started.  Mark it as scheduled so that if
   1482 			 * the callout has fired it will queue the work
   1483 			 * itself.  Try to stop the callout -- if we
   1484 			 * can, queue the work now; if we can't, wait
   1485 			 * for the callout to complete, which entails
   1486 			 * queueing it.
   1487 			 */
   1488 			dw->dw_state = DELAYED_WORK_SCHEDULED;
   1489 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock)) {
   1490 				/*
   1491 				 * We stopped it before it ran.  No
   1492 				 * state change in the interim is
   1493 				 * possible.  Destroy the callout and
   1494 				 * queue it ourselves.
   1495 				 */
   1496 				KASSERT(dw->dw_state ==
   1497 				    DELAYED_WORK_SCHEDULED);
   1498 				dw_callout_destroy(wq, dw);
   1499 				TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
   1500 				    work_entry);
   1501 				cv_broadcast(&wq->wq_cv);
   1502 				SDT_PROBE2(sdt, linux, work, queue,
   1503 				    &dw->work, wq);
   1504 			}
   1505 			break;
   1506 		default:
   1507 			panic("invalid delayed work state: %d", dw->dw_state);
   1508 		}
   1509 		/*
   1510 		 * Waiting for the whole queue to flush is overkill,
   1511 		 * but doesn't hurt.
   1512 		 */
   1513 		flush_workqueue_locked(wq);
   1514 	}
   1515 	mutex_exit(&wq->wq_lock);
   1516 }
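
        /*
         * Example (illustrative sketch; foo_tick, sc_tick, and sc_dying
         * are hypothetical): a periodic task can reschedule itself from
         * its own handler; to stop it, the driver sets sc_dying so the
         * handler stops re-arming and then calls
         * cancel_delayed_work_sync(&sc->sc_tick):
         *
         *	static void
         *	foo_tick(struct work_struct *work)
         *	{
         *		struct delayed_work *dw = container_of(work,
         *		    struct delayed_work, work);
         *		struct foo_softc *sc = container_of(dw,
         *		    struct foo_softc, sc_tick);
         *
         *		(periodic processing for sc here)
         *		if (!sc->sc_dying)
         *			schedule_delayed_work(&sc->sc_tick,
         *			    mstohz(1000));
         *	}
         */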
   1517