linux_work.c revision 1.52
      1 /*	$NetBSD: linux_work.c,v 1.52 2021/12/19 01:51:02 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2018 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Taylor R. Campbell.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: linux_work.c,v 1.52 2021/12/19 01:51:02 riastradh Exp $");
     34 
     35 #include <sys/types.h>
     36 #include <sys/atomic.h>
     37 #include <sys/callout.h>
     38 #include <sys/condvar.h>
     39 #include <sys/errno.h>
     40 #include <sys/kmem.h>
     41 #include <sys/kthread.h>
     42 #include <sys/lwp.h>
     43 #include <sys/mutex.h>
     44 #ifndef _MODULE
     45 #include <sys/once.h>
     46 #endif
     47 #include <sys/queue.h>
     48 #include <sys/sdt.h>
     49 
     50 #include <linux/workqueue.h>
     51 
     52 TAILQ_HEAD(work_head, work_struct);
     53 TAILQ_HEAD(dwork_head, delayed_work);
     54 
     55 struct workqueue_struct {
     56 	kmutex_t		wq_lock;
     57 	kcondvar_t		wq_cv;
     58 	struct dwork_head	wq_delayed; /* delayed work scheduled */
     59 	struct work_head	wq_queue;   /* work to run */
     60 	struct work_head	wq_dqueue;  /* delayed work to run now */
     61 	struct work_struct	*wq_current_work;
     62 	int			wq_flags;
     63 	bool			wq_dying;
     64 	uint64_t		wq_gen;
     65 	struct lwp		*wq_lwp;
     66 	const char		*wq_name;
     67 };
     68 
     69 static void __dead	linux_workqueue_thread(void *);
     70 static void		linux_workqueue_timeout(void *);
     71 static bool		work_claimed(struct work_struct *,
     72 			    struct workqueue_struct *);
     73 static struct workqueue_struct *
     74 			work_queue(struct work_struct *);
     75 static bool		acquire_work(struct work_struct *,
     76 			    struct workqueue_struct *);
     77 static void		release_work(struct work_struct *,
     78 			    struct workqueue_struct *);
     79 static void		wait_for_current_work(struct work_struct *,
     80 			    struct workqueue_struct *);
     81 static void		dw_callout_init(struct workqueue_struct *,
     82 			    struct delayed_work *);
     83 static void		dw_callout_destroy(struct workqueue_struct *,
     84 			    struct delayed_work *);
     85 static void		cancel_delayed_work_done(struct workqueue_struct *,
     86 			    struct delayed_work *);
     87 
     88 SDT_PROBE_DEFINE2(sdt, linux, work, acquire,
     89     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
     90 SDT_PROBE_DEFINE2(sdt, linux, work, release,
     91     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
     92 SDT_PROBE_DEFINE2(sdt, linux, work, queue,
     93     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
     94 SDT_PROBE_DEFINE2(sdt, linux, work, cancel,
     95     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
     96 SDT_PROBE_DEFINE3(sdt, linux, work, schedule,
     97     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/,
     98     "unsigned long"/*ticks*/);
     99 SDT_PROBE_DEFINE2(sdt, linux, work, timer,
    100     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
    101 SDT_PROBE_DEFINE2(sdt, linux, work, wait__start,
    102     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
    103 SDT_PROBE_DEFINE2(sdt, linux, work, wait__done,
    104     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
    105 SDT_PROBE_DEFINE2(sdt, linux, work, run,
    106     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
    107 SDT_PROBE_DEFINE2(sdt, linux, work, done,
    108     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
    109 SDT_PROBE_DEFINE1(sdt, linux, work, batch__start,
    110     "struct workqueue_struct *"/*wq*/);
    111 SDT_PROBE_DEFINE1(sdt, linux, work, batch__done,
    112     "struct workqueue_struct *"/*wq*/);
    113 SDT_PROBE_DEFINE1(sdt, linux, work, flush__start,
    114     "struct workqueue_struct *"/*wq*/);
    115 SDT_PROBE_DEFINE1(sdt, linux, work, flush__done,
    116     "struct workqueue_struct *"/*wq*/);
    117 
    118 static specificdata_key_t workqueue_key __read_mostly;
    119 
    120 struct workqueue_struct	*system_wq __read_mostly;
    121 struct workqueue_struct	*system_long_wq __read_mostly;
    122 struct workqueue_struct	*system_power_efficient_wq __read_mostly;
    123 struct workqueue_struct	*system_unbound_wq __read_mostly;
    124 
    125 static inline uintptr_t
    126 atomic_cas_uintptr(volatile uintptr_t *p, uintptr_t old, uintptr_t new)
    127 {
    128 
    129 	return (uintptr_t)atomic_cas_ptr(p, (void *)old, (void *)new);
    130 }
    131 
    132 /*
    133  * linux_workqueue_init()
    134  *
    135  *	Initialize the Linux workqueue subsystem.  Return 0 on success,
    136  *	NetBSD error on failure.
    137  */
    138 static int
    139 linux_workqueue_init0(void)
    140 {
    141 	int error;
    142 
    143 	error = lwp_specific_key_create(&workqueue_key, NULL);
    144 	if (error)
    145 		goto fail0;
    146 
    147 	system_wq = alloc_ordered_workqueue("lnxsyswq", 0);
    148 	if (system_wq == NULL) {
    149 		error = ENOMEM;
    150 		goto fail1;
    151 	}
    152 
    153 	system_long_wq = alloc_ordered_workqueue("lnxlngwq", 0);
    154 	if (system_long_wq == NULL) {
    155 		error = ENOMEM;
    156 		goto fail2;
    157 	}
    158 
    159 	system_power_efficient_wq = alloc_ordered_workqueue("lnxpwrwq", 0);
    160 	if (system_power_efficient_wq == NULL) {
    161 		error = ENOMEM;
    162 		goto fail3;
    163 	}
    164 
    165 	system_unbound_wq = alloc_ordered_workqueue("lnxubdwq", 0);
    166 	if (system_unbound_wq == NULL) {
    167 		error = ENOMEM;
    168 		goto fail4;
    169 	}
    170 
    171 	return 0;
    172 
    173 fail5: __unused
    174 	destroy_workqueue(system_unbound_wq);
    175 fail4:	destroy_workqueue(system_power_efficient_wq);
    176 fail3:	destroy_workqueue(system_long_wq);
    177 fail2:	destroy_workqueue(system_wq);
    178 fail1:	lwp_specific_key_delete(workqueue_key);
    179 fail0:	KASSERT(error);
    180 	return error;
    181 }
    182 
    183 /*
    184  * linux_workqueue_fini()
    185  *
    186  *	Destroy the Linux workqueue subsystem.  Never fails.
    187  */
    188 static void
    189 linux_workqueue_fini0(void)
    190 {
    191 
         	destroy_workqueue(system_unbound_wq);
     192 	destroy_workqueue(system_power_efficient_wq);
    193 	destroy_workqueue(system_long_wq);
    194 	destroy_workqueue(system_wq);
    195 	lwp_specific_key_delete(workqueue_key);
    196 }
    197 
    198 #ifndef _MODULE
    199 static ONCE_DECL(linux_workqueue_init_once);
    200 #endif
    201 
    202 int
    203 linux_workqueue_init(void)
    204 {
    205 #ifdef _MODULE
    206 	return linux_workqueue_init0();
    207 #else
    208 	return INIT_ONCE(&linux_workqueue_init_once, &linux_workqueue_init0);
    209 #endif
    210 }
    211 
    212 void
    213 linux_workqueue_fini(void)
    214 {
    215 #ifdef _MODULE
    216 	return linux_workqueue_fini0();
    217 #else
    218 	return FINI_ONCE(&linux_workqueue_init_once, &linux_workqueue_fini0);
    219 #endif
    220 }
    221 
    222 /*
    224  * Workqueues
    225  */
    226 
    227 /*
    228  * alloc_workqueue(name, flags, max_active)
    229  *
    230  *	Create a workqueue of the given name.  max_active is the
    231  *	maximum number of work items in flight, or 0 for the default.
    232  *	Return NULL on failure, pointer to struct workqueue_struct
    233  *	object on success.
    234  */
    235 struct workqueue_struct *
    236 alloc_workqueue(const char *name, int flags, unsigned max_active)
    237 {
    238 	struct workqueue_struct *wq;
    239 	int error;
    240 
    241 	KASSERT(max_active == 0 || max_active == 1);
    242 
    243 	wq = kmem_zalloc(sizeof(*wq), KM_SLEEP);
    244 
    245 	mutex_init(&wq->wq_lock, MUTEX_DEFAULT, IPL_VM);
    246 	cv_init(&wq->wq_cv, name);
    247 	TAILQ_INIT(&wq->wq_delayed);
    248 	TAILQ_INIT(&wq->wq_queue);
    249 	TAILQ_INIT(&wq->wq_dqueue);
    250 	wq->wq_current_work = NULL;
    251 	wq->wq_flags = 0;
    252 	wq->wq_dying = false;
    253 	wq->wq_gen = 0;
    254 	wq->wq_lwp = NULL;
    255 	wq->wq_name = name;
    256 
    257 	error = kthread_create(PRI_NONE,
    258 	    KTHREAD_MPSAFE|KTHREAD_TS|KTHREAD_MUSTJOIN, NULL,
    259 	    &linux_workqueue_thread, wq, &wq->wq_lwp, "%s", name);
    260 	if (error)
    261 		goto fail0;
    262 
    263 	return wq;
    264 
    265 fail0:	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
    266 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
    267 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
    268 	cv_destroy(&wq->wq_cv);
    269 	mutex_destroy(&wq->wq_lock);
    270 	kmem_free(wq, sizeof(*wq));
    271 	return NULL;
    272 }
    273 
    274 /*
    275  * alloc_ordered_workqueue(name, flags)
    276  *
    277  *	Same as alloc_workqueue(name, flags, 1).
    278  */
    279 struct workqueue_struct *
    280 alloc_ordered_workqueue(const char *name, int flags)
    281 {
    282 
    283 	return alloc_workqueue(name, flags, 1);
    284 }
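
         /*
          * Example (an illustrative sketch, not part of this file): a
          * driver can create a private ordered workqueue at attach time
          * and destroy it at detach time.  The mydrv_* names and softc
          * layout here are hypothetical.
          *
          *	struct mydrv_softc {
          *		struct workqueue_struct	*sc_wq;
          *	};
          *
          *	static int
          *	mydrv_attach(struct mydrv_softc *sc)
          *	{
          *
          *		sc->sc_wq = alloc_ordered_workqueue("mydrvwq", 0);
          *		if (sc->sc_wq == NULL)
          *			return ENOMEM;
          *		return 0;
          *	}
          *
          *	static void
          *	mydrv_detach(struct mydrv_softc *sc)
          *	{
          *
          *		destroy_workqueue(sc->sc_wq);
          *	}
          */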
    285 
    286 /*
    287  * destroy_workqueue(wq)
    288  *
     289  *	Destroy the workqueue wq.  Cancel any pending
    290  *	delayed work.  Wait for all queued work to complete.
    291  *
    292  *	May sleep.
    293  */
    294 void
    295 destroy_workqueue(struct workqueue_struct *wq)
    296 {
    297 
    298 	/*
    299 	 * Cancel all delayed work.  We do this first because any
     300 	 * delayed work that has already timed out, which we can't
    301 	 * cancel, may have queued new work.
    302 	 */
    303 	mutex_enter(&wq->wq_lock);
    304 	while (!TAILQ_EMPTY(&wq->wq_delayed)) {
    305 		struct delayed_work *const dw = TAILQ_FIRST(&wq->wq_delayed);
    306 
    307 		KASSERT(work_queue(&dw->work) == wq);
    308 		KASSERTMSG((dw->dw_state == DELAYED_WORK_SCHEDULED ||
    309 			dw->dw_state == DELAYED_WORK_RESCHEDULED ||
    310 			dw->dw_state == DELAYED_WORK_CANCELLED),
    311 		    "delayed work %p in bad state: %d",
    312 		    dw, dw->dw_state);
    313 
    314 		/*
    315 		 * Mark it cancelled and try to stop the callout before
    316 		 * it starts.
    317 		 *
    318 		 * If it's too late and the callout has already begun
    319 		 * to execute, then it will notice that we asked to
    320 		 * cancel it and remove itself from the queue before
    321 		 * returning.
    322 		 *
    323 		 * If we stopped the callout before it started,
    324 		 * however, then we can safely destroy the callout and
    325 		 * dissociate it from the workqueue ourselves.
    326 		 */
    327 		SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
    328 		dw->dw_state = DELAYED_WORK_CANCELLED;
    329 		if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
    330 			cancel_delayed_work_done(wq, dw);
    331 	}
    332 	mutex_exit(&wq->wq_lock);
    333 
    334 	/*
    335 	 * At this point, no new work can be put on the queue.
    336 	 */
    337 
    338 	/* Tell the thread to exit.  */
    339 	mutex_enter(&wq->wq_lock);
    340 	wq->wq_dying = true;
    341 	cv_broadcast(&wq->wq_cv);
    342 	mutex_exit(&wq->wq_lock);
    343 
    344 	/* Wait for it to exit.  */
    345 	(void)kthread_join(wq->wq_lwp);
    346 
    347 	KASSERT(wq->wq_dying);
    348 	KASSERT(wq->wq_flags == 0);
    349 	KASSERT(wq->wq_current_work == NULL);
    350 	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
    351 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
    352 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
    353 	cv_destroy(&wq->wq_cv);
    354 	mutex_destroy(&wq->wq_lock);
    355 
    356 	kmem_free(wq, sizeof(*wq));
    357 }
    358 
    359 /*
    361  * Work thread and callout
    362  */
    363 
    364 /*
    365  * linux_workqueue_thread(cookie)
    366  *
    367  *	Main function for a workqueue's worker thread.  Waits until
    368  *	there is work queued, grabs a batch of work off the queue,
    369  *	executes it all, bumps the generation number, and repeats,
    370  *	until dying.
    371  */
    372 static void __dead
    373 linux_workqueue_thread(void *cookie)
    374 {
    375 	struct workqueue_struct *const wq = cookie;
    376 	struct work_head *const q[2] = { &wq->wq_queue, &wq->wq_dqueue };
    377 	struct work_struct marker, *work;
    378 	unsigned i;
    379 
    380 	lwp_setspecific(workqueue_key, wq);
    381 
    382 	mutex_enter(&wq->wq_lock);
    383 	for (;;) {
    384 		/*
    385 		 * Wait until there's activity.  If there's no work and
    386 		 * we're dying, stop here.
    387 		 */
    388 		if (TAILQ_EMPTY(&wq->wq_queue) &&
    389 		    TAILQ_EMPTY(&wq->wq_dqueue)) {
    390 			if (wq->wq_dying)
    391 				break;
    392 			cv_wait(&wq->wq_cv, &wq->wq_lock);
    393 			continue;
    394 		}
    395 
    396 		/*
    397 		 * Start a batch of work.  Use a marker to delimit when
    398 		 * the batch ends so we can advance the generation
    399 		 * after the batch.
    400 		 */
    401 		SDT_PROBE1(sdt, linux, work, batch__start,  wq);
    402 		for (i = 0; i < 2; i++) {
    403 			if (TAILQ_EMPTY(q[i]))
    404 				continue;
    405 			TAILQ_INSERT_TAIL(q[i], &marker, work_entry);
    406 			while ((work = TAILQ_FIRST(q[i])) != &marker) {
    407 				void (*func)(struct work_struct *);
    408 
    409 				KASSERT(work_queue(work) == wq);
    410 				KASSERT(work_claimed(work, wq));
    411 				KASSERTMSG((q[i] != &wq->wq_dqueue ||
    412 					container_of(work, struct delayed_work,
    413 					    work)->dw_state ==
    414 					DELAYED_WORK_IDLE),
    415 				    "delayed work %p queued and scheduled",
    416 				    work);
    417 
    418 				TAILQ_REMOVE(q[i], work, work_entry);
    419 				KASSERT(wq->wq_current_work == NULL);
    420 				wq->wq_current_work = work;
    421 				func = work->func;
    422 				release_work(work, wq);
    423 				/* Can't dereference work after this point.  */
    424 
    425 				mutex_exit(&wq->wq_lock);
    426 				SDT_PROBE2(sdt, linux, work, run,  work, wq);
    427 				(*func)(work);
    428 				SDT_PROBE2(sdt, linux, work, done,  work, wq);
    429 				mutex_enter(&wq->wq_lock);
    430 
    431 				KASSERT(wq->wq_current_work == work);
    432 				wq->wq_current_work = NULL;
    433 				cv_broadcast(&wq->wq_cv);
    434 			}
    435 			TAILQ_REMOVE(q[i], &marker, work_entry);
    436 		}
    437 
    438 		/* Notify flush that we've completed a batch of work.  */
    439 		wq->wq_gen++;
    440 		cv_broadcast(&wq->wq_cv);
    441 		SDT_PROBE1(sdt, linux, work, batch__done,  wq);
    442 	}
    443 	mutex_exit(&wq->wq_lock);
    444 
    445 	kthread_exit(0);
    446 }
    447 
    448 /*
    449  * linux_workqueue_timeout(cookie)
    450  *
    451  *	Delayed work timeout callback.
    452  *
    453  *	- If scheduled, queue it.
    454  *	- If rescheduled, callout_schedule ourselves again.
    455  *	- If cancelled, destroy the callout and release the work from
    456  *        the workqueue.
    457  */
    458 static void
    459 linux_workqueue_timeout(void *cookie)
    460 {
    461 	struct delayed_work *const dw = cookie;
    462 	struct workqueue_struct *const wq = work_queue(&dw->work);
    463 
    464 	KASSERTMSG(wq != NULL,
    465 	    "delayed work %p state %d resched %d",
    466 	    dw, dw->dw_state, dw->dw_resched);
    467 
    468 	SDT_PROBE2(sdt, linux, work, timer,  dw, wq);
    469 
    470 	mutex_enter(&wq->wq_lock);
    471 	KASSERT(work_queue(&dw->work) == wq);
    472 	switch (dw->dw_state) {
    473 	case DELAYED_WORK_IDLE:
    474 		panic("delayed work callout uninitialized: %p", dw);
    475 	case DELAYED_WORK_SCHEDULED:
    476 		dw_callout_destroy(wq, dw);
    477 		TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work, work_entry);
    478 		cv_broadcast(&wq->wq_cv);
    479 		SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
    480 		break;
    481 	case DELAYED_WORK_RESCHEDULED:
    482 		KASSERT(dw->dw_resched >= 0);
    483 		callout_schedule(&dw->dw_callout, dw->dw_resched);
    484 		dw->dw_state = DELAYED_WORK_SCHEDULED;
    485 		dw->dw_resched = -1;
    486 		break;
    487 	case DELAYED_WORK_CANCELLED:
    488 		cancel_delayed_work_done(wq, dw);
    489 		/* Can't dereference dw after this point.  */
    490 		goto out;
    491 	default:
    492 		panic("delayed work callout in bad state: %p", dw);
    493 	}
    494 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE ||
    495 	    dw->dw_state == DELAYED_WORK_SCHEDULED);
    496 out:	mutex_exit(&wq->wq_lock);
    497 }
    498 
    499 /*
    500  * current_work()
    501  *
    502  *	If in a workqueue worker thread, return the work it is
    503  *	currently executing.  Otherwise return NULL.
    504  */
    505 struct work_struct *
    506 current_work(void)
    507 {
    508 	struct workqueue_struct *wq = lwp_getspecific(workqueue_key);
    509 
    510 	/* If we're not a workqueue thread, then there's no work.  */
    511 	if (wq == NULL)
    512 		return NULL;
    513 
    514 	/*
    515 	 * Otherwise, this should be possible only while work is in
    516 	 * progress.  Return the current work item.
    517 	 */
    518 	KASSERT(wq->wq_current_work != NULL);
    519 	return wq->wq_current_work;
    520 }
    521 
    522 /*
    524  * Work
    525  */
    526 
    527 /*
    528  * INIT_WORK(work, fn)
    529  *
    530  *	Initialize work for use with a workqueue to call fn in a worker
    531  *	thread.  There is no corresponding destruction operation.
    532  */
    533 void
    534 INIT_WORK(struct work_struct *work, void (*fn)(struct work_struct *))
    535 {
    536 
    537 	work->work_owner = 0;
    538 	work->func = fn;
    539 }
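
         /*
          * Example (a hypothetical sketch): a work handler typically
          * recovers its containing structure with container_of.  The
          * mydrv_* names and softc layout are invented for illustration;
          * the handler runs in the worker thread and may sleep.
          *
          *	struct mydrv_softc {
          *		struct work_struct	sc_reset_work;
          *	};
          *
          *	static void
          *	mydrv_reset(struct work_struct *work)
          *	{
          *		struct mydrv_softc *sc = container_of(work,
          *		    struct mydrv_softc, sc_reset_work);
          *
          *		mydrv_do_reset(sc);
          *	}
          *
          *	INIT_WORK(&sc->sc_reset_work, &mydrv_reset);
          *	schedule_work(&sc->sc_reset_work);
          */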
    540 
    541 /*
    542  * work_claimed(work, wq)
    543  *
    544  *	True if work is currently claimed by a workqueue, meaning it is
    545  *	either on the queue or scheduled in a callout.  The workqueue
    546  *	must be wq, and caller must hold wq's lock.
    547  */
    548 static bool
    549 work_claimed(struct work_struct *work, struct workqueue_struct *wq)
    550 {
    551 
    552 	KASSERT(work_queue(work) == wq);
    553 	KASSERT(mutex_owned(&wq->wq_lock));
    554 
    555 	return work->work_owner & 1;
    556 }
    557 
    558 /*
    559  * work_pending(work)
    560  *
     561  *	True if work is currently claimed by some workqueue, i.e.
     562  *	queued or scheduled to run on that workqueue.
    563  */
    564 bool
    565 work_pending(const struct work_struct *work)
    566 {
    567 
    568 	return work->work_owner & 1;
    569 }
    570 
    571 /*
    572  * work_queue(work)
    573  *
    574  *	Return the last queue that work was queued on, or NULL if it
    575  *	was never queued.
    576  */
    577 static struct workqueue_struct *
    578 work_queue(struct work_struct *work)
    579 {
    580 
    581 	return (struct workqueue_struct *)(work->work_owner & ~(uintptr_t)1);
    582 }
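
         /*
          * Illustration: work_owner is a tagged pointer.  The low bit is
          * the claimed flag tested by work_claimed and work_pending; the
          * remaining bits are the workqueue pointer returned here.
          * Assuming, for example, a workqueue allocated at address
          * 0x1000 (acquire_work asserts the pointer is even):
          *
          *	work_owner == 0x0000	never queued anywhere
          *	work_owner == 0x1000	last queued on that wq, not
          *				claimed now
          *	work_owner == 0x1001	claimed: on the queue or in a
          *				callout there
          */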
    583 
    584 /*
    585  * acquire_work(work, wq)
    586  *
    587  *	Try to claim work for wq.  If work is already claimed, it must
    588  *	be claimed by wq; return false.  If work is not already
    589  *	claimed, claim it, issue a memory barrier to match any prior
    590  *	release_work, and return true.
    591  *
    592  *	Caller must hold wq's lock.
    593  */
    594 static bool
    595 acquire_work(struct work_struct *work, struct workqueue_struct *wq)
    596 {
    597 	uintptr_t owner0, owner;
    598 
    599 	KASSERT(mutex_owned(&wq->wq_lock));
    600 	KASSERT(((uintptr_t)wq & 1) == 0);
    601 
    602 	owner = (uintptr_t)wq | 1;
    603 	do {
    604 		owner0 = work->work_owner;
    605 		if (owner0 & 1) {
    606 			KASSERT((owner0 & ~(uintptr_t)1) == (uintptr_t)wq);
    607 			return false;
    608 		}
    609 		KASSERT(owner0 == (uintptr_t)NULL || owner0 == (uintptr_t)wq);
    610 	} while (atomic_cas_uintptr(&work->work_owner, owner0, owner) !=
    611 	    owner0);
    612 
    613 	KASSERT(work_queue(work) == wq);
    614 	membar_enter();
    615 	SDT_PROBE2(sdt, linux, work, acquire,  work, wq);
    616 	return true;
    617 }
    618 
    619 /*
    620  * release_work(work, wq)
    621  *
    622  *	Issue a memory barrier to match any subsequent acquire_work and
    623  *	dissociate work from wq.
    624  *
    625  *	Caller must hold wq's lock and work must be associated with wq.
    626  */
    627 static void
    628 release_work(struct work_struct *work, struct workqueue_struct *wq)
    629 {
    630 
    631 	KASSERT(work_queue(work) == wq);
    632 	KASSERT(mutex_owned(&wq->wq_lock));
    633 
    634 	SDT_PROBE2(sdt, linux, work, release,  work, wq);
    635 	membar_exit();
    636 
    637 	/*
    638 	 * Non-interlocked r/m/w is safe here because nobody else can
     639 	 * write to this while the claimed bit is set and the workqueue
    640 	 * lock is held.
    641 	 */
    642 	work->work_owner &= ~(uintptr_t)1;
    643 }
    644 
    645 /*
    646  * schedule_work(work)
    647  *
    648  *	If work is not already queued on system_wq, queue it to be run
    649  *	by system_wq's worker thread when it next can.  True if it was
    650  *	newly queued, false if it was already queued.  If the work was
    651  *	already running, queue it to run again.
    652  *
    653  *	Caller must ensure work is not queued to run on a different
    654  *	workqueue.
    655  */
    656 bool
    657 schedule_work(struct work_struct *work)
    658 {
    659 
    660 	return queue_work(system_wq, work);
    661 }
    662 
    663 /*
    664  * queue_work(wq, work)
    665  *
    666  *	If work is not already queued on wq, queue it to be run by wq's
    667  *	worker thread when it next can.  True if it was newly queued,
    668  *	false if it was already queued.  If the work was already
    669  *	running, queue it to run again.
    670  *
    671  *	Caller must ensure work is not queued to run on a different
    672  *	workqueue.
    673  */
    674 bool
    675 queue_work(struct workqueue_struct *wq, struct work_struct *work)
    676 {
    677 	bool newly_queued;
    678 
    679 	KASSERT(wq != NULL);
    680 
    681 	mutex_enter(&wq->wq_lock);
    682 	if (__predict_true(acquire_work(work, wq))) {
    683 		/*
    684 		 * It wasn't on any workqueue at all.  Put it on this
    685 		 * one, and signal the worker thread that there is work
    686 		 * to do.
    687 		 */
    688 		TAILQ_INSERT_TAIL(&wq->wq_queue, work, work_entry);
    689 		cv_broadcast(&wq->wq_cv);
    690 		SDT_PROBE2(sdt, linux, work, queue,  work, wq);
    691 		newly_queued = true;
    692 	} else {
    693 		/*
    694 		 * It was already on this workqueue.  Nothing to do
    695 		 * since it is already queued.
    696 		 */
    697 		newly_queued = false;
    698 	}
    699 	mutex_exit(&wq->wq_lock);
    700 
    701 	return newly_queued;
    702 }
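
         /*
          * Example (sketch, continuing the hypothetical mydrv examples):
          * queue work on a private workqueue rather than system_wq.  A
          * false return means it was already queued and will still run
          * once.
          *
          *	(void)queue_work(sc->sc_wq, &sc->sc_reset_work);
          */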
    703 
    704 /*
    705  * cancel_work(work)
    706  *
    707  *	If work was queued, remove it from the queue and return true.
    708  *	If work was not queued, return false.  Work may still be
    709  *	running when this returns.
    710  */
    711 bool
    712 cancel_work(struct work_struct *work)
    713 {
    714 	struct workqueue_struct *wq;
    715 	bool cancelled_p = false;
    716 
    717 	/* If there's no workqueue, nothing to cancel.   */
    718 	if ((wq = work_queue(work)) == NULL)
    719 		goto out;
    720 
    721 	mutex_enter(&wq->wq_lock);
    722 	if (__predict_false(work_queue(work) != wq)) {
    723 		/*
    724 		 * It has finished execution or been cancelled by
    725 		 * another thread, and has been moved off the
     726 	 * workqueue, so it's too late to cancel.
    727 		 */
    728 		cancelled_p = false;
    729 	} else {
    730 		/* Check whether it's on the queue.  */
    731 		if (work_claimed(work, wq)) {
    732 			/*
    733 			 * It is still on the queue.  Take it off the
    734 			 * queue and report successful cancellation.
    735 			 */
    736 			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
    737 			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
    738 			release_work(work, wq);
    739 			/* Can't dereference work after this point.  */
    740 			cancelled_p = true;
    741 		} else {
    742 			/* Not on the queue.  Couldn't cancel it.  */
    743 			cancelled_p = false;
    744 		}
    745 	}
    746 	mutex_exit(&wq->wq_lock);
    747 
    748 out:	return cancelled_p;
    749 }
    750 
    751 /*
    752  * cancel_work_sync(work)
    753  *
    754  *	If work was queued, remove it from the queue and return true.
    755  *	If work was not queued, return false.  Either way, if work is
    756  *	currently running, wait for it to complete.
    757  *
    758  *	May sleep.
    759  */
    760 bool
    761 cancel_work_sync(struct work_struct *work)
    762 {
    763 	struct workqueue_struct *wq;
    764 	bool cancelled_p = false;
    765 
    766 	/* If there's no workqueue, nothing to cancel.   */
    767 	if ((wq = work_queue(work)) == NULL)
    768 		goto out;
    769 
    770 	mutex_enter(&wq->wq_lock);
    771 	if (__predict_false(work_queue(work) != wq)) {
    772 		/*
    773 		 * It has finished execution or been cancelled by
    774 		 * another thread, and has been moved off the
    775 		 * workqueue, so it's too late to cancel.
    776 		 */
    777 		cancelled_p = false;
    778 	} else {
    779 		/* Check whether it's on the queue.  */
    780 		if (work_claimed(work, wq)) {
    781 			/*
    782 			 * It is still on the queue.  Take it off the
    783 			 * queue and report successful cancellation.
    784 			 */
    785 			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
    786 			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
    787 			release_work(work, wq);
    788 			/* Can't dereference work after this point.  */
    789 			cancelled_p = true;
    790 		} else {
    791 			/* Not on the queue.  Couldn't cancel it.  */
    792 			cancelled_p = false;
    793 		}
    794 		/* If it's still running, wait for it to complete.  */
    795 		if (wq->wq_current_work == work)
    796 			wait_for_current_work(work, wq);
    797 	}
    798 	mutex_exit(&wq->wq_lock);
    799 
    800 out:	return cancelled_p;
    801 }
    802 
    803 /*
    804  * wait_for_current_work(work, wq)
    805  *
    806  *	wq must be currently executing work.  Wait for it to finish.
    807  *
    808  *	Does not dereference work.
    809  */
    810 static void
    811 wait_for_current_work(struct work_struct *work, struct workqueue_struct *wq)
    812 {
    813 	uint64_t gen;
    814 
    815 	KASSERT(mutex_owned(&wq->wq_lock));
    816 	KASSERT(wq->wq_current_work == work);
    817 
    818 	/* Wait only one generation in case it gets requeued quickly.  */
    819 	SDT_PROBE2(sdt, linux, work, wait__start,  work, wq);
    820 	gen = wq->wq_gen;
    821 	do {
    822 		cv_wait(&wq->wq_cv, &wq->wq_lock);
    823 	} while (wq->wq_current_work == work && wq->wq_gen == gen);
    824 	SDT_PROBE2(sdt, linux, work, wait__done,  work, wq);
    825 }
    826 
    827 /*
    829  * Delayed work
    830  */
    831 
    832 /*
    833  * INIT_DELAYED_WORK(dw, fn)
    834  *
    835  *	Initialize dw for use with a workqueue to call fn in a worker
    836  *	thread after a delay.  There is no corresponding destruction
    837  *	operation.
    838  */
    839 void
    840 INIT_DELAYED_WORK(struct delayed_work *dw, void (*fn)(struct work_struct *))
    841 {
    842 
    843 	INIT_WORK(&dw->work, fn);
    844 	dw->dw_state = DELAYED_WORK_IDLE;
    845 	dw->dw_resched = -1;
    846 
    847 	/*
    848 	 * Defer callout_init until we are going to schedule the
    849 	 * callout, which can then callout_destroy it, because
    850 	 * otherwise since there's no DESTROY_DELAYED_WORK or anything
    851 	 * we have no opportunity to call callout_destroy.
    852 	 */
    853 }
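
         /*
          * Example (a hypothetical sketch): a periodic tick that
          * reschedules itself once a second.  mydrv_poll and sc_tick are
          * invented names; note the handler recovers the softc through
          * the embedded work_struct of the delayed_work.
          *
          *	static void
          *	mydrv_tick(struct work_struct *work)
          *	{
          *		struct mydrv_softc *sc = container_of(work,
          *		    struct mydrv_softc, sc_tick.work);
          *
          *		mydrv_poll(sc);
          *		schedule_delayed_work(&sc->sc_tick, hz);
          *	}
          *
          *	INIT_DELAYED_WORK(&sc->sc_tick, &mydrv_tick);
          *	schedule_delayed_work(&sc->sc_tick, hz);
          */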
    854 
    855 /*
    856  * schedule_delayed_work(dw, ticks)
    857  *
    858  *	If it is not currently scheduled, schedule dw to run after
    859  *	ticks on system_wq.  If currently executing and not already
    860  *	rescheduled, reschedule it.  True if it was newly scheduled,
    861  *	false if it was already scheduled.
    862  *
    863  *	If ticks == 0, queue it to run as soon as the worker can,
    864  *	without waiting for the next callout tick to run.
    865  */
    866 bool
    867 schedule_delayed_work(struct delayed_work *dw, unsigned long ticks)
    868 {
    869 
    870 	return queue_delayed_work(system_wq, dw, ticks);
    871 }
    872 
    873 /*
    874  * dw_callout_init(wq, dw)
    875  *
    876  *	Initialize the callout of dw and transition to
    877  *	DELAYED_WORK_SCHEDULED.  Caller must use callout_schedule.
    878  */
    879 static void
    880 dw_callout_init(struct workqueue_struct *wq, struct delayed_work *dw)
    881 {
    882 
    883 	KASSERT(mutex_owned(&wq->wq_lock));
    884 	KASSERT(work_queue(&dw->work) == wq);
    885 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
    886 
    887 	callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
    888 	callout_setfunc(&dw->dw_callout, &linux_workqueue_timeout, dw);
    889 	TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
    890 	dw->dw_state = DELAYED_WORK_SCHEDULED;
    891 }
    892 
    893 /*
    894  * dw_callout_destroy(wq, dw)
    895  *
    896  *	Destroy the callout of dw and transition to DELAYED_WORK_IDLE.
    897  */
    898 static void
    899 dw_callout_destroy(struct workqueue_struct *wq, struct delayed_work *dw)
    900 {
    901 
    902 	KASSERT(mutex_owned(&wq->wq_lock));
    903 	KASSERT(work_queue(&dw->work) == wq);
    904 	KASSERT(dw->dw_state == DELAYED_WORK_SCHEDULED ||
    905 	    dw->dw_state == DELAYED_WORK_RESCHEDULED ||
    906 	    dw->dw_state == DELAYED_WORK_CANCELLED);
    907 
    908 	TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
    909 	callout_destroy(&dw->dw_callout);
    910 	dw->dw_resched = -1;
    911 	dw->dw_state = DELAYED_WORK_IDLE;
    912 }
    913 
    914 /*
    915  * cancel_delayed_work_done(wq, dw)
    916  *
    917  *	Complete cancellation of a delayed work: transition from
    918  *	DELAYED_WORK_CANCELLED to DELAYED_WORK_IDLE and off the
    919  *	workqueue.  Caller must not dereference dw after this returns.
    920  */
    921 static void
    922 cancel_delayed_work_done(struct workqueue_struct *wq, struct delayed_work *dw)
    923 {
    924 
    925 	KASSERT(mutex_owned(&wq->wq_lock));
    926 	KASSERT(work_queue(&dw->work) == wq);
    927 	KASSERT(dw->dw_state == DELAYED_WORK_CANCELLED);
    928 
    929 	dw_callout_destroy(wq, dw);
    930 	release_work(&dw->work, wq);
    931 	/* Can't dereference dw after this point.  */
    932 }
    933 
    934 /*
    935  * queue_delayed_work(wq, dw, ticks)
    936  *
     937  *	If it is not currently scheduled, schedule dw to run after
     938  *	ticks on wq.  If it is already queued or scheduled, leave it
     939  *	alone; if it was cancelled but has not yet run, reschedule it.
    940  *
    941  *	If ticks == 0, queue it to run as soon as the worker can,
    942  *	without waiting for the next callout tick to run.
    943  */
    944 bool
    945 queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    946     unsigned long ticks)
    947 {
    948 	bool newly_queued;
    949 
    950 	mutex_enter(&wq->wq_lock);
    951 	if (__predict_true(acquire_work(&dw->work, wq))) {
    952 		/*
    953 		 * It wasn't on any workqueue at all.  Schedule it to
    954 		 * run on this one.
    955 		 */
    956 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
    957 		if (ticks == 0) {
    958 			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
    959 			    work_entry);
    960 			cv_broadcast(&wq->wq_cv);
    961 			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
    962 		} else {
    963 			/*
    964 			 * Initialize a callout and schedule to run
    965 			 * after a delay.
    966 			 */
    967 			dw_callout_init(wq, dw);
    968 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
    969 			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
    970 		}
    971 		newly_queued = true;
    972 	} else {
    973 		/* It was already on this workqueue.  */
    974 		switch (dw->dw_state) {
    975 		case DELAYED_WORK_IDLE:
    976 		case DELAYED_WORK_SCHEDULED:
    977 		case DELAYED_WORK_RESCHEDULED:
    978 			/* On the queue or already scheduled.  Leave it.  */
    979 			newly_queued = false;
    980 			break;
    981 		case DELAYED_WORK_CANCELLED:
    982 			/*
    983 			 * Scheduled and the callout began, but it was
    984 			 * cancelled.  Reschedule it.
    985 			 */
    986 			if (ticks == 0) {
    987 				dw->dw_state = DELAYED_WORK_SCHEDULED;
    988 				SDT_PROBE2(sdt, linux, work, queue,
    989 				    &dw->work, wq);
    990 			} else {
    991 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
    992 				dw->dw_resched = MIN(INT_MAX, ticks);
    993 				SDT_PROBE3(sdt, linux, work, schedule,
    994 				    dw, wq, ticks);
    995 			}
    996 			newly_queued = true;
    997 			break;
    998 		default:
    999 			panic("invalid delayed work state: %d",
   1000 			    dw->dw_state);
   1001 		}
   1002 	}
   1003 	mutex_exit(&wq->wq_lock);
   1004 
   1005 	return newly_queued;
   1006 }
   1007 
   1008 /*
   1009  * mod_delayed_work(wq, dw, ticks)
   1010  *
   1011  *	Schedule dw to run after ticks.  If scheduled or queued,
   1012  *	reschedule.  If ticks == 0, run without delay.
   1013  *
   1014  *	True if it modified the timer of an already scheduled work,
   1015  *	false if it newly scheduled the work.
   1016  */
   1017 bool
   1018 mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
   1019     unsigned long ticks)
   1020 {
   1021 	bool timer_modified;
   1022 
   1023 	mutex_enter(&wq->wq_lock);
   1024 	if (acquire_work(&dw->work, wq)) {
   1025 		/*
   1026 		 * It wasn't on any workqueue at all.  Schedule it to
   1027 		 * run on this one.
   1028 		 */
   1029 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
   1030 		if (ticks == 0) {
   1031 			/*
   1032 			 * Run immediately: put it on the queue and
   1033 			 * signal the worker thread.
   1034 			 */
   1035 			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
   1036 			    work_entry);
   1037 			cv_broadcast(&wq->wq_cv);
   1038 			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
   1039 		} else {
   1040 			/*
   1041 			 * Initialize a callout and schedule to run
   1042 			 * after a delay.
   1043 			 */
   1044 			dw_callout_init(wq, dw);
   1045 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
   1046 			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
   1047 		}
   1048 		timer_modified = false;
   1049 	} else {
   1050 		/* It was already on this workqueue.  */
   1051 		switch (dw->dw_state) {
   1052 		case DELAYED_WORK_IDLE:
   1053 			/* On the queue.  */
   1054 			if (ticks == 0) {
   1055 				/* Leave it be.  */
   1056 				SDT_PROBE2(sdt, linux, work, cancel,
   1057 				    &dw->work, wq);
   1058 				SDT_PROBE2(sdt, linux, work, queue,
   1059 				    &dw->work, wq);
   1060 			} else {
   1061 				/* Remove from the queue and schedule.  */
   1062 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
   1063 				    work_entry);
   1064 				dw_callout_init(wq, dw);
   1065 				callout_schedule(&dw->dw_callout,
   1066 				    MIN(INT_MAX, ticks));
   1067 				SDT_PROBE2(sdt, linux, work, cancel,
   1068 				    &dw->work, wq);
   1069 				SDT_PROBE3(sdt, linux, work, schedule,
   1070 				    dw, wq, ticks);
   1071 			}
   1072 			timer_modified = true;
   1073 			break;
   1074 		case DELAYED_WORK_SCHEDULED:
   1075 			/*
   1076 			 * It is scheduled to run after a delay.  Try
   1077 			 * to stop it and reschedule it; if we can't,
   1078 			 * either reschedule it or cancel it to put it
   1079 			 * on the queue, and inform the callout.
   1080 			 */
   1081 			if (callout_stop(&dw->dw_callout)) {
   1082 				/* Can't stop, callout has begun.  */
   1083 				if (ticks == 0) {
   1084 					/*
   1085 					 * We don't actually need to do
   1086 					 * anything.  The callout will
   1087 					 * queue it as soon as it gets
   1088 					 * the lock.
   1089 					 */
   1090 					SDT_PROBE2(sdt, linux, work, cancel,
   1091 					    &dw->work, wq);
   1092 					SDT_PROBE2(sdt, linux, work, queue,
   1093 					    &dw->work, wq);
   1094 				} else {
   1095 					/* Ask the callout to reschedule.  */
   1096 					dw->dw_state = DELAYED_WORK_RESCHEDULED;
   1097 					dw->dw_resched = MIN(INT_MAX, ticks);
   1098 					SDT_PROBE2(sdt, linux, work, cancel,
   1099 					    &dw->work, wq);
   1100 					SDT_PROBE3(sdt, linux, work, schedule,
   1101 					    dw, wq, ticks);
   1102 				}
   1103 			} else {
   1104 				/* We stopped the callout before it began.  */
   1105 				if (ticks == 0) {
   1106 					/*
   1107 					 * Run immediately: destroy the
   1108 					 * callout, put it on the
   1109 					 * queue, and signal the worker
   1110 					 * thread.
   1111 					 */
   1112 					dw_callout_destroy(wq, dw);
   1113 					TAILQ_INSERT_TAIL(&wq->wq_dqueue,
   1114 					    &dw->work, work_entry);
   1115 					cv_broadcast(&wq->wq_cv);
   1116 					SDT_PROBE2(sdt, linux, work, cancel,
   1117 					    &dw->work, wq);
   1118 					SDT_PROBE2(sdt, linux, work, queue,
   1119 					    &dw->work, wq);
   1120 				} else {
   1121 					/*
   1122 					 * Reschedule the callout.  No
   1123 					 * state change.
   1124 					 */
   1125 					callout_schedule(&dw->dw_callout,
   1126 					    MIN(INT_MAX, ticks));
   1127 					SDT_PROBE2(sdt, linux, work, cancel,
   1128 					    &dw->work, wq);
   1129 					SDT_PROBE3(sdt, linux, work, schedule,
   1130 					    dw, wq, ticks);
   1131 				}
   1132 			}
   1133 			timer_modified = true;
   1134 			break;
   1135 		case DELAYED_WORK_RESCHEDULED:
   1136 			/*
   1137 			 * Someone rescheduled it after the callout
   1138 			 * started but before the poor thing even had a
   1139 			 * chance to acquire the lock.
   1140 			 */
   1141 			if (ticks == 0) {
   1142 				/*
   1143 				 * We can just switch back to
   1144 				 * DELAYED_WORK_SCHEDULED so that the
   1145 				 * callout will queue the work as soon
   1146 				 * as it gets the lock.
   1147 				 */
   1148 				dw->dw_state = DELAYED_WORK_SCHEDULED;
   1149 				dw->dw_resched = -1;
   1150 				SDT_PROBE2(sdt, linux, work, cancel,
   1151 				    &dw->work, wq);
   1152 				SDT_PROBE2(sdt, linux, work, queue,
   1153 				    &dw->work, wq);
   1154 			} else {
   1155 				/* Change the rescheduled time.  */
    1156 				dw->dw_resched = MIN(INT_MAX, ticks);
   1157 				SDT_PROBE2(sdt, linux, work, cancel,
   1158 				    &dw->work, wq);
   1159 				SDT_PROBE3(sdt, linux, work, schedule,
   1160 				    dw, wq, ticks);
   1161 			}
   1162 			timer_modified = true;
   1163 			break;
   1164 		case DELAYED_WORK_CANCELLED:
   1165 			/*
   1166 			 * Someone cancelled it after the callout
   1167 			 * started but before the poor thing even had a
   1168 			 * chance to acquire the lock.
   1169 			 */
   1170 			if (ticks == 0) {
   1171 				/*
   1172 				 * We can just switch back to
   1173 				 * DELAYED_WORK_SCHEDULED so that the
   1174 				 * callout will queue the work as soon
   1175 				 * as it gets the lock.
   1176 				 */
   1177 				dw->dw_state = DELAYED_WORK_SCHEDULED;
   1178 				SDT_PROBE2(sdt, linux, work, queue,
   1179 				    &dw->work, wq);
   1180 			} else {
   1181 				/* Ask it to reschedule.  */
   1182 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
   1183 				dw->dw_resched = MIN(INT_MAX, ticks);
   1184 				SDT_PROBE3(sdt, linux, work, schedule,
   1185 				    dw, wq, ticks);
   1186 			}
   1187 			timer_modified = false;
   1188 			break;
   1189 		default:
   1190 			panic("invalid delayed work state: %d", dw->dw_state);
   1191 		}
   1192 	}
   1193 	mutex_exit(&wq->wq_lock);
   1194 
   1195 	return timer_modified;
   1196 }
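
         /*
          * Example (sketch): restart an inactivity timeout on each I/O
          * completion so the hypothetical sc_timeout handler runs only
          * after five seconds of quiet; any pending earlier expiry is
          * pushed back.
          *
          *	mod_delayed_work(system_wq, &sc->sc_timeout, 5*hz);
          */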
   1197 
   1198 /*
   1199  * cancel_delayed_work(dw)
   1200  *
   1201  *	If work was scheduled or queued, remove it from the schedule or
   1202  *	queue and return true.  If work was not scheduled or queued,
   1203  *	return false.  Note that work may already be running; if it
   1204  *	hasn't been rescheduled or requeued, then cancel_delayed_work
   1205  *	will return false, and either way, cancel_delayed_work will NOT
   1206  *	wait for the work to complete.
   1207  */
   1208 bool
   1209 cancel_delayed_work(struct delayed_work *dw)
   1210 {
   1211 	struct workqueue_struct *wq;
   1212 	bool cancelled_p;
   1213 
   1214 	/* If there's no workqueue, nothing to cancel.   */
   1215 	if ((wq = work_queue(&dw->work)) == NULL)
   1216 		return false;
   1217 
   1218 	mutex_enter(&wq->wq_lock);
   1219 	if (__predict_false(work_queue(&dw->work) != wq)) {
   1220 		cancelled_p = false;
   1221 	} else {
   1222 		switch (dw->dw_state) {
   1223 		case DELAYED_WORK_IDLE:
   1224 			/*
   1225 			 * It is either on the queue or already running
   1226 			 * or both.
   1227 			 */
   1228 			if (work_claimed(&dw->work, wq)) {
   1229 				/* On the queue.  Remove and release.  */
   1230 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
   1231 				    work_entry);
   1232 				SDT_PROBE2(sdt, linux, work, cancel,
   1233 				    &dw->work, wq);
   1234 				release_work(&dw->work, wq);
   1235 				/* Can't dereference dw after this point.  */
   1236 				cancelled_p = true;
   1237 			} else {
   1238 				/* Not on the queue, so didn't cancel.  */
   1239 				cancelled_p = false;
   1240 			}
   1241 			break;
   1242 		case DELAYED_WORK_SCHEDULED:
   1243 			/*
   1244 			 * If it is scheduled, mark it cancelled and
   1245 			 * try to stop the callout before it starts.
   1246 			 *
   1247 			 * If it's too late and the callout has already
   1248 			 * begun to execute, tough.
   1249 			 *
   1250 			 * If we stopped the callout before it started,
   1251 			 * however, then destroy the callout and
   1252 			 * dissociate it from the workqueue ourselves.
   1253 			 */
   1254 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1255 			cancelled_p = true;
   1256 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
   1257 			if (!callout_stop(&dw->dw_callout))
   1258 				cancel_delayed_work_done(wq, dw);
   1259 			break;
   1260 		case DELAYED_WORK_RESCHEDULED:
   1261 			/*
   1262 			 * If it is being rescheduled, the callout has
   1263 			 * already fired.  We must ask it to cancel.
   1264 			 */
   1265 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1266 			dw->dw_resched = -1;
   1267 			cancelled_p = true;
   1268 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
   1269 			break;
   1270 		case DELAYED_WORK_CANCELLED:
   1271 			/*
   1272 			 * If it is being cancelled, the callout has
   1273 			 * already fired.  There is nothing more for us
   1274 			 * to do.  Someone else claims credit for
   1275 			 * cancelling it.
   1276 			 */
   1277 			cancelled_p = false;
   1278 			break;
   1279 		default:
   1280 			panic("invalid delayed work state: %d",
   1281 			    dw->dw_state);
   1282 		}
   1283 	}
   1284 	mutex_exit(&wq->wq_lock);
   1285 
   1286 	return cancelled_p;
   1287 }
   1288 
   1289 /*
   1290  * cancel_delayed_work_sync(dw)
   1291  *
   1292  *	If work was scheduled or queued, remove it from the schedule or
   1293  *	queue and return true.  If work was not scheduled or queued,
   1294  *	return false.  Note that work may already be running; if it
   1295  *	hasn't been rescheduled or requeued, then cancel_delayed_work
   1296  *	will return false; either way, wait for it to complete.
   1297  */
   1298 bool
   1299 cancel_delayed_work_sync(struct delayed_work *dw)
   1300 {
   1301 	struct workqueue_struct *wq;
   1302 	bool cancelled_p;
   1303 
   1304 	/* If there's no workqueue, nothing to cancel.  */
   1305 	if ((wq = work_queue(&dw->work)) == NULL)
   1306 		return false;
   1307 
   1308 	mutex_enter(&wq->wq_lock);
   1309 	if (__predict_false(work_queue(&dw->work) != wq)) {
   1310 		cancelled_p = false;
   1311 	} else {
   1312 		switch (dw->dw_state) {
   1313 		case DELAYED_WORK_IDLE:
   1314 			/*
   1315 			 * It is either on the queue or already running
   1316 			 * or both.
   1317 			 */
   1318 			if (work_claimed(&dw->work, wq)) {
   1319 				/* On the queue.  Remove and release.  */
   1320 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
   1321 				    work_entry);
   1322 				SDT_PROBE2(sdt, linux, work, cancel,
   1323 				    &dw->work, wq);
   1324 				release_work(&dw->work, wq);
   1325 				/* Can't dereference dw after this point.  */
   1326 				cancelled_p = true;
   1327 			} else {
   1328 				/* Not on the queue, so didn't cancel. */
   1329 				cancelled_p = false;
   1330 			}
   1331 			/* If it's still running, wait for it to complete.  */
   1332 			if (wq->wq_current_work == &dw->work)
   1333 				wait_for_current_work(&dw->work, wq);
   1334 			break;
   1335 		case DELAYED_WORK_SCHEDULED:
   1336 			/*
   1337 			 * If it is scheduled, mark it cancelled and
   1338 			 * try to stop the callout before it starts.
   1339 			 *
   1340 			 * If it's too late and the callout has already
   1341 			 * begun to execute, we must wait for it to
   1342 			 * complete.  But we got in soon enough to ask
   1343 			 * the callout not to run, so we successfully
   1344 			 * cancelled it in that case.
   1345 			 *
   1346 			 * If we stopped the callout before it started,
   1347 			 * then we must destroy the callout and
   1348 			 * dissociate it from the workqueue ourselves.
   1349 			 */
   1350 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1351 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
   1352 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
   1353 				cancel_delayed_work_done(wq, dw);
   1354 			cancelled_p = true;
   1355 			break;
   1356 		case DELAYED_WORK_RESCHEDULED:
   1357 			/*
   1358 			 * If it is being rescheduled, the callout has
   1359 			 * already fired.  We must ask it to cancel and
   1360 			 * wait for it to complete.
   1361 			 */
   1362 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1363 			dw->dw_resched = -1;
   1364 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
   1365 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
   1366 			cancelled_p = true;
   1367 			break;
   1368 		case DELAYED_WORK_CANCELLED:
   1369 			/*
   1370 			 * If it is being cancelled, the callout has
   1371 			 * already fired.  We need only wait for it to
   1372 			 * complete.  Someone else, however, claims
   1373 			 * credit for cancelling it.
   1374 			 */
   1375 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
   1376 			cancelled_p = false;
   1377 			break;
   1378 		default:
   1379 			panic("invalid delayed work state: %d",
   1380 			    dw->dw_state);
   1381 		}
   1382 	}
   1383 	mutex_exit(&wq->wq_lock);
   1384 
   1385 	return cancelled_p;
   1386 }
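
         /*
          * Example (sketch): stop the self-rescheduling tick from the
          * hypothetical example above before freeing its softc.  On
          * return neither the callout nor the handler is running; a
          * handler that requeues itself, as mydrv_tick does, may
          * additionally need an external flag to keep it from being
          * rescheduled concurrently.
          *
          *	(void)cancel_delayed_work_sync(&sc->sc_tick);
          */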
   1387 
   1388 /*
   1390  * Flush
   1391  */
   1392 
   1393 /*
   1394  * flush_scheduled_work()
   1395  *
   1396  *	Wait for all work queued on system_wq to complete.  This does
   1397  *	not include delayed work.
   1398  */
   1399 void
   1400 flush_scheduled_work(void)
   1401 {
   1402 
   1403 	flush_workqueue(system_wq);
   1404 }
   1405 
   1406 /*
   1407  * flush_workqueue_locked(wq)
   1408  *
   1409  *	Wait for all work queued on wq to complete.  This does not
   1410  *	include delayed work.  True if there was work to be flushed,
    1411  *	false if the queue was empty.
   1412  *
   1413  *	Caller must hold wq's lock.
   1414  */
   1415 static bool
   1416 flush_workqueue_locked(struct workqueue_struct *wq)
   1417 {
   1418 	uint64_t gen;
   1419 	bool work_queued = false;
   1420 
   1421 	KASSERT(mutex_owned(&wq->wq_lock));
   1422 
   1423 	/* Get the current generation number.  */
   1424 	gen = wq->wq_gen;
   1425 
   1426 	/*
   1427 	 * If there's a batch of work in progress, we must wait for the
   1428 	 * worker thread to finish that batch.
   1429 	 */
   1430 	if (wq->wq_current_work != NULL) {
   1431 		gen++;
   1432 		work_queued = true;
   1433 	}
   1434 
   1435 	/*
   1436 	 * If there's any work yet to be claimed from the queue by the
   1437 	 * worker thread, we must wait for it to finish one more batch
   1438 	 * too.
   1439 	 */
   1440 	if (!TAILQ_EMPTY(&wq->wq_queue) || !TAILQ_EMPTY(&wq->wq_dqueue)) {
   1441 		gen++;
   1442 		work_queued = true;
   1443 	}
   1444 
   1445 	/* Wait until the generation number has caught up.  */
   1446 	SDT_PROBE1(sdt, linux, work, flush__start,  wq);
   1447 	while (wq->wq_gen < gen)
   1448 		cv_wait(&wq->wq_cv, &wq->wq_lock);
   1449 	SDT_PROBE1(sdt, linux, work, flush__done,  wq);
   1450 
   1451 	/* Return whether we had to wait for anything.  */
   1452 	return work_queued;
   1453 }
   1454 
   1455 /*
   1456  * flush_workqueue(wq)
   1457  *
   1458  *	Wait for all work queued on wq to complete.  This does not
   1459  *	include delayed work.
   1460  */
   1461 void
   1462 flush_workqueue(struct workqueue_struct *wq)
   1463 {
   1464 
   1465 	mutex_enter(&wq->wq_lock);
   1466 	(void)flush_workqueue_locked(wq);
   1467 	mutex_exit(&wq->wq_lock);
   1468 }
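
         /*
          * Example (sketch): wait for everything queued so far on the
          * hypothetical private workqueue to complete, e.g. before
          * suspending the device.  Work queued after this call is not
          * waited for.
          *
          *	flush_workqueue(sc->sc_wq);
          */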
   1469 
   1470 /*
   1471  * drain_workqueue(wq)
   1472  *
   1473  *	Repeatedly flush wq until there is no more work.
   1474  */
   1475 void
   1476 drain_workqueue(struct workqueue_struct *wq)
   1477 {
   1478 	unsigned ntries = 0;
   1479 
   1480 	mutex_enter(&wq->wq_lock);
   1481 	while (flush_workqueue_locked(wq)) {
   1482 		if (ntries++ == 10 || (ntries % 100) == 0)
   1483 			printf("linux workqueue %s"
    1484 			    ": still clogged after %u flushes\n",
   1485 			    wq->wq_name, ntries);
   1486 	}
   1487 	mutex_exit(&wq->wq_lock);
   1488 }
   1489 
   1490 /*
   1491  * flush_work(work)
   1492  *
   1493  *	If work is queued or currently executing, wait for it to
   1494  *	complete.
   1495  *
   1496  *	Return true if we waited to flush it, false if it was already
   1497  *	idle.
   1498  */
   1499 bool
   1500 flush_work(struct work_struct *work)
   1501 {
   1502 	struct workqueue_struct *wq;
   1503 
   1504 	/* If there's no workqueue, nothing to flush.  */
   1505 	if ((wq = work_queue(work)) == NULL)
   1506 		return false;
   1507 
   1508 	flush_workqueue(wq);
   1509 	return true;
   1510 }
   1511 
   1512 /*
   1513  * flush_delayed_work(dw)
   1514  *
   1515  *	If dw is scheduled to run after a delay, queue it immediately
   1516  *	instead.  Then, if dw is queued or currently executing, wait
   1517  *	for it to complete.
   1518  */
   1519 bool
   1520 flush_delayed_work(struct delayed_work *dw)
   1521 {
   1522 	struct workqueue_struct *wq;
   1523 	bool waited = false;
   1524 
   1525 	/* If there's no workqueue, nothing to flush.  */
   1526 	if ((wq = work_queue(&dw->work)) == NULL)
   1527 		return false;
   1528 
   1529 	mutex_enter(&wq->wq_lock);
   1530 	if (__predict_false(work_queue(&dw->work) != wq)) {
   1531 		/*
   1532 		 * Moved off the queue already (and possibly to another
   1533 		 * queue, though that would be ill-advised), so it must
   1534 		 * have completed, and we have nothing more to do.
   1535 		 */
   1536 		waited = false;
   1537 	} else {
   1538 		switch (dw->dw_state) {
   1539 		case DELAYED_WORK_IDLE:
   1540 			/*
   1541 			 * It has a workqueue assigned and the callout
   1542 			 * is idle, so it must be in progress or on the
   1543 			 * queue.  In that case, we'll wait for it to
   1544 			 * complete.
   1545 			 */
   1546 			break;
   1547 		case DELAYED_WORK_SCHEDULED:
   1548 		case DELAYED_WORK_RESCHEDULED:
   1549 		case DELAYED_WORK_CANCELLED:
   1550 			/*
   1551 			 * The callout is scheduled, and may have even
   1552 			 * started.  Mark it as scheduled so that if
   1553 			 * the callout has fired it will queue the work
   1554 			 * itself.  Try to stop the callout -- if we
   1555 			 * can, queue the work now; if we can't, wait
   1556 			 * for the callout to complete, which entails
   1557 			 * queueing it.
   1558 			 */
   1559 			dw->dw_state = DELAYED_WORK_SCHEDULED;
   1560 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock)) {
   1561 				/*
   1562 				 * We stopped it before it ran.  No
   1563 				 * state change in the interim is
   1564 				 * possible.  Destroy the callout and
   1565 				 * queue it ourselves.
   1566 				 */
   1567 				KASSERT(dw->dw_state ==
   1568 				    DELAYED_WORK_SCHEDULED);
   1569 				dw_callout_destroy(wq, dw);
   1570 				TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
   1571 				    work_entry);
   1572 				cv_broadcast(&wq->wq_cv);
   1573 				SDT_PROBE2(sdt, linux, work, queue,
   1574 				    &dw->work, wq);
   1575 			}
   1576 			break;
   1577 		default:
   1578 			panic("invalid delayed work state: %d", dw->dw_state);
   1579 		}
   1580 		/*
   1581 		 * Waiting for the whole queue to flush is overkill,
   1582 		 * but doesn't hurt.
   1583 		 */
   1584 		(void)flush_workqueue_locked(wq);
   1585 		waited = true;
   1586 	}
   1587 	mutex_exit(&wq->wq_lock);
   1588 
   1589 	return waited;
   1590 }
   1591 
   1592 /*
   1593  * delayed_work_pending(dw)
   1594  *
   1595  *	True if dw is currently scheduled to execute, false if not.
   1596  */
   1597 bool
   1598 delayed_work_pending(const struct delayed_work *dw)
   1599 {
   1600 
   1601 	return work_pending(&dw->work);
   1602 }
   1603