Home | History | Annotate | Line # | Download | only in linux
linux_work.c revision 1.54
      1 /*	$NetBSD: linux_work.c,v 1.54 2021/12/19 11:40:05 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2018 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Taylor R. Campbell.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: linux_work.c,v 1.54 2021/12/19 11:40:05 riastradh Exp $");
     34 
     35 #include <sys/types.h>
     36 #include <sys/atomic.h>
     37 #include <sys/callout.h>
     38 #include <sys/condvar.h>
     39 #include <sys/errno.h>
     40 #include <sys/kmem.h>
     41 #include <sys/kthread.h>
     42 #include <sys/lwp.h>
     43 #include <sys/mutex.h>
     44 #ifndef _MODULE
     45 #include <sys/once.h>
     46 #endif
     47 #include <sys/queue.h>
     48 #include <sys/sdt.h>
     49 
     50 #include <linux/workqueue.h>
     51 
/* Tail-queue head types for plain work items and for delayed work items. */
TAILQ_HEAD(work_head, work_struct);
TAILQ_HEAD(dwork_head, delayed_work);

/*
 * struct workqueue_struct
 *
 *	State of one workqueue, which is served by a single worker
 *	thread.  The functions in this file access the queues,
 *	wq_current_work, wq_dying, and wq_gen with wq_lock held.
 */
struct workqueue_struct {
	kmutex_t		wq_lock;    /* serializes queue state */
	kcondvar_t		wq_cv;      /* broadcast on queueing, on work
					     * and batch completion, and on
					     * dying */
	struct dwork_head	wq_delayed; /* delayed work scheduled */
	struct work_head	wq_queue;   /* work to run */
	struct work_head	wq_dqueue;  /* delayed work to run now */
	struct work_struct	*wq_current_work; /* work being run, or NULL */
	int			wq_flags;   /* set to 0 by alloc_workqueue */
	bool			wq_dying;   /* worker thread asked to exit */
	uint64_t		wq_gen;     /* bumped after each batch */
	struct lwp		*wq_lwp;    /* worker thread */
	const char		*wq_name;   /* caller's name pointer (not
					     * copied) */
};
     68 
/*
 * Forward declarations of the file-local helpers: the worker thread
 * body, the delayed-work callout handler, the work ownership helpers,
 * and the delayed-work callout lifecycle helpers.
 */
static void __dead	linux_workqueue_thread(void *);
static void		linux_workqueue_timeout(void *);
static bool		work_claimed(struct work_struct *,
			    struct workqueue_struct *);
static struct workqueue_struct *
			work_queue(struct work_struct *);
static bool		acquire_work(struct work_struct *,
			    struct workqueue_struct *);
static void		release_work(struct work_struct *,
			    struct workqueue_struct *);
static void		wait_for_current_work(struct work_struct *,
			    struct workqueue_struct *);
static void		dw_callout_init(struct workqueue_struct *,
			    struct delayed_work *);
static void		dw_callout_destroy(struct workqueue_struct *,
			    struct delayed_work *);
static void		cancel_delayed_work_done(struct workqueue_struct *,
			    struct delayed_work *);
     87 
/*
 * Static tracing probes, all under sdt:linux:work:.  The string
 * arguments name the C types of the probe arguments, with the
 * argument's role noted in the trailing comment.
 *
 * NOTE(review): wait__start and wait__done are declared here with a
 * `struct delayed_work *' first argument, but wait_for_current_work()
 * fires them with a `struct work_struct *'.  Confirm against the other
 * users of these probes before changing the declared type.
 */
SDT_PROBE_DEFINE2(sdt, linux, work, acquire,
    "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE2(sdt, linux, work, release,
    "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE2(sdt, linux, work, queue,
    "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE2(sdt, linux, work, cancel,
    "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE3(sdt, linux, work, schedule,
    "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/,
    "unsigned long"/*ticks*/);
SDT_PROBE_DEFINE2(sdt, linux, work, timer,
    "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE2(sdt, linux, work, wait__start,
    "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE2(sdt, linux, work, wait__done,
    "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE2(sdt, linux, work, run,
    "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE2(sdt, linux, work, done,
    "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE1(sdt, linux, work, batch__start,
    "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE1(sdt, linux, work, batch__done,
    "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE1(sdt, linux, work, flush__start,
    "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE1(sdt, linux, work, flush__done,
    "struct workqueue_struct *"/*wq*/);
    117 
/*
 * LWP-specific data key.  Each worker thread stores its workqueue
 * under this key so that current_work() can find it.
 */
static specificdata_key_t workqueue_key __read_mostly;

/*
 * System-wide workqueues, created by linux_workqueue_init0().  Each is
 * allocated as its own ordered workqueue.
 */
struct workqueue_struct	*system_highpri_wq __read_mostly;
struct workqueue_struct	*system_long_wq __read_mostly;
struct workqueue_struct	*system_power_efficient_wq __read_mostly;
struct workqueue_struct	*system_unbound_wq __read_mostly;
struct workqueue_struct	*system_wq __read_mostly;
    125 
/*
 * atomic_cas_uintptr(p, old, new)
 *
 *	Compare-and-swap on a uintptr_t, implemented by viewing the
 *	values as pointers and delegating to atomic_cas_ptr.  Returns
 *	whatever atomic_cas_ptr returns, converted back to uintptr_t.
 */
static inline uintptr_t
atomic_cas_uintptr(volatile uintptr_t *p, uintptr_t old, uintptr_t new)
{
	void *const expected = (void *)old;
	void *const desired = (void *)new;
	void *const witnessed = atomic_cas_ptr(p, expected, desired);

	return (uintptr_t)witnessed;
}
    132 
    133 /*
    134  * linux_workqueue_init()
    135  *
    136  *	Initialize the Linux workqueue subsystem.  Return 0 on success,
    137  *	NetBSD error on failure.
    138  */
    139 static int
    140 linux_workqueue_init0(void)
    141 {
    142 	int error;
    143 
    144 	error = lwp_specific_key_create(&workqueue_key, NULL);
    145 	if (error)
    146 		goto out;
    147 
    148 	system_highpri_wq = alloc_ordered_workqueue("lnxhipwq", 0);
    149 	if (system_highpri_wq == NULL) {
    150 		error = ENOMEM;
    151 		goto out;
    152 	}
    153 
    154 	system_long_wq = alloc_ordered_workqueue("lnxlngwq", 0);
    155 	if (system_long_wq == NULL) {
    156 		error = ENOMEM;
    157 		goto out;
    158 	}
    159 
    160 	system_power_efficient_wq = alloc_ordered_workqueue("lnxpwrwq", 0);
    161 	if (system_power_efficient_wq == NULL) {
    162 		error = ENOMEM;
    163 		goto out;
    164 	}
    165 
    166 	system_unbound_wq = alloc_ordered_workqueue("lnxubdwq", 0);
    167 	if (system_unbound_wq == NULL) {
    168 		error = ENOMEM;
    169 		goto out;
    170 	}
    171 
    172 	system_wq = alloc_ordered_workqueue("lnxsyswq", 0);
    173 	if (system_wq == NULL) {
    174 		error = ENOMEM;
    175 		goto out;
    176 	}
    177 
    178 	/* Success!  */
    179 	error = 0;
    180 
    181 out:	if (error) {
    182 		if (system_highpri_wq)
    183 			destroy_workqueue(system_highpri_wq);
    184 		if (system_long_wq)
    185 			destroy_workqueue(system_long_wq);
    186 		if (system_power_efficient_wq)
    187 			destroy_workqueue(system_power_efficient_wq);
    188 		if (system_unbound_wq)
    189 			destroy_workqueue(system_unbound_wq);
    190 		if (system_wq)
    191 			destroy_workqueue(system_wq);
    192 		if (workqueue_key)
    193 			lwp_specific_key_delete(workqueue_key);
    194 	}
    195 
    196 	return error;
    197 }
    198 
    199 /*
    200  * linux_workqueue_fini()
    201  *
    202  *	Destroy the Linux workqueue subsystem.  Never fails.
    203  */
    204 static void
    205 linux_workqueue_fini0(void)
    206 {
    207 
    208 	destroy_workqueue(system_power_efficient_wq);
    209 	destroy_workqueue(system_long_wq);
    210 	destroy_workqueue(system_wq);
    211 	lwp_specific_key_delete(workqueue_key);
    212 }
    213 
    214 #ifndef _MODULE
    215 static ONCE_DECL(linux_workqueue_init_once);
    216 #endif
    217 
    218 int
    219 linux_workqueue_init(void)
    220 {
    221 #ifdef _MODULE
    222 	return linux_workqueue_init0();
    223 #else
    224 	return INIT_ONCE(&linux_workqueue_init_once, &linux_workqueue_init0);
    225 #endif
    226 }
    227 
    228 void
    229 linux_workqueue_fini(void)
    230 {
    231 #ifdef _MODULE
    232 	return linux_workqueue_fini0();
    233 #else
    234 	return FINI_ONCE(&linux_workqueue_init_once, &linux_workqueue_fini0);
    235 #endif
    236 }
    237 
    238 /*
    240  * Workqueues
    241  */
    242 
    243 /*
    244  * alloc_workqueue(name, flags, max_active)
    245  *
    246  *	Create a workqueue of the given name.  max_active is the
    247  *	maximum number of work items in flight, or 0 for the default.
    248  *	Return NULL on failure, pointer to struct workqueue_struct
    249  *	object on success.
    250  */
    251 struct workqueue_struct *
    252 alloc_workqueue(const char *name, int flags, unsigned max_active)
    253 {
    254 	struct workqueue_struct *wq;
    255 	int error;
    256 
    257 	KASSERT(max_active == 0 || max_active == 1);
    258 
    259 	wq = kmem_zalloc(sizeof(*wq), KM_SLEEP);
    260 
    261 	mutex_init(&wq->wq_lock, MUTEX_DEFAULT, IPL_VM);
    262 	cv_init(&wq->wq_cv, name);
    263 	TAILQ_INIT(&wq->wq_delayed);
    264 	TAILQ_INIT(&wq->wq_queue);
    265 	TAILQ_INIT(&wq->wq_dqueue);
    266 	wq->wq_current_work = NULL;
    267 	wq->wq_flags = 0;
    268 	wq->wq_dying = false;
    269 	wq->wq_gen = 0;
    270 	wq->wq_lwp = NULL;
    271 	wq->wq_name = name;
    272 
    273 	error = kthread_create(PRI_NONE,
    274 	    KTHREAD_MPSAFE|KTHREAD_TS|KTHREAD_MUSTJOIN, NULL,
    275 	    &linux_workqueue_thread, wq, &wq->wq_lwp, "%s", name);
    276 	if (error)
    277 		goto fail0;
    278 
    279 	return wq;
    280 
    281 fail0:	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
    282 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
    283 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
    284 	cv_destroy(&wq->wq_cv);
    285 	mutex_destroy(&wq->wq_lock);
    286 	kmem_free(wq, sizeof(*wq));
    287 	return NULL;
    288 }
    289 
/*
 * alloc_ordered_workqueue(name, flags)
 *
 *	Convenience wrapper: alloc_workqueue with max_active fixed
 *	at 1.
 */
struct workqueue_struct *
alloc_ordered_workqueue(const char *name, int flags)
{
	const unsigned max_active = 1;

	return alloc_workqueue(name, flags, max_active);
}
    301 
/*
 * destroy_workqueue(wq)
 *
 *	Destroy the workqueue wq.  Cancel any pending delayed work.
 *	Wait for all queued work to complete, join the worker thread,
 *	and free wq.
 *
 *	May sleep.
 */
void
destroy_workqueue(struct workqueue_struct *wq)
{

	/*
	 * Cancel all delayed work.  We do this first because any
	 * delayed work that has already timed out, which we can't
	 * cancel, may have queued new work.
	 */
	mutex_enter(&wq->wq_lock);
	while (!TAILQ_EMPTY(&wq->wq_delayed)) {
		struct delayed_work *const dw = TAILQ_FIRST(&wq->wq_delayed);

		KASSERT(work_queue(&dw->work) == wq);
		KASSERTMSG((dw->dw_state == DELAYED_WORK_SCHEDULED ||
			dw->dw_state == DELAYED_WORK_RESCHEDULED ||
			dw->dw_state == DELAYED_WORK_CANCELLED),
		    "delayed work %p in bad state: %d",
		    dw, dw->dw_state);

		/*
		 * Mark it cancelled and try to stop the callout before
		 * it starts.
		 *
		 * If it's too late and the callout has already begun
		 * to execute, then it will notice that we asked to
		 * cancel it and remove itself from the queue before
		 * returning.
		 *
		 * If we stopped the callout before it started,
		 * however, then we can safely destroy the callout and
		 * dissociate it from the workqueue ourselves.
		 */
		SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
		dw->dw_state = DELAYED_WORK_CANCELLED;
		if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
			cancel_delayed_work_done(wq, dw);
	}
	mutex_exit(&wq->wq_lock);

	/*
	 * At this point, no new work can be put on the queue.
	 */

	/* Tell the thread to exit.  */
	mutex_enter(&wq->wq_lock);
	wq->wq_dying = true;
	cv_broadcast(&wq->wq_cv);
	mutex_exit(&wq->wq_lock);

	/*
	 * Wait for it to exit.  The worker only exits once both run
	 * queues are empty, so all queued work has run by then.
	 */
	(void)kthread_join(wq->wq_lwp);

	/* The worker is gone; verify it left the queue quiescent.  */
	KASSERT(wq->wq_dying);
	KASSERT(wq->wq_flags == 0);
	KASSERT(wq->wq_current_work == NULL);
	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
	cv_destroy(&wq->wq_cv);
	mutex_destroy(&wq->wq_lock);

	kmem_free(wq, sizeof(*wq));
}
    374 
    375 /*
    377  * Work thread and callout
    378  */
    379 
/*
 * linux_workqueue_thread(cookie)
 *
 *	Main function for a workqueue's worker thread.  cookie is the
 *	struct workqueue_struct to serve.  Waits until there is work
 *	queued, grabs a batch of work off the queue, executes it all,
 *	bumps the generation number, and repeats, until dying.
 *
 *	Never returns; exits via kthread_exit once wq_dying is set and
 *	both run queues are empty.
 */
static void __dead
linux_workqueue_thread(void *cookie)
{
	struct workqueue_struct *const wq = cookie;
	/* Run queues, served in this order within each batch.  */
	struct work_head *const q[2] = { &wq->wq_queue, &wq->wq_dqueue };
	struct work_struct marker, *work;
	unsigned i;

	/* Let current_work() find this thread's workqueue.  */
	lwp_setspecific(workqueue_key, wq);

	mutex_enter(&wq->wq_lock);
	for (;;) {
		/*
		 * Wait until there's activity.  If there's no work and
		 * we're dying, stop here.
		 */
		if (TAILQ_EMPTY(&wq->wq_queue) &&
		    TAILQ_EMPTY(&wq->wq_dqueue)) {
			if (wq->wq_dying)
				break;
			cv_wait(&wq->wq_cv, &wq->wq_lock);
			continue;
		}

		/*
		 * Start a batch of work.  Use a marker to delimit when
		 * the batch ends so we can advance the generation
		 * after the batch.
		 */
		SDT_PROBE1(sdt, linux, work, batch__start,  wq);
		for (i = 0; i < 2; i++) {
			if (TAILQ_EMPTY(q[i]))
				continue;
			/*
			 * Work queued while the lock is dropped below
			 * lands after the marker and so belongs to the
			 * next batch.
			 */
			TAILQ_INSERT_TAIL(q[i], &marker, work_entry);
			while ((work = TAILQ_FIRST(q[i])) != &marker) {
				void (*func)(struct work_struct *);

				KASSERT(work_queue(work) == wq);
				KASSERT(work_claimed(work, wq));
				KASSERTMSG((q[i] != &wq->wq_dqueue ||
					container_of(work, struct delayed_work,
					    work)->dw_state ==
					DELAYED_WORK_IDLE),
				    "delayed work %p queued and scheduled",
				    work);

				/*
				 * Dequeue, record it as current, and
				 * fetch the function pointer before
				 * releasing the claim.
				 */
				TAILQ_REMOVE(q[i], work, work_entry);
				KASSERT(wq->wq_current_work == NULL);
				wq->wq_current_work = work;
				func = work->func;
				release_work(work, wq);
				/* Can't dereference work after this point.  */

				/* Run the work function without the lock.  */
				mutex_exit(&wq->wq_lock);
				SDT_PROBE2(sdt, linux, work, run,  work, wq);
				(*func)(work);
				SDT_PROBE2(sdt, linux, work, done,  work, wq);
				mutex_enter(&wq->wq_lock);

				/*
				 * Only the pointer value is compared
				 * here; work itself is not dereferenced.
				 * Wake anyone in wait_for_current_work.
				 */
				KASSERT(wq->wq_current_work == work);
				wq->wq_current_work = NULL;
				cv_broadcast(&wq->wq_cv);
			}
			TAILQ_REMOVE(q[i], &marker, work_entry);
		}

		/* Notify flush that we've completed a batch of work.  */
		wq->wq_gen++;
		cv_broadcast(&wq->wq_cv);
		SDT_PROBE1(sdt, linux, work, batch__done,  wq);
	}
	mutex_exit(&wq->wq_lock);

	kthread_exit(0);
}
    463 
/*
 * linux_workqueue_timeout(cookie)
 *
 *	Delayed work timeout callback.  cookie is the struct
 *	delayed_work whose callout fired.
 *
 *	- If scheduled, destroy the callout and put the work on the
 *	  workqueue's wq_dqueue for the worker thread to run.
 *	- If rescheduled, callout_schedule ourselves again with the
 *	  saved dw_resched delay.
 *	- If cancelled, destroy the callout and release the work from
 *	  the workqueue.
 */
static void
linux_workqueue_timeout(void *cookie)
{
	struct delayed_work *const dw = cookie;
	struct workqueue_struct *const wq = work_queue(&dw->work);

	KASSERTMSG(wq != NULL,
	    "delayed work %p state %d resched %d",
	    dw, dw->dw_state, dw->dw_resched);

	SDT_PROBE2(sdt, linux, work, timer,  dw, wq);

	mutex_enter(&wq->wq_lock);
	KASSERT(work_queue(&dw->work) == wq);
	switch (dw->dw_state) {
	case DELAYED_WORK_IDLE:
		/* An idle delayed work has no initialized callout.  */
		panic("delayed work callout uninitialized: %p", dw);
	case DELAYED_WORK_SCHEDULED:
		/* dw_callout_destroy leaves dw in DELAYED_WORK_IDLE.  */
		dw_callout_destroy(wq, dw);
		TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work, work_entry);
		cv_broadcast(&wq->wq_cv);
		SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
		break;
	case DELAYED_WORK_RESCHEDULED:
		KASSERT(dw->dw_resched >= 0);
		callout_schedule(&dw->dw_callout, dw->dw_resched);
		dw->dw_state = DELAYED_WORK_SCHEDULED;
		dw->dw_resched = -1;
		break;
	case DELAYED_WORK_CANCELLED:
		cancel_delayed_work_done(wq, dw);
		/* Can't dereference dw after this point.  */
		goto out;
	default:
		panic("delayed work callout in bad state: %p", dw);
	}
	/*
	 * Only the scheduled and rescheduled cases reach here; they
	 * leave dw idle or scheduled, respectively.
	 */
	KASSERT(dw->dw_state == DELAYED_WORK_IDLE ||
	    dw->dw_state == DELAYED_WORK_SCHEDULED);
out:	mutex_exit(&wq->wq_lock);
}
    514 
    515 /*
    516  * current_work()
    517  *
    518  *	If in a workqueue worker thread, return the work it is
    519  *	currently executing.  Otherwise return NULL.
    520  */
    521 struct work_struct *
    522 current_work(void)
    523 {
    524 	struct workqueue_struct *wq = lwp_getspecific(workqueue_key);
    525 
    526 	/* If we're not a workqueue thread, then there's no work.  */
    527 	if (wq == NULL)
    528 		return NULL;
    529 
    530 	/*
    531 	 * Otherwise, this should be possible only while work is in
    532 	 * progress.  Return the current work item.
    533 	 */
    534 	KASSERT(wq->wq_current_work != NULL);
    535 	return wq->wq_current_work;
    536 }
    537 
    538 /*
    540  * Work
    541  */
    542 
    543 /*
    544  * INIT_WORK(work, fn)
    545  *
    546  *	Initialize work for use with a workqueue to call fn in a worker
    547  *	thread.  There is no corresponding destruction operation.
    548  */
    549 void
    550 INIT_WORK(struct work_struct *work, void (*fn)(struct work_struct *))
    551 {
    552 
    553 	work->work_owner = 0;
    554 	work->func = fn;
    555 }
    556 
    557 /*
    558  * work_claimed(work, wq)
    559  *
    560  *	True if work is currently claimed by a workqueue, meaning it is
    561  *	either on the queue or scheduled in a callout.  The workqueue
    562  *	must be wq, and caller must hold wq's lock.
    563  */
    564 static bool
    565 work_claimed(struct work_struct *work, struct workqueue_struct *wq)
    566 {
    567 
    568 	KASSERT(work_queue(work) == wq);
    569 	KASSERT(mutex_owned(&wq->wq_lock));
    570 
    571 	return atomic_load_relaxed(&work->work_owner) & 1;
    572 }
    573 
    574 /*
    575  * work_pending(work)
    576  *
    577  *	True if work is currently claimed by any workqueue, scheduled
    578  *	to run on that workqueue.
    579  */
    580 bool
    581 work_pending(const struct work_struct *work)
    582 {
    583 
    584 	return atomic_load_relaxed(&work->work_owner) & 1;
    585 }
    586 
    587 /*
    588  * work_queue(work)
    589  *
    590  *	Return the last queue that work was queued on, or NULL if it
    591  *	was never queued.
    592  */
    593 static struct workqueue_struct *
    594 work_queue(struct work_struct *work)
    595 {
    596 
    597 	return (struct workqueue_struct *)
    598 	    (atomic_load_relaxed(&work->work_owner) & ~(uintptr_t)1);
    599 }
    600 
    601 /*
    602  * acquire_work(work, wq)
    603  *
    604  *	Try to claim work for wq.  If work is already claimed, it must
    605  *	be claimed by wq; return false.  If work is not already
    606  *	claimed, claim it, issue a memory barrier to match any prior
    607  *	release_work, and return true.
    608  *
    609  *	Caller must hold wq's lock.
    610  */
    611 static bool
    612 acquire_work(struct work_struct *work, struct workqueue_struct *wq)
    613 {
    614 	uintptr_t owner0, owner;
    615 
    616 	KASSERT(mutex_owned(&wq->wq_lock));
    617 	KASSERT(((uintptr_t)wq & 1) == 0);
    618 
    619 	owner = (uintptr_t)wq | 1;
    620 	do {
    621 		owner0 = atomic_load_relaxed(&work->work_owner);
    622 		if (owner0 & 1) {
    623 			KASSERT((owner0 & ~(uintptr_t)1) == (uintptr_t)wq);
    624 			return false;
    625 		}
    626 		KASSERT(owner0 == (uintptr_t)NULL || owner0 == (uintptr_t)wq);
    627 	} while (atomic_cas_uintptr(&work->work_owner, owner0, owner) !=
    628 	    owner0);
    629 
    630 	KASSERT(work_queue(work) == wq);
    631 	membar_enter();
    632 	SDT_PROBE2(sdt, linux, work, acquire,  work, wq);
    633 	return true;
    634 }
    635 
    636 /*
    637  * release_work(work, wq)
    638  *
    639  *	Issue a memory barrier to match any subsequent acquire_work and
    640  *	dissociate work from wq.
    641  *
    642  *	Caller must hold wq's lock and work must be associated with wq.
    643  */
    644 static void
    645 release_work(struct work_struct *work, struct workqueue_struct *wq)
    646 {
    647 
    648 	KASSERT(work_queue(work) == wq);
    649 	KASSERT(mutex_owned(&wq->wq_lock));
    650 
    651 	SDT_PROBE2(sdt, linux, work, release,  work, wq);
    652 	membar_exit();
    653 
    654 	/*
    655 	 * Non-interlocked r/m/w is safe here because nobody else can
    656 	 * write to this while the claimed bit is set and the workqueue
    657 	 * lock is held.
    658 	 */
    659 	atomic_store_relaxed(&work->work_owner,
    660 	    atomic_load_relaxed(&work->work_owner) & ~(uintptr_t)1);
    661 }
    662 
    663 /*
    664  * schedule_work(work)
    665  *
    666  *	If work is not already queued on system_wq, queue it to be run
    667  *	by system_wq's worker thread when it next can.  True if it was
    668  *	newly queued, false if it was already queued.  If the work was
    669  *	already running, queue it to run again.
    670  *
    671  *	Caller must ensure work is not queued to run on a different
    672  *	workqueue.
    673  */
    674 bool
    675 schedule_work(struct work_struct *work)
    676 {
    677 
    678 	return queue_work(system_wq, work);
    679 }
    680 
    681 /*
    682  * queue_work(wq, work)
    683  *
    684  *	If work is not already queued on wq, queue it to be run by wq's
    685  *	worker thread when it next can.  True if it was newly queued,
    686  *	false if it was already queued.  If the work was already
    687  *	running, queue it to run again.
    688  *
    689  *	Caller must ensure work is not queued to run on a different
    690  *	workqueue.
    691  */
    692 bool
    693 queue_work(struct workqueue_struct *wq, struct work_struct *work)
    694 {
    695 	bool newly_queued;
    696 
    697 	KASSERT(wq != NULL);
    698 
    699 	mutex_enter(&wq->wq_lock);
    700 	if (__predict_true(acquire_work(work, wq))) {
    701 		/*
    702 		 * It wasn't on any workqueue at all.  Put it on this
    703 		 * one, and signal the worker thread that there is work
    704 		 * to do.
    705 		 */
    706 		TAILQ_INSERT_TAIL(&wq->wq_queue, work, work_entry);
    707 		cv_broadcast(&wq->wq_cv);
    708 		SDT_PROBE2(sdt, linux, work, queue,  work, wq);
    709 		newly_queued = true;
    710 	} else {
    711 		/*
    712 		 * It was already on this workqueue.  Nothing to do
    713 		 * since it is already queued.
    714 		 */
    715 		newly_queued = false;
    716 	}
    717 	mutex_exit(&wq->wq_lock);
    718 
    719 	return newly_queued;
    720 }
    721 
    722 /*
    723  * cancel_work(work)
    724  *
    725  *	If work was queued, remove it from the queue and return true.
    726  *	If work was not queued, return false.  Work may still be
    727  *	running when this returns.
    728  */
    729 bool
    730 cancel_work(struct work_struct *work)
    731 {
    732 	struct workqueue_struct *wq;
    733 	bool cancelled_p = false;
    734 
    735 	/* If there's no workqueue, nothing to cancel.   */
    736 	if ((wq = work_queue(work)) == NULL)
    737 		goto out;
    738 
    739 	mutex_enter(&wq->wq_lock);
    740 	if (__predict_false(work_queue(work) != wq)) {
    741 		/*
    742 		 * It has finished execution or been cancelled by
    743 		 * another thread, and has been moved off the
    744 		 * workqueue, so it's too to cancel.
    745 		 */
    746 		cancelled_p = false;
    747 	} else {
    748 		/* Check whether it's on the queue.  */
    749 		if (work_claimed(work, wq)) {
    750 			/*
    751 			 * It is still on the queue.  Take it off the
    752 			 * queue and report successful cancellation.
    753 			 */
    754 			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
    755 			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
    756 			release_work(work, wq);
    757 			/* Can't dereference work after this point.  */
    758 			cancelled_p = true;
    759 		} else {
    760 			/* Not on the queue.  Couldn't cancel it.  */
    761 			cancelled_p = false;
    762 		}
    763 	}
    764 	mutex_exit(&wq->wq_lock);
    765 
    766 out:	return cancelled_p;
    767 }
    768 
    769 /*
    770  * cancel_work_sync(work)
    771  *
    772  *	If work was queued, remove it from the queue and return true.
    773  *	If work was not queued, return false.  Either way, if work is
    774  *	currently running, wait for it to complete.
    775  *
    776  *	May sleep.
    777  */
    778 bool
    779 cancel_work_sync(struct work_struct *work)
    780 {
    781 	struct workqueue_struct *wq;
    782 	bool cancelled_p = false;
    783 
    784 	/* If there's no workqueue, nothing to cancel.   */
    785 	if ((wq = work_queue(work)) == NULL)
    786 		goto out;
    787 
    788 	mutex_enter(&wq->wq_lock);
    789 	if (__predict_false(work_queue(work) != wq)) {
    790 		/*
    791 		 * It has finished execution or been cancelled by
    792 		 * another thread, and has been moved off the
    793 		 * workqueue, so it's too late to cancel.
    794 		 */
    795 		cancelled_p = false;
    796 	} else {
    797 		/* Check whether it's on the queue.  */
    798 		if (work_claimed(work, wq)) {
    799 			/*
    800 			 * It is still on the queue.  Take it off the
    801 			 * queue and report successful cancellation.
    802 			 */
    803 			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
    804 			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
    805 			release_work(work, wq);
    806 			/* Can't dereference work after this point.  */
    807 			cancelled_p = true;
    808 		} else {
    809 			/* Not on the queue.  Couldn't cancel it.  */
    810 			cancelled_p = false;
    811 		}
    812 		/* If it's still running, wait for it to complete.  */
    813 		if (wq->wq_current_work == work)
    814 			wait_for_current_work(work, wq);
    815 	}
    816 	mutex_exit(&wq->wq_lock);
    817 
    818 out:	return cancelled_p;
    819 }
    820 
    821 /*
    822  * wait_for_current_work(work, wq)
    823  *
    824  *	wq must be currently executing work.  Wait for it to finish.
    825  *
    826  *	Does not dereference work.
    827  */
    828 static void
    829 wait_for_current_work(struct work_struct *work, struct workqueue_struct *wq)
    830 {
    831 	uint64_t gen;
    832 
    833 	KASSERT(mutex_owned(&wq->wq_lock));
    834 	KASSERT(wq->wq_current_work == work);
    835 
    836 	/* Wait only one generation in case it gets requeued quickly.  */
    837 	SDT_PROBE2(sdt, linux, work, wait__start,  work, wq);
    838 	gen = wq->wq_gen;
    839 	do {
    840 		cv_wait(&wq->wq_cv, &wq->wq_lock);
    841 	} while (wq->wq_current_work == work && wq->wq_gen == gen);
    842 	SDT_PROBE2(sdt, linux, work, wait__done,  work, wq);
    843 }
    844 
    845 /*
    847  * Delayed work
    848  */
    849 
    850 /*
    851  * INIT_DELAYED_WORK(dw, fn)
    852  *
    853  *	Initialize dw for use with a workqueue to call fn in a worker
    854  *	thread after a delay.  There is no corresponding destruction
    855  *	operation.
    856  */
    857 void
    858 INIT_DELAYED_WORK(struct delayed_work *dw, void (*fn)(struct work_struct *))
    859 {
    860 
    861 	INIT_WORK(&dw->work, fn);
    862 	dw->dw_state = DELAYED_WORK_IDLE;
    863 	dw->dw_resched = -1;
    864 
    865 	/*
    866 	 * Defer callout_init until we are going to schedule the
    867 	 * callout, which can then callout_destroy it, because
    868 	 * otherwise since there's no DESTROY_DELAYED_WORK or anything
    869 	 * we have no opportunity to call callout_destroy.
    870 	 */
    871 }
    872 
    873 /*
    874  * schedule_delayed_work(dw, ticks)
    875  *
    876  *	If it is not currently scheduled, schedule dw to run after
    877  *	ticks on system_wq.  If currently executing and not already
    878  *	rescheduled, reschedule it.  True if it was newly scheduled,
    879  *	false if it was already scheduled.
    880  *
    881  *	If ticks == 0, queue it to run as soon as the worker can,
    882  *	without waiting for the next callout tick to run.
    883  */
    884 bool
    885 schedule_delayed_work(struct delayed_work *dw, unsigned long ticks)
    886 {
    887 
    888 	return queue_delayed_work(system_wq, dw, ticks);
    889 }
    890 
    891 /*
    892  * dw_callout_init(wq, dw)
    893  *
    894  *	Initialize the callout of dw and transition to
    895  *	DELAYED_WORK_SCHEDULED.  Caller must use callout_schedule.
    896  */
    897 static void
    898 dw_callout_init(struct workqueue_struct *wq, struct delayed_work *dw)
    899 {
    900 
    901 	KASSERT(mutex_owned(&wq->wq_lock));
    902 	KASSERT(work_queue(&dw->work) == wq);
    903 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
    904 
    905 	callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
    906 	callout_setfunc(&dw->dw_callout, &linux_workqueue_timeout, dw);
    907 	TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
    908 	dw->dw_state = DELAYED_WORK_SCHEDULED;
    909 }
    910 
    911 /*
    912  * dw_callout_destroy(wq, dw)
    913  *
    914  *	Destroy the callout of dw and transition to DELAYED_WORK_IDLE.
    915  */
    916 static void
    917 dw_callout_destroy(struct workqueue_struct *wq, struct delayed_work *dw)
    918 {
    919 
    920 	KASSERT(mutex_owned(&wq->wq_lock));
    921 	KASSERT(work_queue(&dw->work) == wq);
    922 	KASSERT(dw->dw_state == DELAYED_WORK_SCHEDULED ||
    923 	    dw->dw_state == DELAYED_WORK_RESCHEDULED ||
    924 	    dw->dw_state == DELAYED_WORK_CANCELLED);
    925 
    926 	TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
    927 	callout_destroy(&dw->dw_callout);
    928 	dw->dw_resched = -1;
    929 	dw->dw_state = DELAYED_WORK_IDLE;
    930 }
    931 
    932 /*
    933  * cancel_delayed_work_done(wq, dw)
    934  *
    935  *	Complete cancellation of a delayed work: transition from
    936  *	DELAYED_WORK_CANCELLED to DELAYED_WORK_IDLE and off the
    937  *	workqueue.  Caller must not dereference dw after this returns.
    938  */
    939 static void
    940 cancel_delayed_work_done(struct workqueue_struct *wq, struct delayed_work *dw)
    941 {
    942 
    943 	KASSERT(mutex_owned(&wq->wq_lock));
    944 	KASSERT(work_queue(&dw->work) == wq);
    945 	KASSERT(dw->dw_state == DELAYED_WORK_CANCELLED);
    946 
    947 	dw_callout_destroy(wq, dw);
    948 	release_work(&dw->work, wq);
    949 	/* Can't dereference dw after this point.  */
    950 }
    951 
    952 /*
    953  * queue_delayed_work(wq, dw, ticks)
    954  *
    955  *	If it is not currently scheduled, schedule dw to run after
    956  *	ticks on wq.  If currently queued, remove it from the queue
    957  *	first.
    958  *
    959  *	If ticks == 0, queue it to run as soon as the worker can,
    960  *	without waiting for the next callout tick to run.
    961  */
    962 bool
    963 queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    964     unsigned long ticks)
    965 {
    966 	bool newly_queued;
    967 
    968 	mutex_enter(&wq->wq_lock);
    969 	if (__predict_true(acquire_work(&dw->work, wq))) {
    970 		/*
    971 		 * It wasn't on any workqueue at all.  Schedule it to
    972 		 * run on this one.
    973 		 */
    974 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
    975 		if (ticks == 0) {
    976 			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
    977 			    work_entry);
    978 			cv_broadcast(&wq->wq_cv);
    979 			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
    980 		} else {
    981 			/*
    982 			 * Initialize a callout and schedule to run
    983 			 * after a delay.
    984 			 */
    985 			dw_callout_init(wq, dw);
    986 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
    987 			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
    988 		}
    989 		newly_queued = true;
    990 	} else {
    991 		/* It was already on this workqueue.  */
    992 		switch (dw->dw_state) {
    993 		case DELAYED_WORK_IDLE:
    994 		case DELAYED_WORK_SCHEDULED:
    995 		case DELAYED_WORK_RESCHEDULED:
    996 			/* On the queue or already scheduled.  Leave it.  */
    997 			newly_queued = false;
    998 			break;
    999 		case DELAYED_WORK_CANCELLED:
   1000 			/*
   1001 			 * Scheduled and the callout began, but it was
   1002 			 * cancelled.  Reschedule it.
   1003 			 */
   1004 			if (ticks == 0) {
   1005 				dw->dw_state = DELAYED_WORK_SCHEDULED;
   1006 				SDT_PROBE2(sdt, linux, work, queue,
   1007 				    &dw->work, wq);
   1008 			} else {
   1009 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
   1010 				dw->dw_resched = MIN(INT_MAX, ticks);
   1011 				SDT_PROBE3(sdt, linux, work, schedule,
   1012 				    dw, wq, ticks);
   1013 			}
   1014 			newly_queued = true;
   1015 			break;
   1016 		default:
   1017 			panic("invalid delayed work state: %d",
   1018 			    dw->dw_state);
   1019 		}
   1020 	}
   1021 	mutex_exit(&wq->wq_lock);
   1022 
   1023 	return newly_queued;
   1024 }
   1025 
   1026 /*
   1027  * mod_delayed_work(wq, dw, ticks)
   1028  *
   1029  *	Schedule dw to run after ticks.  If scheduled or queued,
   1030  *	reschedule.  If ticks == 0, run without delay.
   1031  *
   1032  *	True if it modified the timer of an already scheduled work,
   1033  *	false if it newly scheduled the work.
   1034  */
   1035 bool
   1036 mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
   1037     unsigned long ticks)
   1038 {
   1039 	bool timer_modified;
   1040 
   1041 	mutex_enter(&wq->wq_lock);
   1042 	if (acquire_work(&dw->work, wq)) {
   1043 		/*
   1044 		 * It wasn't on any workqueue at all.  Schedule it to
   1045 		 * run on this one.
   1046 		 */
   1047 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
   1048 		if (ticks == 0) {
   1049 			/*
   1050 			 * Run immediately: put it on the queue and
   1051 			 * signal the worker thread.
   1052 			 */
   1053 			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
   1054 			    work_entry);
   1055 			cv_broadcast(&wq->wq_cv);
   1056 			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
   1057 		} else {
   1058 			/*
   1059 			 * Initialize a callout and schedule to run
   1060 			 * after a delay.
   1061 			 */
   1062 			dw_callout_init(wq, dw);
   1063 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
   1064 			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
   1065 		}
   1066 		timer_modified = false;
   1067 	} else {
   1068 		/* It was already on this workqueue.  */
   1069 		switch (dw->dw_state) {
   1070 		case DELAYED_WORK_IDLE:
   1071 			/* On the queue.  */
   1072 			if (ticks == 0) {
   1073 				/* Leave it be.  */
   1074 				SDT_PROBE2(sdt, linux, work, cancel,
   1075 				    &dw->work, wq);
   1076 				SDT_PROBE2(sdt, linux, work, queue,
   1077 				    &dw->work, wq);
   1078 			} else {
   1079 				/* Remove from the queue and schedule.  */
   1080 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
   1081 				    work_entry);
   1082 				dw_callout_init(wq, dw);
   1083 				callout_schedule(&dw->dw_callout,
   1084 				    MIN(INT_MAX, ticks));
   1085 				SDT_PROBE2(sdt, linux, work, cancel,
   1086 				    &dw->work, wq);
   1087 				SDT_PROBE3(sdt, linux, work, schedule,
   1088 				    dw, wq, ticks);
   1089 			}
   1090 			timer_modified = true;
   1091 			break;
   1092 		case DELAYED_WORK_SCHEDULED:
   1093 			/*
   1094 			 * It is scheduled to run after a delay.  Try
   1095 			 * to stop it and reschedule it; if we can't,
   1096 			 * either reschedule it or cancel it to put it
   1097 			 * on the queue, and inform the callout.
   1098 			 */
   1099 			if (callout_stop(&dw->dw_callout)) {
   1100 				/* Can't stop, callout has begun.  */
   1101 				if (ticks == 0) {
   1102 					/*
   1103 					 * We don't actually need to do
   1104 					 * anything.  The callout will
   1105 					 * queue it as soon as it gets
   1106 					 * the lock.
   1107 					 */
   1108 					SDT_PROBE2(sdt, linux, work, cancel,
   1109 					    &dw->work, wq);
   1110 					SDT_PROBE2(sdt, linux, work, queue,
   1111 					    &dw->work, wq);
   1112 				} else {
   1113 					/* Ask the callout to reschedule.  */
   1114 					dw->dw_state = DELAYED_WORK_RESCHEDULED;
   1115 					dw->dw_resched = MIN(INT_MAX, ticks);
   1116 					SDT_PROBE2(sdt, linux, work, cancel,
   1117 					    &dw->work, wq);
   1118 					SDT_PROBE3(sdt, linux, work, schedule,
   1119 					    dw, wq, ticks);
   1120 				}
   1121 			} else {
   1122 				/* We stopped the callout before it began.  */
   1123 				if (ticks == 0) {
   1124 					/*
   1125 					 * Run immediately: destroy the
   1126 					 * callout, put it on the
   1127 					 * queue, and signal the worker
   1128 					 * thread.
   1129 					 */
   1130 					dw_callout_destroy(wq, dw);
   1131 					TAILQ_INSERT_TAIL(&wq->wq_dqueue,
   1132 					    &dw->work, work_entry);
   1133 					cv_broadcast(&wq->wq_cv);
   1134 					SDT_PROBE2(sdt, linux, work, cancel,
   1135 					    &dw->work, wq);
   1136 					SDT_PROBE2(sdt, linux, work, queue,
   1137 					    &dw->work, wq);
   1138 				} else {
   1139 					/*
   1140 					 * Reschedule the callout.  No
   1141 					 * state change.
   1142 					 */
   1143 					callout_schedule(&dw->dw_callout,
   1144 					    MIN(INT_MAX, ticks));
   1145 					SDT_PROBE2(sdt, linux, work, cancel,
   1146 					    &dw->work, wq);
   1147 					SDT_PROBE3(sdt, linux, work, schedule,
   1148 					    dw, wq, ticks);
   1149 				}
   1150 			}
   1151 			timer_modified = true;
   1152 			break;
   1153 		case DELAYED_WORK_RESCHEDULED:
   1154 			/*
   1155 			 * Someone rescheduled it after the callout
   1156 			 * started but before the poor thing even had a
   1157 			 * chance to acquire the lock.
   1158 			 */
   1159 			if (ticks == 0) {
   1160 				/*
   1161 				 * We can just switch back to
   1162 				 * DELAYED_WORK_SCHEDULED so that the
   1163 				 * callout will queue the work as soon
   1164 				 * as it gets the lock.
   1165 				 */
   1166 				dw->dw_state = DELAYED_WORK_SCHEDULED;
   1167 				dw->dw_resched = -1;
   1168 				SDT_PROBE2(sdt, linux, work, cancel,
   1169 				    &dw->work, wq);
   1170 				SDT_PROBE2(sdt, linux, work, queue,
   1171 				    &dw->work, wq);
   1172 			} else {
   1173 				/* Change the rescheduled time.  */
   1174 				dw->dw_resched = ticks;
   1175 				SDT_PROBE2(sdt, linux, work, cancel,
   1176 				    &dw->work, wq);
   1177 				SDT_PROBE3(sdt, linux, work, schedule,
   1178 				    dw, wq, ticks);
   1179 			}
   1180 			timer_modified = true;
   1181 			break;
   1182 		case DELAYED_WORK_CANCELLED:
   1183 			/*
   1184 			 * Someone cancelled it after the callout
   1185 			 * started but before the poor thing even had a
   1186 			 * chance to acquire the lock.
   1187 			 */
   1188 			if (ticks == 0) {
   1189 				/*
   1190 				 * We can just switch back to
   1191 				 * DELAYED_WORK_SCHEDULED so that the
   1192 				 * callout will queue the work as soon
   1193 				 * as it gets the lock.
   1194 				 */
   1195 				dw->dw_state = DELAYED_WORK_SCHEDULED;
   1196 				SDT_PROBE2(sdt, linux, work, queue,
   1197 				    &dw->work, wq);
   1198 			} else {
   1199 				/* Ask it to reschedule.  */
   1200 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
   1201 				dw->dw_resched = MIN(INT_MAX, ticks);
   1202 				SDT_PROBE3(sdt, linux, work, schedule,
   1203 				    dw, wq, ticks);
   1204 			}
   1205 			timer_modified = false;
   1206 			break;
   1207 		default:
   1208 			panic("invalid delayed work state: %d", dw->dw_state);
   1209 		}
   1210 	}
   1211 	mutex_exit(&wq->wq_lock);
   1212 
   1213 	return timer_modified;
   1214 }
   1215 
   1216 /*
   1217  * cancel_delayed_work(dw)
   1218  *
   1219  *	If work was scheduled or queued, remove it from the schedule or
   1220  *	queue and return true.  If work was not scheduled or queued,
   1221  *	return false.  Note that work may already be running; if it
   1222  *	hasn't been rescheduled or requeued, then cancel_delayed_work
   1223  *	will return false, and either way, cancel_delayed_work will NOT
   1224  *	wait for the work to complete.
   1225  */
   1226 bool
   1227 cancel_delayed_work(struct delayed_work *dw)
   1228 {
   1229 	struct workqueue_struct *wq;
   1230 	bool cancelled_p;
   1231 
   1232 	/* If there's no workqueue, nothing to cancel.   */
   1233 	if ((wq = work_queue(&dw->work)) == NULL)
   1234 		return false;
   1235 
   1236 	mutex_enter(&wq->wq_lock);
   1237 	if (__predict_false(work_queue(&dw->work) != wq)) {
   1238 		cancelled_p = false;
   1239 	} else {
   1240 		switch (dw->dw_state) {
   1241 		case DELAYED_WORK_IDLE:
   1242 			/*
   1243 			 * It is either on the queue or already running
   1244 			 * or both.
   1245 			 */
   1246 			if (work_claimed(&dw->work, wq)) {
   1247 				/* On the queue.  Remove and release.  */
   1248 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
   1249 				    work_entry);
   1250 				SDT_PROBE2(sdt, linux, work, cancel,
   1251 				    &dw->work, wq);
   1252 				release_work(&dw->work, wq);
   1253 				/* Can't dereference dw after this point.  */
   1254 				cancelled_p = true;
   1255 			} else {
   1256 				/* Not on the queue, so didn't cancel.  */
   1257 				cancelled_p = false;
   1258 			}
   1259 			break;
   1260 		case DELAYED_WORK_SCHEDULED:
   1261 			/*
   1262 			 * If it is scheduled, mark it cancelled and
   1263 			 * try to stop the callout before it starts.
   1264 			 *
   1265 			 * If it's too late and the callout has already
   1266 			 * begun to execute, tough.
   1267 			 *
   1268 			 * If we stopped the callout before it started,
   1269 			 * however, then destroy the callout and
   1270 			 * dissociate it from the workqueue ourselves.
   1271 			 */
   1272 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1273 			cancelled_p = true;
   1274 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
   1275 			if (!callout_stop(&dw->dw_callout))
   1276 				cancel_delayed_work_done(wq, dw);
   1277 			break;
   1278 		case DELAYED_WORK_RESCHEDULED:
   1279 			/*
   1280 			 * If it is being rescheduled, the callout has
   1281 			 * already fired.  We must ask it to cancel.
   1282 			 */
   1283 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1284 			dw->dw_resched = -1;
   1285 			cancelled_p = true;
   1286 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
   1287 			break;
   1288 		case DELAYED_WORK_CANCELLED:
   1289 			/*
   1290 			 * If it is being cancelled, the callout has
   1291 			 * already fired.  There is nothing more for us
   1292 			 * to do.  Someone else claims credit for
   1293 			 * cancelling it.
   1294 			 */
   1295 			cancelled_p = false;
   1296 			break;
   1297 		default:
   1298 			panic("invalid delayed work state: %d",
   1299 			    dw->dw_state);
   1300 		}
   1301 	}
   1302 	mutex_exit(&wq->wq_lock);
   1303 
   1304 	return cancelled_p;
   1305 }
   1306 
   1307 /*
   1308  * cancel_delayed_work_sync(dw)
   1309  *
   1310  *	If work was scheduled or queued, remove it from the schedule or
   1311  *	queue and return true.  If work was not scheduled or queued,
   1312  *	return false.  Note that work may already be running; if it
   1313  *	hasn't been rescheduled or requeued, then cancel_delayed_work
   1314  *	will return false; either way, wait for it to complete.
   1315  */
   1316 bool
   1317 cancel_delayed_work_sync(struct delayed_work *dw)
   1318 {
   1319 	struct workqueue_struct *wq;
   1320 	bool cancelled_p;
   1321 
   1322 	/* If there's no workqueue, nothing to cancel.  */
   1323 	if ((wq = work_queue(&dw->work)) == NULL)
   1324 		return false;
   1325 
   1326 	mutex_enter(&wq->wq_lock);
   1327 	if (__predict_false(work_queue(&dw->work) != wq)) {
   1328 		cancelled_p = false;
   1329 	} else {
   1330 		switch (dw->dw_state) {
   1331 		case DELAYED_WORK_IDLE:
   1332 			/*
   1333 			 * It is either on the queue or already running
   1334 			 * or both.
   1335 			 */
   1336 			if (work_claimed(&dw->work, wq)) {
   1337 				/* On the queue.  Remove and release.  */
   1338 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
   1339 				    work_entry);
   1340 				SDT_PROBE2(sdt, linux, work, cancel,
   1341 				    &dw->work, wq);
   1342 				release_work(&dw->work, wq);
   1343 				/* Can't dereference dw after this point.  */
   1344 				cancelled_p = true;
   1345 			} else {
   1346 				/* Not on the queue, so didn't cancel. */
   1347 				cancelled_p = false;
   1348 			}
   1349 			/* If it's still running, wait for it to complete.  */
   1350 			if (wq->wq_current_work == &dw->work)
   1351 				wait_for_current_work(&dw->work, wq);
   1352 			break;
   1353 		case DELAYED_WORK_SCHEDULED:
   1354 			/*
   1355 			 * If it is scheduled, mark it cancelled and
   1356 			 * try to stop the callout before it starts.
   1357 			 *
   1358 			 * If it's too late and the callout has already
   1359 			 * begun to execute, we must wait for it to
   1360 			 * complete.  But we got in soon enough to ask
   1361 			 * the callout not to run, so we successfully
   1362 			 * cancelled it in that case.
   1363 			 *
   1364 			 * If we stopped the callout before it started,
   1365 			 * then we must destroy the callout and
   1366 			 * dissociate it from the workqueue ourselves.
   1367 			 */
   1368 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1369 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
   1370 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
   1371 				cancel_delayed_work_done(wq, dw);
   1372 			cancelled_p = true;
   1373 			break;
   1374 		case DELAYED_WORK_RESCHEDULED:
   1375 			/*
   1376 			 * If it is being rescheduled, the callout has
   1377 			 * already fired.  We must ask it to cancel and
   1378 			 * wait for it to complete.
   1379 			 */
   1380 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1381 			dw->dw_resched = -1;
   1382 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
   1383 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
   1384 			cancelled_p = true;
   1385 			break;
   1386 		case DELAYED_WORK_CANCELLED:
   1387 			/*
   1388 			 * If it is being cancelled, the callout has
   1389 			 * already fired.  We need only wait for it to
   1390 			 * complete.  Someone else, however, claims
   1391 			 * credit for cancelling it.
   1392 			 */
   1393 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
   1394 			cancelled_p = false;
   1395 			break;
   1396 		default:
   1397 			panic("invalid delayed work state: %d",
   1398 			    dw->dw_state);
   1399 		}
   1400 	}
   1401 	mutex_exit(&wq->wq_lock);
   1402 
   1403 	return cancelled_p;
   1404 }
   1405 
   1406 /*
   1408  * Flush
   1409  */
   1410 
   1411 /*
   1412  * flush_scheduled_work()
   1413  *
   1414  *	Wait for all work queued on system_wq to complete.  This does
   1415  *	not include delayed work.
   1416  */
   1417 void
   1418 flush_scheduled_work(void)
   1419 {
   1420 
   1421 	flush_workqueue(system_wq);
   1422 }
   1423 
   1424 /*
   1425  * flush_workqueue_locked(wq)
   1426  *
   1427  *	Wait for all work queued on wq to complete.  This does not
   1428  *	include delayed work.  True if there was work to be flushed,
   1429  *	false it the queue was empty.
   1430  *
   1431  *	Caller must hold wq's lock.
   1432  */
   1433 static bool
   1434 flush_workqueue_locked(struct workqueue_struct *wq)
   1435 {
   1436 	uint64_t gen;
   1437 	bool work_queued = false;
   1438 
   1439 	KASSERT(mutex_owned(&wq->wq_lock));
   1440 
   1441 	/* Get the current generation number.  */
   1442 	gen = wq->wq_gen;
   1443 
   1444 	/*
   1445 	 * If there's a batch of work in progress, we must wait for the
   1446 	 * worker thread to finish that batch.
   1447 	 */
   1448 	if (wq->wq_current_work != NULL) {
   1449 		gen++;
   1450 		work_queued = true;
   1451 	}
   1452 
   1453 	/*
   1454 	 * If there's any work yet to be claimed from the queue by the
   1455 	 * worker thread, we must wait for it to finish one more batch
   1456 	 * too.
   1457 	 */
   1458 	if (!TAILQ_EMPTY(&wq->wq_queue) || !TAILQ_EMPTY(&wq->wq_dqueue)) {
   1459 		gen++;
   1460 		work_queued = true;
   1461 	}
   1462 
   1463 	/* Wait until the generation number has caught up.  */
   1464 	SDT_PROBE1(sdt, linux, work, flush__start,  wq);
   1465 	while (wq->wq_gen < gen)
   1466 		cv_wait(&wq->wq_cv, &wq->wq_lock);
   1467 	SDT_PROBE1(sdt, linux, work, flush__done,  wq);
   1468 
   1469 	/* Return whether we had to wait for anything.  */
   1470 	return work_queued;
   1471 }
   1472 
   1473 /*
   1474  * flush_workqueue(wq)
   1475  *
   1476  *	Wait for all work queued on wq to complete.  This does not
   1477  *	include delayed work.
   1478  */
   1479 void
   1480 flush_workqueue(struct workqueue_struct *wq)
   1481 {
   1482 
   1483 	mutex_enter(&wq->wq_lock);
   1484 	(void)flush_workqueue_locked(wq);
   1485 	mutex_exit(&wq->wq_lock);
   1486 }
   1487 
   1488 /*
   1489  * drain_workqueue(wq)
   1490  *
   1491  *	Repeatedly flush wq until there is no more work.
   1492  */
   1493 void
   1494 drain_workqueue(struct workqueue_struct *wq)
   1495 {
   1496 	unsigned ntries = 0;
   1497 
   1498 	mutex_enter(&wq->wq_lock);
   1499 	while (flush_workqueue_locked(wq)) {
   1500 		if (ntries++ == 10 || (ntries % 100) == 0)
   1501 			printf("linux workqueue %s"
   1502 			    ": still clogged after %u flushes",
   1503 			    wq->wq_name, ntries);
   1504 	}
   1505 	mutex_exit(&wq->wq_lock);
   1506 }
   1507 
/*
 * flush_work(work)
 *
 *	If work is associated with a workqueue, flush that whole
 *	workqueue, which covers work itself whether it is queued or
 *	currently executing.
 *
 *	Return true if there was a workqueue to flush, false if work
 *	had none and was therefore already idle.
 */
bool
flush_work(struct work_struct *work)
{
	struct workqueue_struct *const wq = work_queue(work);

	/* No workqueue means there is nothing to wait for.  */
	if (wq == NULL)
		return false;

	flush_workqueue(wq);
	return true;
}
   1529 
   1530 /*
   1531  * flush_delayed_work(dw)
   1532  *
   1533  *	If dw is scheduled to run after a delay, queue it immediately
   1534  *	instead.  Then, if dw is queued or currently executing, wait
   1535  *	for it to complete.
   1536  */
   1537 bool
   1538 flush_delayed_work(struct delayed_work *dw)
   1539 {
   1540 	struct workqueue_struct *wq;
   1541 	bool waited = false;
   1542 
   1543 	/* If there's no workqueue, nothing to flush.  */
   1544 	if ((wq = work_queue(&dw->work)) == NULL)
   1545 		return false;
   1546 
   1547 	mutex_enter(&wq->wq_lock);
   1548 	if (__predict_false(work_queue(&dw->work) != wq)) {
   1549 		/*
   1550 		 * Moved off the queue already (and possibly to another
   1551 		 * queue, though that would be ill-advised), so it must
   1552 		 * have completed, and we have nothing more to do.
   1553 		 */
   1554 		waited = false;
   1555 	} else {
   1556 		switch (dw->dw_state) {
   1557 		case DELAYED_WORK_IDLE:
   1558 			/*
   1559 			 * It has a workqueue assigned and the callout
   1560 			 * is idle, so it must be in progress or on the
   1561 			 * queue.  In that case, we'll wait for it to
   1562 			 * complete.
   1563 			 */
   1564 			break;
   1565 		case DELAYED_WORK_SCHEDULED:
   1566 		case DELAYED_WORK_RESCHEDULED:
   1567 		case DELAYED_WORK_CANCELLED:
   1568 			/*
   1569 			 * The callout is scheduled, and may have even
   1570 			 * started.  Mark it as scheduled so that if
   1571 			 * the callout has fired it will queue the work
   1572 			 * itself.  Try to stop the callout -- if we
   1573 			 * can, queue the work now; if we can't, wait
   1574 			 * for the callout to complete, which entails
   1575 			 * queueing it.
   1576 			 */
   1577 			dw->dw_state = DELAYED_WORK_SCHEDULED;
   1578 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock)) {
   1579 				/*
   1580 				 * We stopped it before it ran.  No
   1581 				 * state change in the interim is
   1582 				 * possible.  Destroy the callout and
   1583 				 * queue it ourselves.
   1584 				 */
   1585 				KASSERT(dw->dw_state ==
   1586 				    DELAYED_WORK_SCHEDULED);
   1587 				dw_callout_destroy(wq, dw);
   1588 				TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
   1589 				    work_entry);
   1590 				cv_broadcast(&wq->wq_cv);
   1591 				SDT_PROBE2(sdt, linux, work, queue,
   1592 				    &dw->work, wq);
   1593 			}
   1594 			break;
   1595 		default:
   1596 			panic("invalid delayed work state: %d", dw->dw_state);
   1597 		}
   1598 		/*
   1599 		 * Waiting for the whole queue to flush is overkill,
   1600 		 * but doesn't hurt.
   1601 		 */
   1602 		(void)flush_workqueue_locked(wq);
   1603 		waited = true;
   1604 	}
   1605 	mutex_exit(&wq->wq_lock);
   1606 
   1607 	return waited;
   1608 }
   1609 
   1610 /*
   1611  * delayed_work_pending(dw)
   1612  *
   1613  *	True if dw is currently scheduled to execute, false if not.
   1614  */
   1615 bool
   1616 delayed_work_pending(const struct delayed_work *dw)
   1617 {
   1618 
   1619 	return work_pending(&dw->work);
   1620 }
   1621