linux_work.c revision 1.57
      1 /*	$NetBSD: linux_work.c,v 1.57 2021/12/19 12:11:28 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2018 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Taylor R. Campbell.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: linux_work.c,v 1.57 2021/12/19 12:11:28 riastradh Exp $");
     34 
     35 #include <sys/types.h>
     36 #include <sys/atomic.h>
     37 #include <sys/callout.h>
     38 #include <sys/condvar.h>
     39 #include <sys/errno.h>
     40 #include <sys/kmem.h>
     41 #include <sys/kthread.h>
     42 #include <sys/lwp.h>
     43 #include <sys/mutex.h>
     44 #ifndef _MODULE
     45 #include <sys/once.h>
     46 #endif
     47 #include <sys/queue.h>
     48 #include <sys/sdt.h>
     49 
     50 #include <linux/workqueue.h>
     51 
     52 TAILQ_HEAD(work_head, work_struct);
     53 TAILQ_HEAD(dwork_head, delayed_work);
     54 
     55 struct workqueue_struct {
     56 	kmutex_t		wq_lock;
     57 	kcondvar_t		wq_cv;
     58 	struct dwork_head	wq_delayed; /* delayed work scheduled */
     59 	struct work_head	wq_rcu;	    /* RCU work scheduled */
     60 	struct work_head	wq_queue;   /* work to run */
     61 	struct work_head	wq_dqueue;  /* delayed work to run now */
     62 	struct work_struct	*wq_current_work;
     63 	int			wq_flags;
     64 	bool			wq_dying;
     65 	uint64_t		wq_gen;
     66 	struct lwp		*wq_lwp;
     67 	const char		*wq_name;
     68 };
     69 
     70 static void __dead	linux_workqueue_thread(void *);
     71 static void		linux_workqueue_timeout(void *);
     72 static bool		work_claimed(struct work_struct *,
     73 			    struct workqueue_struct *);
     74 static struct workqueue_struct *
     75 			work_queue(struct work_struct *);
     76 static bool		acquire_work(struct work_struct *,
     77 			    struct workqueue_struct *);
     78 static void		release_work(struct work_struct *,
     79 			    struct workqueue_struct *);
     80 static void		wait_for_current_work(struct work_struct *,
     81 			    struct workqueue_struct *);
     82 static void		dw_callout_init(struct workqueue_struct *,
     83 			    struct delayed_work *);
     84 static void		dw_callout_destroy(struct workqueue_struct *,
     85 			    struct delayed_work *);
     86 static void		cancel_delayed_work_done(struct workqueue_struct *,
     87 			    struct delayed_work *);
     88 
     89 SDT_PROBE_DEFINE2(sdt, linux, work, acquire,
     90     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
     91 SDT_PROBE_DEFINE2(sdt, linux, work, release,
     92     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
     93 SDT_PROBE_DEFINE2(sdt, linux, work, queue,
     94     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
     95 SDT_PROBE_DEFINE2(sdt, linux, work, rcu,
     96     "struct rcu_work *"/*work*/, "struct workqueue_struct *"/*wq*/);
     97 SDT_PROBE_DEFINE2(sdt, linux, work, cancel,
     98     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
     99 SDT_PROBE_DEFINE3(sdt, linux, work, schedule,
    100     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/,
    101     "unsigned long"/*ticks*/);
    102 SDT_PROBE_DEFINE2(sdt, linux, work, timer,
    103     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
    104 SDT_PROBE_DEFINE2(sdt, linux, work, wait__start,
    105     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
    106 SDT_PROBE_DEFINE2(sdt, linux, work, wait__done,
    107     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
    108 SDT_PROBE_DEFINE2(sdt, linux, work, run,
    109     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
    110 SDT_PROBE_DEFINE2(sdt, linux, work, done,
    111     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
    112 SDT_PROBE_DEFINE1(sdt, linux, work, batch__start,
    113     "struct workqueue_struct *"/*wq*/);
    114 SDT_PROBE_DEFINE1(sdt, linux, work, batch__done,
    115     "struct workqueue_struct *"/*wq*/);
    116 SDT_PROBE_DEFINE1(sdt, linux, work, flush__start,
    117     "struct workqueue_struct *"/*wq*/);
    118 SDT_PROBE_DEFINE1(sdt, linux, work, flush__done,
    119     "struct workqueue_struct *"/*wq*/);
    120 
    121 static specificdata_key_t workqueue_key __read_mostly;
    122 
    123 struct workqueue_struct	*system_highpri_wq __read_mostly;
    124 struct workqueue_struct	*system_long_wq __read_mostly;
    125 struct workqueue_struct	*system_power_efficient_wq __read_mostly;
    126 struct workqueue_struct	*system_unbound_wq __read_mostly;
    127 struct workqueue_struct	*system_wq __read_mostly;
    128 
    129 static inline uintptr_t
    130 atomic_cas_uintptr(volatile uintptr_t *p, uintptr_t old, uintptr_t new)
    131 {
    132 
    133 	return (uintptr_t)atomic_cas_ptr(p, (void *)old, (void *)new);
    134 }
    135 
    136 /*
    137  * linux_workqueue_init()
    138  *
    139  *	Initialize the Linux workqueue subsystem.  Return 0 on success,
    140  *	NetBSD error on failure.
    141  */
    142 static int
    143 linux_workqueue_init0(void)
    144 {
    145 	int error;
    146 
    147 	error = lwp_specific_key_create(&workqueue_key, NULL);
    148 	if (error)
    149 		goto out;
    150 
    151 	system_highpri_wq = alloc_ordered_workqueue("lnxhipwq", 0);
    152 	if (system_highpri_wq == NULL) {
    153 		error = ENOMEM;
    154 		goto out;
    155 	}
    156 
    157 	system_long_wq = alloc_ordered_workqueue("lnxlngwq", 0);
    158 	if (system_long_wq == NULL) {
    159 		error = ENOMEM;
    160 		goto out;
    161 	}
    162 
    163 	system_power_efficient_wq = alloc_ordered_workqueue("lnxpwrwq", 0);
    164 	if (system_power_efficient_wq == NULL) {
    165 		error = ENOMEM;
    166 		goto out;
    167 	}
    168 
    169 	system_unbound_wq = alloc_ordered_workqueue("lnxubdwq", 0);
    170 	if (system_unbound_wq == NULL) {
    171 		error = ENOMEM;
    172 		goto out;
    173 	}
    174 
    175 	system_wq = alloc_ordered_workqueue("lnxsyswq", 0);
    176 	if (system_wq == NULL) {
    177 		error = ENOMEM;
    178 		goto out;
    179 	}
    180 
    181 	/* Success!  */
    182 	error = 0;
    183 
    184 out:	if (error) {
    185 		if (system_highpri_wq)
    186 			destroy_workqueue(system_highpri_wq);
    187 		if (system_long_wq)
    188 			destroy_workqueue(system_long_wq);
    189 		if (system_power_efficient_wq)
    190 			destroy_workqueue(system_power_efficient_wq);
    191 		if (system_unbound_wq)
    192 			destroy_workqueue(system_unbound_wq);
    193 		if (system_wq)
    194 			destroy_workqueue(system_wq);
    195 		if (workqueue_key)
    196 			lwp_specific_key_delete(workqueue_key);
    197 	}
    198 
    199 	return error;
    200 }
    201 
    202 /*
    203  * linux_workqueue_fini()
    204  *
    205  *	Destroy the Linux workqueue subsystem.  Never fails.
    206  */
    207 static void
    208 linux_workqueue_fini0(void)
    209 {
    210 
     211 	destroy_workqueue(system_power_efficient_wq);
     212 	destroy_workqueue(system_long_wq);
     213 	destroy_workqueue(system_wq);
         	destroy_workqueue(system_unbound_wq);
         	destroy_workqueue(system_highpri_wq);
     214 	lwp_specific_key_delete(workqueue_key);
    215 }
    216 
    217 #ifndef _MODULE
    218 static ONCE_DECL(linux_workqueue_init_once);
    219 #endif
    220 
    221 int
    222 linux_workqueue_init(void)
    223 {
    224 #ifdef _MODULE
    225 	return linux_workqueue_init0();
    226 #else
    227 	return INIT_ONCE(&linux_workqueue_init_once, &linux_workqueue_init0);
    228 #endif
    229 }
    230 
    231 void
    232 linux_workqueue_fini(void)
    233 {
    234 #ifdef _MODULE
    235 	return linux_workqueue_fini0();
    236 #else
    237 	return FINI_ONCE(&linux_workqueue_init_once, &linux_workqueue_fini0);
    238 #endif
    239 }
    240 
    241 /*
    243  * Workqueues
    244  */
    245 
    246 /*
    247  * alloc_workqueue(name, flags, max_active)
    248  *
    249  *	Create a workqueue of the given name.  max_active is the
    250  *	maximum number of work items in flight, or 0 for the default.
    251  *	Return NULL on failure, pointer to struct workqueue_struct
    252  *	object on success.
    253  */
    254 struct workqueue_struct *
    255 alloc_workqueue(const char *name, int flags, unsigned max_active)
    256 {
    257 	struct workqueue_struct *wq;
    258 	int error;
    259 
    260 	KASSERT(max_active == 0 || max_active == 1);
    261 
    262 	wq = kmem_zalloc(sizeof(*wq), KM_SLEEP);
    263 
    264 	mutex_init(&wq->wq_lock, MUTEX_DEFAULT, IPL_VM);
    265 	cv_init(&wq->wq_cv, name);
    266 	TAILQ_INIT(&wq->wq_delayed);
    267 	TAILQ_INIT(&wq->wq_rcu);
    268 	TAILQ_INIT(&wq->wq_queue);
    269 	TAILQ_INIT(&wq->wq_dqueue);
    270 	wq->wq_current_work = NULL;
    271 	wq->wq_flags = 0;
    272 	wq->wq_dying = false;
    273 	wq->wq_gen = 0;
    274 	wq->wq_lwp = NULL;
    275 	wq->wq_name = name;
    276 
    277 	error = kthread_create(PRI_NONE,
    278 	    KTHREAD_MPSAFE|KTHREAD_TS|KTHREAD_MUSTJOIN, NULL,
    279 	    &linux_workqueue_thread, wq, &wq->wq_lwp, "%s", name);
    280 	if (error)
    281 		goto fail0;
    282 
    283 	return wq;
    284 
    285 fail0:	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
    286 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
    287 	KASSERT(TAILQ_EMPTY(&wq->wq_rcu));
    288 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
    289 	cv_destroy(&wq->wq_cv);
    290 	mutex_destroy(&wq->wq_lock);
    291 	kmem_free(wq, sizeof(*wq));
    292 	return NULL;
    293 }
    294 
    295 /*
    296  * alloc_ordered_workqueue(name, flags)
    297  *
    298  *	Same as alloc_workqueue(name, flags, 1).
    299  */
    300 struct workqueue_struct *
    301 alloc_ordered_workqueue(const char *name, int flags)
    302 {
    303 
    304 	return alloc_workqueue(name, flags, 1);
    305 }
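
/*
 * Example (editor's sketch, not part of the original source; "mydrvwq"
 * and the surrounding error handling are illustrative only):
 *
 *	struct workqueue_struct *wq;
 *
 *	wq = alloc_ordered_workqueue("mydrvwq", 0);
 *	if (wq == NULL)
 *		return ENOMEM;
 *	...
 *	destroy_workqueue(wq);
 *
 * Note that this implementation asserts max_active <= 1, so only
 * ordered (single-threaded) workqueues are supported.
 */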
    306 
    307 /*
    308  * destroy_workqueue(wq)
    309  *
     310  *	Destroy a workqueue created with alloc_workqueue.  Cancel any
     311  *	pending delayed work.  Wait for all queued work to complete.
    312  *
    313  *	May sleep.
    314  */
    315 void
    316 destroy_workqueue(struct workqueue_struct *wq)
    317 {
    318 
    319 	/*
    320 	 * Cancel all delayed work.  We do this first because any
     321 	 * delayed work that has already timed out, which we can't
    322 	 * cancel, may have queued new work.
    323 	 */
    324 	mutex_enter(&wq->wq_lock);
    325 	while (!TAILQ_EMPTY(&wq->wq_delayed)) {
    326 		struct delayed_work *const dw = TAILQ_FIRST(&wq->wq_delayed);
    327 
    328 		KASSERT(work_queue(&dw->work) == wq);
    329 		KASSERTMSG((dw->dw_state == DELAYED_WORK_SCHEDULED ||
    330 			dw->dw_state == DELAYED_WORK_RESCHEDULED ||
    331 			dw->dw_state == DELAYED_WORK_CANCELLED),
    332 		    "delayed work %p in bad state: %d",
    333 		    dw, dw->dw_state);
    334 
    335 		/*
    336 		 * Mark it cancelled and try to stop the callout before
    337 		 * it starts.
    338 		 *
    339 		 * If it's too late and the callout has already begun
    340 		 * to execute, then it will notice that we asked to
    341 		 * cancel it and remove itself from the queue before
    342 		 * returning.
    343 		 *
    344 		 * If we stopped the callout before it started,
    345 		 * however, then we can safely destroy the callout and
    346 		 * dissociate it from the workqueue ourselves.
    347 		 */
    348 		SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
    349 		dw->dw_state = DELAYED_WORK_CANCELLED;
    350 		if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
    351 			cancel_delayed_work_done(wq, dw);
    352 	}
    353 	mutex_exit(&wq->wq_lock);
    354 
    355 	/* Wait for all scheduled RCU work to complete.  */
    356 	mutex_enter(&wq->wq_lock);
    357 	while (!TAILQ_EMPTY(&wq->wq_rcu))
    358 		cv_wait(&wq->wq_cv, &wq->wq_lock);
    359 	mutex_exit(&wq->wq_lock);
    360 
    361 	/*
    362 	 * At this point, no new work can be put on the queue.
    363 	 */
    364 
    365 	/* Tell the thread to exit.  */
    366 	mutex_enter(&wq->wq_lock);
    367 	wq->wq_dying = true;
    368 	cv_broadcast(&wq->wq_cv);
    369 	mutex_exit(&wq->wq_lock);
    370 
    371 	/* Wait for it to exit.  */
    372 	(void)kthread_join(wq->wq_lwp);
    373 
    374 	KASSERT(wq->wq_dying);
    375 	KASSERT(wq->wq_flags == 0);
    376 	KASSERT(wq->wq_current_work == NULL);
    377 	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
    378 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
    379 	KASSERT(TAILQ_EMPTY(&wq->wq_rcu));
    380 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
    381 	cv_destroy(&wq->wq_cv);
    382 	mutex_destroy(&wq->wq_lock);
    383 
    384 	kmem_free(wq, sizeof(*wq));
    385 }
    386 
    387 /*
    389  * Work thread and callout
    390  */
    391 
    392 /*
    393  * linux_workqueue_thread(cookie)
    394  *
    395  *	Main function for a workqueue's worker thread.  Waits until
    396  *	there is work queued, grabs a batch of work off the queue,
    397  *	executes it all, bumps the generation number, and repeats,
    398  *	until dying.
    399  */
    400 static void __dead
    401 linux_workqueue_thread(void *cookie)
    402 {
    403 	struct workqueue_struct *const wq = cookie;
    404 	struct work_head *const q[2] = { &wq->wq_queue, &wq->wq_dqueue };
    405 	struct work_struct marker, *work;
    406 	unsigned i;
    407 
    408 	lwp_setspecific(workqueue_key, wq);
    409 
    410 	mutex_enter(&wq->wq_lock);
    411 	for (;;) {
    412 		/*
    413 		 * Wait until there's activity.  If there's no work and
    414 		 * we're dying, stop here.
    415 		 */
    416 		if (TAILQ_EMPTY(&wq->wq_queue) &&
    417 		    TAILQ_EMPTY(&wq->wq_dqueue)) {
    418 			if (wq->wq_dying)
    419 				break;
    420 			cv_wait(&wq->wq_cv, &wq->wq_lock);
    421 			continue;
    422 		}
    423 
    424 		/*
    425 		 * Start a batch of work.  Use a marker to delimit when
    426 		 * the batch ends so we can advance the generation
    427 		 * after the batch.
    428 		 */
    429 		SDT_PROBE1(sdt, linux, work, batch__start,  wq);
    430 		for (i = 0; i < 2; i++) {
    431 			if (TAILQ_EMPTY(q[i]))
    432 				continue;
    433 			TAILQ_INSERT_TAIL(q[i], &marker, work_entry);
    434 			while ((work = TAILQ_FIRST(q[i])) != &marker) {
    435 				void (*func)(struct work_struct *);
    436 
    437 				KASSERT(work_queue(work) == wq);
    438 				KASSERT(work_claimed(work, wq));
    439 				KASSERTMSG((q[i] != &wq->wq_dqueue ||
    440 					container_of(work, struct delayed_work,
    441 					    work)->dw_state ==
    442 					DELAYED_WORK_IDLE),
    443 				    "delayed work %p queued and scheduled",
    444 				    work);
    445 
    446 				TAILQ_REMOVE(q[i], work, work_entry);
    447 				KASSERT(wq->wq_current_work == NULL);
    448 				wq->wq_current_work = work;
    449 				func = work->func;
    450 				release_work(work, wq);
    451 				/* Can't dereference work after this point.  */
    452 
    453 				mutex_exit(&wq->wq_lock);
    454 				SDT_PROBE2(sdt, linux, work, run,  work, wq);
    455 				(*func)(work);
    456 				SDT_PROBE2(sdt, linux, work, done,  work, wq);
    457 				mutex_enter(&wq->wq_lock);
    458 
    459 				KASSERT(wq->wq_current_work == work);
    460 				wq->wq_current_work = NULL;
    461 				cv_broadcast(&wq->wq_cv);
    462 			}
    463 			TAILQ_REMOVE(q[i], &marker, work_entry);
    464 		}
    465 
     466 		/* Notify cancellers that we've completed a batch of work.  */
    467 		wq->wq_gen++;
    468 		cv_broadcast(&wq->wq_cv);
    469 		SDT_PROBE1(sdt, linux, work, batch__done,  wq);
    470 	}
    471 	mutex_exit(&wq->wq_lock);
    472 
    473 	kthread_exit(0);
    474 }
    475 
    476 /*
    477  * linux_workqueue_timeout(cookie)
    478  *
    479  *	Delayed work timeout callback.
    480  *
    481  *	- If scheduled, queue it.
    482  *	- If rescheduled, callout_schedule ourselves again.
    483  *	- If cancelled, destroy the callout and release the work from
    484  *        the workqueue.
    485  */
    486 static void
    487 linux_workqueue_timeout(void *cookie)
    488 {
    489 	struct delayed_work *const dw = cookie;
    490 	struct workqueue_struct *const wq = work_queue(&dw->work);
    491 
    492 	KASSERTMSG(wq != NULL,
    493 	    "delayed work %p state %d resched %d",
    494 	    dw, dw->dw_state, dw->dw_resched);
    495 
    496 	SDT_PROBE2(sdt, linux, work, timer,  dw, wq);
    497 
    498 	mutex_enter(&wq->wq_lock);
    499 	KASSERT(work_queue(&dw->work) == wq);
    500 	switch (dw->dw_state) {
    501 	case DELAYED_WORK_IDLE:
    502 		panic("delayed work callout uninitialized: %p", dw);
    503 	case DELAYED_WORK_SCHEDULED:
    504 		dw_callout_destroy(wq, dw);
    505 		TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work, work_entry);
    506 		cv_broadcast(&wq->wq_cv);
    507 		SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
    508 		break;
    509 	case DELAYED_WORK_RESCHEDULED:
    510 		KASSERT(dw->dw_resched >= 0);
    511 		callout_schedule(&dw->dw_callout, dw->dw_resched);
    512 		dw->dw_state = DELAYED_WORK_SCHEDULED;
    513 		dw->dw_resched = -1;
    514 		break;
    515 	case DELAYED_WORK_CANCELLED:
    516 		cancel_delayed_work_done(wq, dw);
    517 		/* Can't dereference dw after this point.  */
    518 		goto out;
    519 	default:
    520 		panic("delayed work callout in bad state: %p", dw);
    521 	}
    522 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE ||
    523 	    dw->dw_state == DELAYED_WORK_SCHEDULED);
    524 out:	mutex_exit(&wq->wq_lock);
    525 }
    526 
    527 /*
    528  * current_work()
    529  *
    530  *	If in a workqueue worker thread, return the work it is
    531  *	currently executing.  Otherwise return NULL.
    532  */
    533 struct work_struct *
    534 current_work(void)
    535 {
    536 	struct workqueue_struct *wq = lwp_getspecific(workqueue_key);
    537 
    538 	/* If we're not a workqueue thread, then there's no work.  */
    539 	if (wq == NULL)
    540 		return NULL;
    541 
    542 	/*
    543 	 * Otherwise, this should be possible only while work is in
    544 	 * progress.  Return the current work item.
    545 	 */
    546 	KASSERT(wq->wq_current_work != NULL);
    547 	return wq->wq_current_work;
    548 }
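
/*
 * Example (editor's sketch; sc and sc_work are hypothetical): a helper
 * that must only run from its own work handler can assert as much:
 *
 *	KASSERT(current_work() == &sc->sc_work);
 */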
    549 
    550 /*
    552  * Work
    553  */
    554 
    555 /*
    556  * INIT_WORK(work, fn)
    557  *
    558  *	Initialize work for use with a workqueue to call fn in a worker
    559  *	thread.  There is no corresponding destruction operation.
    560  */
    561 void
    562 INIT_WORK(struct work_struct *work, void (*fn)(struct work_struct *))
    563 {
    564 
    565 	work->work_owner = 0;
    566 	work->func = fn;
    567 }
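
/*
 * Example (editor's sketch; example_softc, example_work, and sc_events
 * are hypothetical names, not part of this file):
 *
 *	struct example_softc {
 *		struct work_struct	sc_work;
 *		volatile uint32_t	sc_events;
 *	};
 *
 *	static void
 *	example_work(struct work_struct *work)
 *	{
 *		struct example_softc *sc = container_of(work,
 *		    struct example_softc, sc_work);
 *
 *		(process and clear sc->sc_events here)
 *	}
 *
 *	(at attach time)
 *	INIT_WORK(&sc->sc_work, example_work);
 */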
    568 
    569 /*
    570  * work_claimed(work, wq)
    571  *
    572  *	True if work is currently claimed by a workqueue, meaning it is
    573  *	either on the queue or scheduled in a callout.  The workqueue
    574  *	must be wq, and caller must hold wq's lock.
    575  */
    576 static bool
    577 work_claimed(struct work_struct *work, struct workqueue_struct *wq)
    578 {
    579 
    580 	KASSERT(work_queue(work) == wq);
    581 	KASSERT(mutex_owned(&wq->wq_lock));
    582 
    583 	return atomic_load_relaxed(&work->work_owner) & 1;
    584 }
    585 
    586 /*
    587  * work_pending(work)
    588  *
    589  *	True if work is currently claimed by any workqueue, scheduled
    590  *	to run on that workqueue.
    591  */
    592 bool
    593 work_pending(const struct work_struct *work)
    594 {
    595 
    596 	return atomic_load_relaxed(&work->work_owner) & 1;
    597 }
    598 
    599 /*
    600  * work_queue(work)
    601  *
    602  *	Return the last queue that work was queued on, or NULL if it
    603  *	was never queued.
    604  */
    605 static struct workqueue_struct *
    606 work_queue(struct work_struct *work)
    607 {
    608 
    609 	return (struct workqueue_struct *)
    610 	    (atomic_load_relaxed(&work->work_owner) & ~(uintptr_t)1);
    611 }
    612 
    613 /*
    614  * acquire_work(work, wq)
    615  *
    616  *	Try to claim work for wq.  If work is already claimed, it must
    617  *	be claimed by wq; return false.  If work is not already
    618  *	claimed, claim it, issue a memory barrier to match any prior
    619  *	release_work, and return true.
    620  *
    621  *	Caller must hold wq's lock.
    622  */
    623 static bool
    624 acquire_work(struct work_struct *work, struct workqueue_struct *wq)
    625 {
    626 	uintptr_t owner0, owner;
    627 
    628 	KASSERT(mutex_owned(&wq->wq_lock));
    629 	KASSERT(((uintptr_t)wq & 1) == 0);
    630 
    631 	owner = (uintptr_t)wq | 1;
    632 	do {
    633 		owner0 = atomic_load_relaxed(&work->work_owner);
    634 		if (owner0 & 1) {
    635 			KASSERT((owner0 & ~(uintptr_t)1) == (uintptr_t)wq);
    636 			return false;
    637 		}
    638 		KASSERT(owner0 == (uintptr_t)NULL || owner0 == (uintptr_t)wq);
    639 	} while (atomic_cas_uintptr(&work->work_owner, owner0, owner) !=
    640 	    owner0);
    641 
    642 	KASSERT(work_queue(work) == wq);
    643 	membar_enter();
    644 	SDT_PROBE2(sdt, linux, work, acquire,  work, wq);
    645 	return true;
    646 }
    647 
    648 /*
    649  * release_work(work, wq)
    650  *
    651  *	Issue a memory barrier to match any subsequent acquire_work and
    652  *	dissociate work from wq.
    653  *
    654  *	Caller must hold wq's lock and work must be associated with wq.
    655  */
    656 static void
    657 release_work(struct work_struct *work, struct workqueue_struct *wq)
    658 {
    659 
    660 	KASSERT(work_queue(work) == wq);
    661 	KASSERT(mutex_owned(&wq->wq_lock));
    662 
    663 	SDT_PROBE2(sdt, linux, work, release,  work, wq);
    664 	membar_exit();
    665 
    666 	/*
    667 	 * Non-interlocked r/m/w is safe here because nobody else can
    668 	 * write to this while the claimed bit is set and the workqueue
    669 	 * lock is held.
    670 	 */
    671 	atomic_store_relaxed(&work->work_owner,
    672 	    atomic_load_relaxed(&work->work_owner) & ~(uintptr_t)1);
    673 }
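
/*
 * Editor's note, summarizing the accessors above: work_owner packs the
 * owning workqueue pointer and a "claimed" bit into a single word:
 *
 *	(uintptr_t)wq | 1	claimed: on a queue or in a callout
 *	(uintptr_t)wq		released: last queue remembered, may be
 *				claimed again
 *	0			never queued
 *
 * acquire_work() sets the low bit with a compare-and-swap;
 * release_work() clears it while holding the workqueue lock.
 */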
    674 
    675 /*
    676  * schedule_work(work)
    677  *
    678  *	If work is not already queued on system_wq, queue it to be run
    679  *	by system_wq's worker thread when it next can.  True if it was
    680  *	newly queued, false if it was already queued.  If the work was
    681  *	already running, queue it to run again.
    682  *
    683  *	Caller must ensure work is not queued to run on a different
    684  *	workqueue.
    685  */
    686 bool
    687 schedule_work(struct work_struct *work)
    688 {
    689 
    690 	return queue_work(system_wq, work);
    691 }
    692 
    693 /*
    694  * queue_work(wq, work)
    695  *
    696  *	If work is not already queued on wq, queue it to be run by wq's
    697  *	worker thread when it next can.  True if it was newly queued,
    698  *	false if it was already queued.  If the work was already
    699  *	running, queue it to run again.
    700  *
    701  *	Caller must ensure work is not queued to run on a different
    702  *	workqueue.
    703  */
    704 bool
    705 queue_work(struct workqueue_struct *wq, struct work_struct *work)
    706 {
    707 	bool newly_queued;
    708 
    709 	KASSERT(wq != NULL);
    710 
    711 	mutex_enter(&wq->wq_lock);
    712 	if (__predict_true(acquire_work(work, wq))) {
    713 		/*
    714 		 * It wasn't on any workqueue at all.  Put it on this
    715 		 * one, and signal the worker thread that there is work
    716 		 * to do.
    717 		 */
    718 		TAILQ_INSERT_TAIL(&wq->wq_queue, work, work_entry);
    719 		cv_broadcast(&wq->wq_cv);
    720 		SDT_PROBE2(sdt, linux, work, queue,  work, wq);
    721 		newly_queued = true;
    722 	} else {
    723 		/*
    724 		 * It was already on this workqueue.  Nothing to do
    725 		 * since it is already queued.
    726 		 */
    727 		newly_queued = false;
    728 	}
    729 	mutex_exit(&wq->wq_lock);
    730 
    731 	return newly_queued;
    732 }
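
/*
 * Example (editor's sketch; sc, sc_wq, sc_work, sc_events, and EV_FOO
 * are hypothetical): because queueing work that is already queued
 * returns false and does not queue it twice, callers can coalesce
 * events from interrupt context:
 *
 *	atomic_or_32(&sc->sc_events, EV_FOO);
 *	(void)queue_work(sc->sc_wq, &sc->sc_work);
 */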
    733 
    734 /*
    735  * cancel_work(work)
    736  *
    737  *	If work was queued, remove it from the queue and return true.
    738  *	If work was not queued, return false.  Work may still be
    739  *	running when this returns.
    740  */
    741 bool
    742 cancel_work(struct work_struct *work)
    743 {
    744 	struct workqueue_struct *wq;
    745 	bool cancelled_p = false;
    746 
    747 	/* If there's no workqueue, nothing to cancel.   */
    748 	if ((wq = work_queue(work)) == NULL)
    749 		goto out;
    750 
    751 	mutex_enter(&wq->wq_lock);
    752 	if (__predict_false(work_queue(work) != wq)) {
    753 		/*
    754 		 * It has finished execution or been cancelled by
    755 		 * another thread, and has been moved off the
     756 		 * workqueue, so it's too late to cancel.
    757 		 */
    758 		cancelled_p = false;
    759 	} else {
    760 		/* Check whether it's on the queue.  */
    761 		if (work_claimed(work, wq)) {
    762 			/*
    763 			 * It is still on the queue.  Take it off the
    764 			 * queue and report successful cancellation.
    765 			 */
    766 			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
    767 			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
    768 			release_work(work, wq);
    769 			/* Can't dereference work after this point.  */
    770 			cancelled_p = true;
    771 		} else {
    772 			/* Not on the queue.  Couldn't cancel it.  */
    773 			cancelled_p = false;
    774 		}
    775 	}
    776 	mutex_exit(&wq->wq_lock);
    777 
    778 out:	return cancelled_p;
    779 }
    780 
    781 /*
    782  * cancel_work_sync(work)
    783  *
    784  *	If work was queued, remove it from the queue and return true.
    785  *	If work was not queued, return false.  Either way, if work is
    786  *	currently running, wait for it to complete.
    787  *
    788  *	May sleep.
    789  */
    790 bool
    791 cancel_work_sync(struct work_struct *work)
    792 {
    793 	struct workqueue_struct *wq;
    794 	bool cancelled_p = false;
    795 
    796 	/* If there's no workqueue, nothing to cancel.   */
    797 	if ((wq = work_queue(work)) == NULL)
    798 		goto out;
    799 
    800 	mutex_enter(&wq->wq_lock);
    801 	if (__predict_false(work_queue(work) != wq)) {
    802 		/*
    803 		 * It has finished execution or been cancelled by
    804 		 * another thread, and has been moved off the
    805 		 * workqueue, so it's too late to cancel.
    806 		 */
    807 		cancelled_p = false;
    808 	} else {
    809 		/* Check whether it's on the queue.  */
    810 		if (work_claimed(work, wq)) {
    811 			/*
    812 			 * It is still on the queue.  Take it off the
    813 			 * queue and report successful cancellation.
    814 			 */
    815 			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
    816 			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
    817 			release_work(work, wq);
    818 			/* Can't dereference work after this point.  */
    819 			cancelled_p = true;
    820 		} else {
    821 			/* Not on the queue.  Couldn't cancel it.  */
    822 			cancelled_p = false;
    823 		}
    824 		/* If it's still running, wait for it to complete.  */
    825 		if (wq->wq_current_work == work)
    826 			wait_for_current_work(work, wq);
    827 	}
    828 	mutex_exit(&wq->wq_lock);
    829 
    830 out:	return cancelled_p;
    831 }
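
/*
 * Example (editor's sketch): typical detach-time teardown for the
 * hypothetical example_softc shown after INIT_WORK above.  The caller
 * must already have arranged that nothing requeues sc_work:
 *
 *	(void)cancel_work_sync(&sc->sc_work);
 *	destroy_workqueue(sc->sc_wq);
 *	kmem_free(sc, sizeof(*sc));
 */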
    832 
    833 /*
    834  * wait_for_current_work(work, wq)
    835  *
    836  *	wq must be currently executing work.  Wait for it to finish.
    837  *
    838  *	Does not dereference work.
    839  */
    840 static void
    841 wait_for_current_work(struct work_struct *work, struct workqueue_struct *wq)
    842 {
    843 	uint64_t gen;
    844 
    845 	KASSERT(mutex_owned(&wq->wq_lock));
    846 	KASSERT(wq->wq_current_work == work);
    847 
    848 	/* Wait only one generation in case it gets requeued quickly.  */
    849 	SDT_PROBE2(sdt, linux, work, wait__start,  work, wq);
    850 	gen = wq->wq_gen;
    851 	do {
    852 		cv_wait(&wq->wq_cv, &wq->wq_lock);
    853 	} while (wq->wq_current_work == work && wq->wq_gen == gen);
    854 	SDT_PROBE2(sdt, linux, work, wait__done,  work, wq);
    855 }
    856 
    857 /*
    859  * Delayed work
    860  */
    861 
    862 /*
    863  * INIT_DELAYED_WORK(dw, fn)
    864  *
    865  *	Initialize dw for use with a workqueue to call fn in a worker
    866  *	thread after a delay.  There is no corresponding destruction
    867  *	operation.
    868  */
    869 void
    870 INIT_DELAYED_WORK(struct delayed_work *dw, void (*fn)(struct work_struct *))
    871 {
    872 
    873 	INIT_WORK(&dw->work, fn);
    874 	dw->dw_state = DELAYED_WORK_IDLE;
    875 	dw->dw_resched = -1;
    876 
    877 	/*
     878 	 * Defer callout_init until we are going to schedule the
     879 	 * callout, which can then callout_destroy it: since there is no
     880 	 * DESTROY_DELAYED_WORK or similar, we would otherwise have no
     881 	 * opportunity to call callout_destroy.
    882 	 */
    883 }
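
/*
 * Example (editor's sketch; example_tick_softc, example_tick, and
 * sc_tick are hypothetical):
 *
 *	struct example_tick_softc {
 *		struct delayed_work	sc_tick;
 *	};
 *
 *	static void
 *	example_tick(struct work_struct *work)
 *	{
 *		struct delayed_work *dw = container_of(work,
 *		    struct delayed_work, work);
 *		struct example_tick_softc *sc = container_of(dw,
 *		    struct example_tick_softc, sc_tick);
 *
 *		(periodic housekeeping here)
 *	}
 *
 *	INIT_DELAYED_WORK(&sc->sc_tick, example_tick);
 */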
    884 
    885 /*
    886  * schedule_delayed_work(dw, ticks)
    887  *
    888  *	If it is not currently scheduled, schedule dw to run after
    889  *	ticks on system_wq.  If currently executing and not already
    890  *	rescheduled, reschedule it.  True if it was newly scheduled,
    891  *	false if it was already scheduled.
    892  *
    893  *	If ticks == 0, queue it to run as soon as the worker can,
    894  *	without waiting for the next callout tick to run.
    895  */
    896 bool
    897 schedule_delayed_work(struct delayed_work *dw, unsigned long ticks)
    898 {
    899 
    900 	return queue_delayed_work(system_wq, dw, ticks);
    901 }
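
/*
 * Example (editor's sketch; sc_tick is the hypothetical delayed work
 * from the INIT_DELAYED_WORK example): the delay is in system clock
 * ticks, so callers that think in time units convert first, e.g. with
 * mstohz(9):
 *
 *	schedule_delayed_work(&sc->sc_tick, mstohz(500));  (about 500 ms)
 *	schedule_delayed_work(&sc->sc_tick, 0);  (run ASAP, no callout)
 */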
    902 
    903 /*
    904  * dw_callout_init(wq, dw)
    905  *
    906  *	Initialize the callout of dw and transition to
    907  *	DELAYED_WORK_SCHEDULED.  Caller must use callout_schedule.
    908  */
    909 static void
    910 dw_callout_init(struct workqueue_struct *wq, struct delayed_work *dw)
    911 {
    912 
    913 	KASSERT(mutex_owned(&wq->wq_lock));
    914 	KASSERT(work_queue(&dw->work) == wq);
    915 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
    916 
    917 	callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
    918 	callout_setfunc(&dw->dw_callout, &linux_workqueue_timeout, dw);
    919 	TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
    920 	dw->dw_state = DELAYED_WORK_SCHEDULED;
    921 }
    922 
    923 /*
    924  * dw_callout_destroy(wq, dw)
    925  *
    926  *	Destroy the callout of dw and transition to DELAYED_WORK_IDLE.
    927  */
    928 static void
    929 dw_callout_destroy(struct workqueue_struct *wq, struct delayed_work *dw)
    930 {
    931 
    932 	KASSERT(mutex_owned(&wq->wq_lock));
    933 	KASSERT(work_queue(&dw->work) == wq);
    934 	KASSERT(dw->dw_state == DELAYED_WORK_SCHEDULED ||
    935 	    dw->dw_state == DELAYED_WORK_RESCHEDULED ||
    936 	    dw->dw_state == DELAYED_WORK_CANCELLED);
    937 
    938 	TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
    939 	callout_destroy(&dw->dw_callout);
    940 	dw->dw_resched = -1;
    941 	dw->dw_state = DELAYED_WORK_IDLE;
    942 }
    943 
    944 /*
    945  * cancel_delayed_work_done(wq, dw)
    946  *
    947  *	Complete cancellation of a delayed work: transition from
    948  *	DELAYED_WORK_CANCELLED to DELAYED_WORK_IDLE and off the
    949  *	workqueue.  Caller must not dereference dw after this returns.
    950  */
    951 static void
    952 cancel_delayed_work_done(struct workqueue_struct *wq, struct delayed_work *dw)
    953 {
    954 
    955 	KASSERT(mutex_owned(&wq->wq_lock));
    956 	KASSERT(work_queue(&dw->work) == wq);
    957 	KASSERT(dw->dw_state == DELAYED_WORK_CANCELLED);
    958 
    959 	dw_callout_destroy(wq, dw);
    960 	release_work(&dw->work, wq);
    961 	/* Can't dereference dw after this point.  */
    962 }
    963 
    964 /*
    965  * queue_delayed_work(wq, dw, ticks)
    966  *
     967  *	If it is not currently scheduled or queued, schedule dw to run
     968  *	after ticks on wq and return true.  If it is already scheduled
     969  *	or queued, leave it alone and return false.
    970  *
    971  *	If ticks == 0, queue it to run as soon as the worker can,
    972  *	without waiting for the next callout tick to run.
    973  */
    974 bool
    975 queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    976     unsigned long ticks)
    977 {
    978 	bool newly_queued;
    979 
    980 	mutex_enter(&wq->wq_lock);
    981 	if (__predict_true(acquire_work(&dw->work, wq))) {
    982 		/*
    983 		 * It wasn't on any workqueue at all.  Schedule it to
    984 		 * run on this one.
    985 		 */
    986 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
    987 		if (ticks == 0) {
    988 			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
    989 			    work_entry);
    990 			cv_broadcast(&wq->wq_cv);
    991 			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
    992 		} else {
    993 			/*
    994 			 * Initialize a callout and schedule to run
    995 			 * after a delay.
    996 			 */
    997 			dw_callout_init(wq, dw);
    998 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
    999 			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
   1000 		}
   1001 		newly_queued = true;
   1002 	} else {
   1003 		/* It was already on this workqueue.  */
   1004 		switch (dw->dw_state) {
   1005 		case DELAYED_WORK_IDLE:
   1006 		case DELAYED_WORK_SCHEDULED:
   1007 		case DELAYED_WORK_RESCHEDULED:
   1008 			/* On the queue or already scheduled.  Leave it.  */
   1009 			newly_queued = false;
   1010 			break;
   1011 		case DELAYED_WORK_CANCELLED:
   1012 			/*
   1013 			 * Scheduled and the callout began, but it was
   1014 			 * cancelled.  Reschedule it.
   1015 			 */
   1016 			if (ticks == 0) {
   1017 				dw->dw_state = DELAYED_WORK_SCHEDULED;
   1018 				SDT_PROBE2(sdt, linux, work, queue,
   1019 				    &dw->work, wq);
   1020 			} else {
   1021 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
   1022 				dw->dw_resched = MIN(INT_MAX, ticks);
   1023 				SDT_PROBE3(sdt, linux, work, schedule,
   1024 				    dw, wq, ticks);
   1025 			}
   1026 			newly_queued = true;
   1027 			break;
   1028 		default:
   1029 			panic("invalid delayed work state: %d",
   1030 			    dw->dw_state);
   1031 		}
   1032 	}
   1033 	mutex_exit(&wq->wq_lock);
   1034 
   1035 	return newly_queued;
   1036 }
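
/*
 * Example (editor's sketch): a self-rescheduling periodic tick, using
 * the hypothetical example_tick handler above; by the time the handler
 * runs it has been released, so it may requeue itself:
 *
 *	static void
 *	example_tick(struct work_struct *work)
 *	{
 *		...
 *		(void)queue_delayed_work(sc->sc_wq, &sc->sc_tick, hz);
 *	}
 *
 * Teardown must break the cycle, typically by checking a "dying" flag
 * in the handler before requeueing and then calling
 * cancel_delayed_work_sync().
 */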
   1037 
   1038 /*
   1039  * mod_delayed_work(wq, dw, ticks)
   1040  *
   1041  *	Schedule dw to run after ticks.  If scheduled or queued,
   1042  *	reschedule.  If ticks == 0, run without delay.
   1043  *
   1044  *	True if it modified the timer of an already scheduled work,
   1045  *	false if it newly scheduled the work.
   1046  */
   1047 bool
   1048 mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
   1049     unsigned long ticks)
   1050 {
   1051 	bool timer_modified;
   1052 
   1053 	mutex_enter(&wq->wq_lock);
   1054 	if (acquire_work(&dw->work, wq)) {
   1055 		/*
   1056 		 * It wasn't on any workqueue at all.  Schedule it to
   1057 		 * run on this one.
   1058 		 */
   1059 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
   1060 		if (ticks == 0) {
   1061 			/*
   1062 			 * Run immediately: put it on the queue and
   1063 			 * signal the worker thread.
   1064 			 */
   1065 			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
   1066 			    work_entry);
   1067 			cv_broadcast(&wq->wq_cv);
   1068 			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
   1069 		} else {
   1070 			/*
   1071 			 * Initialize a callout and schedule to run
   1072 			 * after a delay.
   1073 			 */
   1074 			dw_callout_init(wq, dw);
   1075 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
   1076 			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
   1077 		}
   1078 		timer_modified = false;
   1079 	} else {
   1080 		/* It was already on this workqueue.  */
   1081 		switch (dw->dw_state) {
   1082 		case DELAYED_WORK_IDLE:
   1083 			/* On the queue.  */
   1084 			if (ticks == 0) {
   1085 				/* Leave it be.  */
   1086 				SDT_PROBE2(sdt, linux, work, cancel,
   1087 				    &dw->work, wq);
   1088 				SDT_PROBE2(sdt, linux, work, queue,
   1089 				    &dw->work, wq);
   1090 			} else {
   1091 				/* Remove from the queue and schedule.  */
   1092 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
   1093 				    work_entry);
   1094 				dw_callout_init(wq, dw);
   1095 				callout_schedule(&dw->dw_callout,
   1096 				    MIN(INT_MAX, ticks));
   1097 				SDT_PROBE2(sdt, linux, work, cancel,
   1098 				    &dw->work, wq);
   1099 				SDT_PROBE3(sdt, linux, work, schedule,
   1100 				    dw, wq, ticks);
   1101 			}
   1102 			timer_modified = true;
   1103 			break;
   1104 		case DELAYED_WORK_SCHEDULED:
   1105 			/*
    1106 			 * It is scheduled to run after a delay.  Try
    1107 			 * to stop the callout; if we can, reschedule or
    1108 			 * queue it ourselves; if we can't, leave a note
    1109 			 * asking the callout to requeue or reschedule it.
   1110 			 */
   1111 			if (callout_stop(&dw->dw_callout)) {
   1112 				/* Can't stop, callout has begun.  */
   1113 				if (ticks == 0) {
   1114 					/*
   1115 					 * We don't actually need to do
   1116 					 * anything.  The callout will
   1117 					 * queue it as soon as it gets
   1118 					 * the lock.
   1119 					 */
   1120 					SDT_PROBE2(sdt, linux, work, cancel,
   1121 					    &dw->work, wq);
   1122 					SDT_PROBE2(sdt, linux, work, queue,
   1123 					    &dw->work, wq);
   1124 				} else {
   1125 					/* Ask the callout to reschedule.  */
   1126 					dw->dw_state = DELAYED_WORK_RESCHEDULED;
   1127 					dw->dw_resched = MIN(INT_MAX, ticks);
   1128 					SDT_PROBE2(sdt, linux, work, cancel,
   1129 					    &dw->work, wq);
   1130 					SDT_PROBE3(sdt, linux, work, schedule,
   1131 					    dw, wq, ticks);
   1132 				}
   1133 			} else {
   1134 				/* We stopped the callout before it began.  */
   1135 				if (ticks == 0) {
   1136 					/*
   1137 					 * Run immediately: destroy the
   1138 					 * callout, put it on the
   1139 					 * queue, and signal the worker
   1140 					 * thread.
   1141 					 */
   1142 					dw_callout_destroy(wq, dw);
   1143 					TAILQ_INSERT_TAIL(&wq->wq_dqueue,
   1144 					    &dw->work, work_entry);
   1145 					cv_broadcast(&wq->wq_cv);
   1146 					SDT_PROBE2(sdt, linux, work, cancel,
   1147 					    &dw->work, wq);
   1148 					SDT_PROBE2(sdt, linux, work, queue,
   1149 					    &dw->work, wq);
   1150 				} else {
   1151 					/*
   1152 					 * Reschedule the callout.  No
   1153 					 * state change.
   1154 					 */
   1155 					callout_schedule(&dw->dw_callout,
   1156 					    MIN(INT_MAX, ticks));
   1157 					SDT_PROBE2(sdt, linux, work, cancel,
   1158 					    &dw->work, wq);
   1159 					SDT_PROBE3(sdt, linux, work, schedule,
   1160 					    dw, wq, ticks);
   1161 				}
   1162 			}
   1163 			timer_modified = true;
   1164 			break;
   1165 		case DELAYED_WORK_RESCHEDULED:
   1166 			/*
   1167 			 * Someone rescheduled it after the callout
   1168 			 * started but before the poor thing even had a
   1169 			 * chance to acquire the lock.
   1170 			 */
   1171 			if (ticks == 0) {
   1172 				/*
   1173 				 * We can just switch back to
   1174 				 * DELAYED_WORK_SCHEDULED so that the
   1175 				 * callout will queue the work as soon
   1176 				 * as it gets the lock.
   1177 				 */
   1178 				dw->dw_state = DELAYED_WORK_SCHEDULED;
   1179 				dw->dw_resched = -1;
   1180 				SDT_PROBE2(sdt, linux, work, cancel,
   1181 				    &dw->work, wq);
   1182 				SDT_PROBE2(sdt, linux, work, queue,
   1183 				    &dw->work, wq);
   1184 			} else {
   1185 				/* Change the rescheduled time.  */
    1186 				dw->dw_resched = MIN(INT_MAX, ticks);
   1187 				SDT_PROBE2(sdt, linux, work, cancel,
   1188 				    &dw->work, wq);
   1189 				SDT_PROBE3(sdt, linux, work, schedule,
   1190 				    dw, wq, ticks);
   1191 			}
   1192 			timer_modified = true;
   1193 			break;
   1194 		case DELAYED_WORK_CANCELLED:
   1195 			/*
   1196 			 * Someone cancelled it after the callout
   1197 			 * started but before the poor thing even had a
   1198 			 * chance to acquire the lock.
   1199 			 */
   1200 			if (ticks == 0) {
   1201 				/*
   1202 				 * We can just switch back to
   1203 				 * DELAYED_WORK_SCHEDULED so that the
   1204 				 * callout will queue the work as soon
   1205 				 * as it gets the lock.
   1206 				 */
   1207 				dw->dw_state = DELAYED_WORK_SCHEDULED;
   1208 				SDT_PROBE2(sdt, linux, work, queue,
   1209 				    &dw->work, wq);
   1210 			} else {
   1211 				/* Ask it to reschedule.  */
   1212 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
   1213 				dw->dw_resched = MIN(INT_MAX, ticks);
   1214 				SDT_PROBE3(sdt, linux, work, schedule,
   1215 				    dw, wq, ticks);
   1216 			}
   1217 			timer_modified = false;
   1218 			break;
   1219 		default:
   1220 			panic("invalid delayed work state: %d", dw->dw_state);
   1221 		}
   1222 	}
   1223 	mutex_exit(&wq->wq_lock);
   1224 
   1225 	return timer_modified;
   1226 }
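
/*
 * Example (editor's sketch; sc_wq and sc_watchdog are hypothetical):
 * unlike queue_delayed_work(), which leaves an already-scheduled item
 * alone, mod_delayed_work() restarts the timer, so it suits
 * watchdog-style "push the deadline back" uses:
 *
 *	(void)mod_delayed_work(sc->sc_wq, &sc->sc_watchdog, 5*hz);
 */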
   1227 
   1228 /*
   1229  * cancel_delayed_work(dw)
   1230  *
   1231  *	If work was scheduled or queued, remove it from the schedule or
   1232  *	queue and return true.  If work was not scheduled or queued,
   1233  *	return false.  Note that work may already be running; if it
   1234  *	hasn't been rescheduled or requeued, then cancel_delayed_work
   1235  *	will return false, and either way, cancel_delayed_work will NOT
   1236  *	wait for the work to complete.
   1237  */
   1238 bool
   1239 cancel_delayed_work(struct delayed_work *dw)
   1240 {
   1241 	struct workqueue_struct *wq;
   1242 	bool cancelled_p;
   1243 
   1244 	/* If there's no workqueue, nothing to cancel.   */
   1245 	if ((wq = work_queue(&dw->work)) == NULL)
   1246 		return false;
   1247 
   1248 	mutex_enter(&wq->wq_lock);
   1249 	if (__predict_false(work_queue(&dw->work) != wq)) {
   1250 		cancelled_p = false;
   1251 	} else {
   1252 		switch (dw->dw_state) {
   1253 		case DELAYED_WORK_IDLE:
   1254 			/*
   1255 			 * It is either on the queue or already running
   1256 			 * or both.
   1257 			 */
   1258 			if (work_claimed(&dw->work, wq)) {
   1259 				/* On the queue.  Remove and release.  */
   1260 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
   1261 				    work_entry);
   1262 				SDT_PROBE2(sdt, linux, work, cancel,
   1263 				    &dw->work, wq);
   1264 				release_work(&dw->work, wq);
   1265 				/* Can't dereference dw after this point.  */
   1266 				cancelled_p = true;
   1267 			} else {
   1268 				/* Not on the queue, so didn't cancel.  */
   1269 				cancelled_p = false;
   1270 			}
   1271 			break;
   1272 		case DELAYED_WORK_SCHEDULED:
   1273 			/*
   1274 			 * If it is scheduled, mark it cancelled and
   1275 			 * try to stop the callout before it starts.
   1276 			 *
   1277 			 * If it's too late and the callout has already
   1278 			 * begun to execute, tough.
   1279 			 *
   1280 			 * If we stopped the callout before it started,
   1281 			 * however, then destroy the callout and
   1282 			 * dissociate it from the workqueue ourselves.
   1283 			 */
   1284 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1285 			cancelled_p = true;
   1286 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
   1287 			if (!callout_stop(&dw->dw_callout))
   1288 				cancel_delayed_work_done(wq, dw);
   1289 			break;
   1290 		case DELAYED_WORK_RESCHEDULED:
   1291 			/*
   1292 			 * If it is being rescheduled, the callout has
   1293 			 * already fired.  We must ask it to cancel.
   1294 			 */
   1295 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1296 			dw->dw_resched = -1;
   1297 			cancelled_p = true;
   1298 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
   1299 			break;
   1300 		case DELAYED_WORK_CANCELLED:
   1301 			/*
   1302 			 * If it is being cancelled, the callout has
   1303 			 * already fired.  There is nothing more for us
   1304 			 * to do.  Someone else claims credit for
   1305 			 * cancelling it.
   1306 			 */
   1307 			cancelled_p = false;
   1308 			break;
   1309 		default:
   1310 			panic("invalid delayed work state: %d",
   1311 			    dw->dw_state);
   1312 		}
   1313 	}
   1314 	mutex_exit(&wq->wq_lock);
   1315 
   1316 	return cancelled_p;
   1317 }
   1318 
   1319 /*
   1320  * cancel_delayed_work_sync(dw)
   1321  *
   1322  *	If work was scheduled or queued, remove it from the schedule or
   1323  *	queue and return true.  If work was not scheduled or queued,
   1324  *	return false.  Note that work may already be running; if it
    1325  *	hasn't been rescheduled or requeued, cancel_delayed_work_sync
    1326  *	will return false; either way, it waits for the work to complete.
   1327  */
   1328 bool
   1329 cancel_delayed_work_sync(struct delayed_work *dw)
   1330 {
   1331 	struct workqueue_struct *wq;
   1332 	bool cancelled_p;
   1333 
   1334 	/* If there's no workqueue, nothing to cancel.  */
   1335 	if ((wq = work_queue(&dw->work)) == NULL)
   1336 		return false;
   1337 
   1338 	mutex_enter(&wq->wq_lock);
   1339 	if (__predict_false(work_queue(&dw->work) != wq)) {
   1340 		cancelled_p = false;
   1341 	} else {
   1342 		switch (dw->dw_state) {
   1343 		case DELAYED_WORK_IDLE:
   1344 			/*
   1345 			 * It is either on the queue or already running
   1346 			 * or both.
   1347 			 */
   1348 			if (work_claimed(&dw->work, wq)) {
   1349 				/* On the queue.  Remove and release.  */
   1350 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
   1351 				    work_entry);
   1352 				SDT_PROBE2(sdt, linux, work, cancel,
   1353 				    &dw->work, wq);
   1354 				release_work(&dw->work, wq);
   1355 				/* Can't dereference dw after this point.  */
   1356 				cancelled_p = true;
   1357 			} else {
   1358 				/* Not on the queue, so didn't cancel. */
   1359 				cancelled_p = false;
   1360 			}
   1361 			/* If it's still running, wait for it to complete.  */
   1362 			if (wq->wq_current_work == &dw->work)
   1363 				wait_for_current_work(&dw->work, wq);
   1364 			break;
   1365 		case DELAYED_WORK_SCHEDULED:
   1366 			/*
   1367 			 * If it is scheduled, mark it cancelled and
   1368 			 * try to stop the callout before it starts.
   1369 			 *
   1370 			 * If it's too late and the callout has already
   1371 			 * begun to execute, we must wait for it to
   1372 			 * complete.  But we got in soon enough to ask
   1373 			 * the callout not to run, so we successfully
   1374 			 * cancelled it in that case.
   1375 			 *
   1376 			 * If we stopped the callout before it started,
   1377 			 * then we must destroy the callout and
   1378 			 * dissociate it from the workqueue ourselves.
   1379 			 */
   1380 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1381 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
   1382 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
   1383 				cancel_delayed_work_done(wq, dw);
   1384 			cancelled_p = true;
   1385 			break;
   1386 		case DELAYED_WORK_RESCHEDULED:
   1387 			/*
   1388 			 * If it is being rescheduled, the callout has
   1389 			 * already fired.  We must ask it to cancel and
   1390 			 * wait for it to complete.
   1391 			 */
   1392 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1393 			dw->dw_resched = -1;
   1394 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
   1395 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
   1396 			cancelled_p = true;
   1397 			break;
   1398 		case DELAYED_WORK_CANCELLED:
   1399 			/*
   1400 			 * If it is being cancelled, the callout has
   1401 			 * already fired.  We need only wait for it to
   1402 			 * complete.  Someone else, however, claims
   1403 			 * credit for cancelling it.
   1404 			 */
   1405 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
   1406 			cancelled_p = false;
   1407 			break;
   1408 		default:
   1409 			panic("invalid delayed work state: %d",
   1410 			    dw->dw_state);
   1411 		}
   1412 	}
   1413 	mutex_exit(&wq->wq_lock);
   1414 
   1415 	return cancelled_p;
   1416 }
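
/*
 * Example (editor's sketch): detach-time teardown for the hypothetical
 * sc_tick delayed work above, assuming nothing can rearm it
 * concurrently:
 *
 *	(void)cancel_delayed_work_sync(&sc->sc_tick);
 *
 * On return the callout has been halted and the handler is no longer
 * running.
 */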
   1417 
   1418 /*
   1420  * Flush
   1421  */
   1422 
   1423 /*
   1424  * flush_scheduled_work()
   1425  *
   1426  *	Wait for all work queued on system_wq to complete.  This does
   1427  *	not include delayed work.
   1428  */
   1429 void
   1430 flush_scheduled_work(void)
   1431 {
   1432 
   1433 	flush_workqueue(system_wq);
   1434 }
   1435 
   1436 struct flush_work {
   1437 	kmutex_t		fw_lock;
   1438 	kcondvar_t		fw_cv;
   1439 	struct work_struct	fw_work;
   1440 	bool			fw_done;
   1441 };
   1442 
   1443 static void
   1444 flush_work_cb(struct work_struct *work)
   1445 {
   1446 	struct flush_work *fw = container_of(work, struct flush_work, fw_work);
   1447 
   1448 	mutex_enter(&fw->fw_lock);
   1449 	fw->fw_done = true;
   1450 	cv_broadcast(&fw->fw_cv);
   1451 	mutex_exit(&fw->fw_lock);
   1452 }
   1453 
   1454 /*
   1455  * flush_workqueue(wq)
   1456  *
   1457  *	Wait for all work queued on wq to complete.  This does not
   1458  *	include delayed work.
   1459  */
   1460 void
   1461 flush_workqueue(struct workqueue_struct *wq)
   1462 {
   1463 	struct flush_work fw;
   1464 
   1465 	mutex_init(&fw.fw_lock, MUTEX_DEFAULT, IPL_VM);
   1466 	cv_init(&fw.fw_cv, "lxwqflsh");
   1467 	INIT_WORK(&fw.fw_work, &flush_work_cb);
   1468 	fw.fw_done = false;
   1469 
   1470 	SDT_PROBE1(sdt, linux, work, flush__start,  wq);
   1471 	queue_work(wq, &fw.fw_work);
   1472 
   1473 	mutex_enter(&fw.fw_lock);
   1474 	while (!fw.fw_done)
   1475 		cv_wait(&fw.fw_cv, &fw.fw_lock);
   1476 	mutex_exit(&fw.fw_lock);
   1477 	SDT_PROBE1(sdt, linux, work, flush__done,  wq);
   1478 
   1479 	KASSERT(fw.fw_done);
   1480 	/* no DESTROY_WORK */
   1481 	cv_destroy(&fw.fw_cv);
   1482 	mutex_destroy(&fw.fw_lock);
   1483 }
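
/*
 * Example (editor's sketch; sc_wq and sc_work are hypothetical): a
 * suspend path that must observe the effects of work queued so far,
 * without cancelling anything:
 *
 *	queue_work(sc->sc_wq, &sc->sc_work);
 *	...
 *	flush_workqueue(sc->sc_wq);
 *
 * Because the flush is implemented with a sentinel work item queued at
 * the tail, everything queued with queue_work() before the call has run
 * by the time it returns; delayed work is not included.
 */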
   1484 
   1485 /*
   1486  * drain_workqueue(wq)
   1487  *
   1488  *	Repeatedly flush wq until there is no more work.
   1489  */
   1490 void
   1491 drain_workqueue(struct workqueue_struct *wq)
   1492 {
   1493 	unsigned ntries = 0;
   1494 	bool done;
   1495 
   1496 	do {
   1497 		if (ntries++ == 10 || (ntries % 100) == 0)
   1498 			printf("linux workqueue %s"
   1499 			    ": still clogged after %u flushes",
   1500 			    wq->wq_name, ntries);
   1501 		flush_workqueue(wq);
   1502 		mutex_enter(&wq->wq_lock);
   1503 		done = wq->wq_current_work == NULL;
   1504 		done &= TAILQ_EMPTY(&wq->wq_queue);
   1505 		done &= TAILQ_EMPTY(&wq->wq_dqueue);
   1506 		mutex_exit(&wq->wq_lock);
   1507 	} while (!done);
   1508 }
   1509 
   1510 /*
   1511  * flush_work(work)
   1512  *
   1513  *	If work is queued or currently executing, wait for it to
   1514  *	complete.
   1515  *
   1516  *	Return true if we waited to flush it, false if it was already
   1517  *	idle.
   1518  */
   1519 bool
   1520 flush_work(struct work_struct *work)
   1521 {
   1522 	struct workqueue_struct *wq;
   1523 
   1524 	/* If there's no workqueue, nothing to flush.  */
   1525 	if ((wq = work_queue(work)) == NULL)
   1526 		return false;
   1527 
   1528 	flush_workqueue(wq);
   1529 	return true;
   1530 }
   1531 
   1532 /*
   1533  * flush_delayed_work(dw)
   1534  *
   1535  *	If dw is scheduled to run after a delay, queue it immediately
   1536  *	instead.  Then, if dw is queued or currently executing, wait
    1537  *	for it to complete.  True if we waited, false if it was idle.
   1538  */
   1539 bool
   1540 flush_delayed_work(struct delayed_work *dw)
   1541 {
   1542 	struct workqueue_struct *wq;
   1543 	bool waited = false;
   1544 
   1545 	/* If there's no workqueue, nothing to flush.  */
   1546 	if ((wq = work_queue(&dw->work)) == NULL)
   1547 		return false;
   1548 
   1549 	mutex_enter(&wq->wq_lock);
   1550 	if (__predict_false(work_queue(&dw->work) != wq)) {
   1551 		/*
   1552 		 * Moved off the queue already (and possibly to another
   1553 		 * queue, though that would be ill-advised), so it must
   1554 		 * have completed, and we have nothing more to do.
   1555 		 */
   1556 		waited = false;
   1557 	} else {
   1558 		switch (dw->dw_state) {
   1559 		case DELAYED_WORK_IDLE:
   1560 			/*
   1561 			 * It has a workqueue assigned and the callout
   1562 			 * is idle, so it must be in progress or on the
   1563 			 * queue.  In that case, we'll wait for it to
   1564 			 * complete.
   1565 			 */
   1566 			break;
   1567 		case DELAYED_WORK_SCHEDULED:
   1568 		case DELAYED_WORK_RESCHEDULED:
   1569 		case DELAYED_WORK_CANCELLED:
   1570 			/*
   1571 			 * The callout is scheduled, and may have even
   1572 			 * started.  Mark it as scheduled so that if
   1573 			 * the callout has fired it will queue the work
   1574 			 * itself.  Try to stop the callout -- if we
   1575 			 * can, queue the work now; if we can't, wait
   1576 			 * for the callout to complete, which entails
   1577 			 * queueing it.
   1578 			 */
   1579 			dw->dw_state = DELAYED_WORK_SCHEDULED;
   1580 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock)) {
   1581 				/*
   1582 				 * We stopped it before it ran.  No
   1583 				 * state change in the interim is
   1584 				 * possible.  Destroy the callout and
   1585 				 * queue it ourselves.
   1586 				 */
   1587 				KASSERT(dw->dw_state ==
   1588 				    DELAYED_WORK_SCHEDULED);
   1589 				dw_callout_destroy(wq, dw);
   1590 				TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
   1591 				    work_entry);
   1592 				cv_broadcast(&wq->wq_cv);
   1593 				SDT_PROBE2(sdt, linux, work, queue,
   1594 				    &dw->work, wq);
   1595 			}
   1596 			break;
   1597 		default:
   1598 			panic("invalid delayed work state: %d", dw->dw_state);
   1599 		}
   1600 		/*
   1601 		 * Waiting for the whole queue to flush is overkill,
   1602 		 * but doesn't hurt.
   1603 		 */
   1604 		mutex_exit(&wq->wq_lock);
   1605 		flush_workqueue(wq);
   1606 		mutex_enter(&wq->wq_lock);
   1607 		waited = true;
   1608 	}
   1609 	mutex_exit(&wq->wq_lock);
   1610 
   1611 	return waited;
   1612 }
   1613 
   1614 /*
   1615  * delayed_work_pending(dw)
   1616  *
   1617  *	True if dw is currently scheduled to execute, false if not.
   1618  */
   1619 bool
   1620 delayed_work_pending(const struct delayed_work *dw)
   1621 {
   1622 
   1623 	return work_pending(&dw->work);
   1624 }
   1625 
   1626 /*
   1627  * INIT_RCU_WORK(rw, fn)
   1628  *
   1629  *	Initialize rw for use with a workqueue to call fn in a worker
   1630  *	thread after an RCU grace period.  There is no corresponding
   1631  *	destruction operation.
   1632  */
   1633 void
   1634 INIT_RCU_WORK(struct rcu_work *rw, void (*fn)(struct work_struct *))
   1635 {
   1636 
   1637 	INIT_WORK(&rw->work, fn);
   1638 }
   1639 
   1640 static void
   1641 queue_rcu_work_cb(struct rcu_head *r)
   1642 {
   1643 	struct rcu_work *rw = container_of(r, struct rcu_work, rw_rcu);
   1644 	struct workqueue_struct *wq = work_queue(&rw->work);
   1645 
   1646 	mutex_enter(&wq->wq_lock);
   1647 	KASSERT(work_pending(&rw->work));
   1648 	KASSERT(work_queue(&rw->work) == wq);
   1649 	destroy_rcu_head(&rw->rw_rcu);
   1650 	TAILQ_REMOVE(&wq->wq_rcu, &rw->work, work_entry);
   1651 	TAILQ_INSERT_TAIL(&wq->wq_queue, &rw->work, work_entry);
   1652 	cv_broadcast(&wq->wq_cv);
   1653 	SDT_PROBE2(sdt, linux, work, queue,  &rw->work, wq);
   1654 	mutex_exit(&wq->wq_lock);
   1655 }
   1656 
   1657 /*
   1658  * queue_rcu_work(wq, rw)
   1659  *
   1660  *	Schedule rw to run on wq after an RCU grace period.
   1661  */
   1662 void
   1663 queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rw)
   1664 {
   1665 
   1666 	mutex_enter(&wq->wq_lock);
   1667 	if (acquire_work(&rw->work, wq)) {
   1668 		init_rcu_head(&rw->rw_rcu);
   1669 		SDT_PROBE2(sdt, linux, work, rcu,  rw, wq);
   1670 		TAILQ_INSERT_TAIL(&wq->wq_rcu, &rw->work, work_entry);
   1671 		call_rcu(&rw->rw_rcu, &queue_rcu_work_cb);
   1672 	}
   1673 	mutex_exit(&wq->wq_lock);
   1674 }
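
/*
 * Example (editor's sketch; example_node, n_rcu_work, and
 * example_free_cb are hypothetical): defer a free until after an RCU
 * grace period, then do it in thread context:
 *
 *	static void
 *	example_free_cb(struct work_struct *work)
 *	{
 *		struct rcu_work *rw = container_of(work,
 *		    struct rcu_work, work);
 *		struct example_node *n = container_of(rw,
 *		    struct example_node, n_rcu_work);
 *
 *		kmem_free(n, sizeof(*n));
 *	}
 *
 *	INIT_RCU_WORK(&n->n_rcu_work, example_free_cb);
 *	queue_rcu_work(system_wq, &n->n_rcu_work);
 */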
   1675