Home | History | Annotate | Line # | Download | only in linux
linux_work.c revision 1.31
      1 /*	$NetBSD: linux_work.c,v 1.31 2018/08/27 15:03:59 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2018 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Taylor R. Campbell.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: linux_work.c,v 1.31 2018/08/27 15:03:59 riastradh Exp $");
     34 
     35 #include <sys/types.h>
     36 #include <sys/atomic.h>
     37 #include <sys/callout.h>
     38 #include <sys/condvar.h>
     39 #include <sys/errno.h>
     40 #include <sys/kmem.h>
     41 #include <sys/kthread.h>
     42 #include <sys/lwp.h>
     43 #include <sys/mutex.h>
     44 #include <sys/queue.h>
     45 
     46 #include <linux/workqueue.h>
     47 
     48 struct workqueue_struct {
     49 	kmutex_t			wq_lock;
     50 	kcondvar_t			wq_cv;
     51 	TAILQ_HEAD(, delayed_work)	wq_delayed;
     52 	TAILQ_HEAD(, work_struct)	wq_queue;
     53 	struct work_struct		*wq_current_work;
     54 	int				wq_flags;
     55 	struct lwp			*wq_lwp;
     56 	uint64_t			wq_gen;
     57 	bool				wq_requeued:1;
     58 	bool				wq_dying:1;
     59 };
     60 
     61 static void __dead	linux_workqueue_thread(void *);
     62 static void		linux_workqueue_timeout(void *);
     63 static struct workqueue_struct *
     64 			acquire_work(struct work_struct *,
     65 			    struct workqueue_struct *);
     66 static void		release_work(struct work_struct *,
     67 			    struct workqueue_struct *);
     68 static void		dw_callout_init(struct workqueue_struct *,
     69 			    struct delayed_work *);
     70 static void		dw_callout_destroy(struct workqueue_struct *,
     71 			    struct delayed_work *);
     72 static void		cancel_delayed_work_done(struct workqueue_struct *,
     73 			    struct delayed_work *);
     74 
     75 static specificdata_key_t workqueue_key __read_mostly;
     76 
     77 struct workqueue_struct	*system_wq __read_mostly;
     78 struct workqueue_struct	*system_long_wq __read_mostly;
     79 struct workqueue_struct	*system_power_efficient_wq __read_mostly;
     80 
     81 int
     82 linux_workqueue_init(void)
     83 {
     84 	int error;
     85 
     86 	error = lwp_specific_key_create(&workqueue_key, NULL);
     87 	if (error)
     88 		goto fail0;
     89 
     90 	system_wq = alloc_ordered_workqueue("lnxsyswq", 0);
     91 	if (system_wq == NULL) {
     92 		error = ENOMEM;
     93 		goto fail1;
     94 	}
     95 
     96 	system_long_wq = alloc_ordered_workqueue("lnxlngwq", 0);
     97 	if (system_long_wq == NULL) {
     98 		error = ENOMEM;
     99 		goto fail2;
    100 	}
    101 
    102 	system_power_efficient_wq = alloc_ordered_workqueue("lnxpwrwq", 0);
    103 	if (system_long_wq == NULL) {
    104 		error = ENOMEM;
    105 		goto fail3;
    106 	}
    107 
    108 	return 0;
    109 
    110 fail4: __unused
    111 	destroy_workqueue(system_power_efficient_wq);
    112 fail3:	destroy_workqueue(system_long_wq);
    113 fail2:	destroy_workqueue(system_wq);
    114 fail1:	lwp_specific_key_delete(workqueue_key);
    115 fail0:	KASSERT(error);
    116 	return error;
    117 }
    118 
    119 void
    120 linux_workqueue_fini(void)
    121 {
    122 
    123 	destroy_workqueue(system_power_efficient_wq);
    124 	destroy_workqueue(system_long_wq);
    125 	destroy_workqueue(system_wq);
    126 	lwp_specific_key_delete(workqueue_key);
    127 }
    128 
    129 /*
    131  * Workqueues
    132  */
    133 
    134 struct workqueue_struct *
    135 alloc_ordered_workqueue(const char *name, int flags)
    136 {
    137 	struct workqueue_struct *wq;
    138 	int error;
    139 
    140 	KASSERT(flags == 0);
    141 
    142 	wq = kmem_zalloc(sizeof(*wq), KM_SLEEP);
    143 
    144 	mutex_init(&wq->wq_lock, MUTEX_DEFAULT, IPL_NONE);
    145 	cv_init(&wq->wq_cv, name);
    146 	TAILQ_INIT(&wq->wq_delayed);
    147 	TAILQ_INIT(&wq->wq_queue);
    148 	wq->wq_current_work = NULL;
    149 	wq->wq_flags = 0;
    150 	wq->wq_lwp = NULL;
    151 	wq->wq_gen = 0;
    152 	wq->wq_requeued = false;
    153 	wq->wq_dying = false;
    154 
    155 	error = kthread_create(PRI_NONE,
    156 	    KTHREAD_MPSAFE|KTHREAD_TS|KTHREAD_MUSTJOIN, NULL,
    157 	    &linux_workqueue_thread, wq, &wq->wq_lwp, "%s", name);
    158 	if (error)
    159 		goto fail0;
    160 
    161 	return wq;
    162 
    163 fail0:	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
    164 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
    165 	cv_destroy(&wq->wq_cv);
    166 	mutex_destroy(&wq->wq_lock);
    167 	kmem_free(wq, sizeof(*wq));
    168 	return NULL;
    169 }
    170 
    171 void
    172 destroy_workqueue(struct workqueue_struct *wq)
    173 {
    174 
    175 	/*
    176 	 * Cancel all delayed work.  We do this first because any
    177 	 * delayed work that that has already timed out, which we can't
    178 	 * cancel, may have queued new work.
    179 	 */
    180 	mutex_enter(&wq->wq_lock);
    181 	while (!TAILQ_EMPTY(&wq->wq_delayed)) {
    182 		struct delayed_work *const dw = TAILQ_FIRST(&wq->wq_delayed);
    183 
    184 		KASSERT(dw->work.work_queue == wq);
    185 		KASSERTMSG((dw->dw_state == DELAYED_WORK_SCHEDULED ||
    186 			dw->dw_state == DELAYED_WORK_RESCHEDULED ||
    187 			dw->dw_state == DELAYED_WORK_CANCELLED),
    188 		    "delayed work %p in bad state: %d",
    189 		    dw, dw->dw_state);
    190 
    191 		/*
    192 		 * Mark it cancelled and try to stop the callout before
    193 		 * it starts.
    194 		 *
    195 		 * If it's too late and the callout has already begun
    196 		 * to execute, then it will notice that we asked to
    197 		 * cancel it and remove itself from the queue before
    198 		 * returning.
    199 		 *
    200 		 * If we stopped the callout before it started,
    201 		 * however, then we can safely destroy the callout and
    202 		 * dissociate it from the workqueue ourselves.
    203 		 */
    204 		dw->dw_state = DELAYED_WORK_CANCELLED;
    205 		if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
    206 			cancel_delayed_work_done(wq, dw);
    207 	}
    208 	mutex_exit(&wq->wq_lock);
    209 
    210 	/*
    211 	 * At this point, no new work can be put on the queue.
    212 	 */
    213 
    214 	/* Tell the thread to exit.  */
    215 	mutex_enter(&wq->wq_lock);
    216 	wq->wq_dying = true;
    217 	cv_broadcast(&wq->wq_cv);
    218 	mutex_exit(&wq->wq_lock);
    219 
    220 	/* Wait for it to exit.  */
    221 	(void)kthread_join(wq->wq_lwp);
    222 
    223 	KASSERT(wq->wq_dying);
    224 	KASSERT(!wq->wq_requeued);
    225 	KASSERT(wq->wq_flags == 0);
    226 	KASSERT(wq->wq_current_work == NULL);
    227 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
    228 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
    229 	cv_destroy(&wq->wq_cv);
    230 	mutex_destroy(&wq->wq_lock);
    231 
    232 	kmem_free(wq, sizeof(*wq));
    233 }
    234 
    235 /*
    237  * Work thread and callout
    238  */
    239 
    240 static void __dead
    241 linux_workqueue_thread(void *cookie)
    242 {
    243 	struct workqueue_struct *const wq = cookie;
    244 	TAILQ_HEAD(, work_struct) tmp;
    245 
    246 	lwp_setspecific(workqueue_key, wq);
    247 
    248 	mutex_enter(&wq->wq_lock);
    249 	for (;;) {
    250 		/*
    251 		 * Wait until there's activity.  If there's no work and
    252 		 * we're dying, stop here.
    253 		 */
    254 		while (TAILQ_EMPTY(&wq->wq_queue) && !wq->wq_dying)
    255 			cv_wait(&wq->wq_cv, &wq->wq_lock);
    256 		if (TAILQ_EMPTY(&wq->wq_queue)) {
    257 			KASSERT(wq->wq_dying);
    258 			break;
    259 		}
    260 
    261 		/* Grab a batch of work off the queue.  */
    262 		KASSERT(!TAILQ_EMPTY(&wq->wq_queue));
    263 		TAILQ_INIT(&tmp);
    264 		TAILQ_CONCAT(&tmp, &wq->wq_queue, work_entry);
    265 
    266 		/* Process each work item in the batch.  */
    267 		while (!TAILQ_EMPTY(&tmp)) {
    268 			struct work_struct *const work = TAILQ_FIRST(&tmp);
    269 
    270 			KASSERT(work->work_queue == wq);
    271 			TAILQ_REMOVE(&tmp, work, work_entry);
    272 			KASSERT(wq->wq_current_work == NULL);
    273 			wq->wq_current_work = work;
    274 
    275 			mutex_exit(&wq->wq_lock);
    276 			(*work->func)(work);
    277 			mutex_enter(&wq->wq_lock);
    278 
    279 			KASSERT(wq->wq_current_work == work);
    280 			KASSERT(work->work_queue == wq);
    281 			if (wq->wq_requeued)
    282 				wq->wq_requeued = false;
    283 			else
    284 				release_work(work, wq);
    285 			wq->wq_current_work = NULL;
    286 			cv_broadcast(&wq->wq_cv);
    287 		}
    288 
    289 		/* Notify flush that we've completed a batch of work.  */
    290 		wq->wq_gen++;
    291 		cv_broadcast(&wq->wq_cv);
    292 	}
    293 	mutex_exit(&wq->wq_lock);
    294 
    295 	kthread_exit(0);
    296 }
    297 
    298 static void
    299 linux_workqueue_timeout(void *cookie)
    300 {
    301 	struct delayed_work *const dw = cookie;
    302 	struct workqueue_struct *const wq = dw->work.work_queue;
    303 
    304 	KASSERT(wq != NULL);
    305 
    306 	mutex_enter(&wq->wq_lock);
    307 	KASSERT(dw->work.work_queue == wq);
    308 	switch (dw->dw_state) {
    309 	case DELAYED_WORK_IDLE:
    310 		panic("delayed work callout uninitialized: %p", dw);
    311 	case DELAYED_WORK_SCHEDULED:
    312 		dw_callout_destroy(wq, dw);
    313 		TAILQ_INSERT_TAIL(&wq->wq_queue, &dw->work, work_entry);
    314 		cv_broadcast(&wq->wq_cv);
    315 		break;
    316 	case DELAYED_WORK_RESCHEDULED:
    317 		dw->dw_state = DELAYED_WORK_SCHEDULED;
    318 		break;
    319 	case DELAYED_WORK_CANCELLED:
    320 		cancel_delayed_work_done(wq, dw);
    321 		/* Can't touch dw any more.  */
    322 		goto out;
    323 	default:
    324 		panic("delayed work callout in bad state: %p", dw);
    325 	}
    326 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE ||
    327 	    dw->dw_state == DELAYED_WORK_SCHEDULED);
    328 out:	mutex_exit(&wq->wq_lock);
    329 }
    330 
    331 struct work_struct *
    332 current_work(void)
    333 {
    334 	struct workqueue_struct *wq = lwp_getspecific(workqueue_key);
    335 
    336 	/* If we're not a workqueue thread, then there's no work.  */
    337 	if (wq == NULL)
    338 		return NULL;
    339 
    340 	/*
    341 	 * Otherwise, this should be possible only while work is in
    342 	 * progress.  Return the current work item.
    343 	 */
    344 	KASSERT(wq->wq_current_work != NULL);
    345 	return wq->wq_current_work;
    346 }
    347 
    348 /*
    350  * Work
    351  */
    352 
    353 void
    354 INIT_WORK(struct work_struct *work, void (*fn)(struct work_struct *))
    355 {
    356 
    357 	work->work_queue = NULL;
    358 	work->func = fn;
    359 }
    360 
    361 static struct workqueue_struct *
    362 acquire_work(struct work_struct *work, struct workqueue_struct *wq)
    363 {
    364 	struct workqueue_struct *wq0;
    365 
    366 	KASSERT(mutex_owned(&wq->wq_lock));
    367 
    368 	wq0 = atomic_cas_ptr(&work->work_queue, NULL, wq);
    369 	if (wq0 == NULL) {
    370 		membar_enter();
    371 		KASSERT(work->work_queue == wq);
    372 	}
    373 
    374 	return wq0;
    375 }
    376 
    377 static void
    378 release_work(struct work_struct *work, struct workqueue_struct *wq)
    379 {
    380 
    381 	KASSERT(work->work_queue == wq);
    382 	KASSERT(mutex_owned(&wq->wq_lock));
    383 
    384 	membar_exit();
    385 	work->work_queue = NULL;
    386 }
    387 
    388 bool
    389 schedule_work(struct work_struct *work)
    390 {
    391 
    392 	return queue_work(system_wq, work);
    393 }
    394 
    395 bool
    396 queue_work(struct workqueue_struct *wq, struct work_struct *work)
    397 {
    398 	struct workqueue_struct *wq0;
    399 	bool newly_queued;
    400 
    401 	KASSERT(wq != NULL);
    402 
    403 	mutex_enter(&wq->wq_lock);
    404 	if (__predict_true((wq0 = acquire_work(work, wq)) == NULL)) {
    405 		/*
    406 		 * It wasn't on any workqueue at all.  Put it on this
    407 		 * one, and signal the worker thread that there is work
    408 		 * to do.
    409 		 */
    410 		TAILQ_INSERT_TAIL(&wq->wq_queue, work, work_entry);
    411 		newly_queued = true;
    412 		cv_broadcast(&wq->wq_cv);
    413 	} else {
    414 		/*
    415 		 * It was on a workqueue, which had better be this one.
    416 		 * Requeue it if it has been taken off the queue to
    417 		 * execute and hasn't been requeued yet.  The worker
    418 		 * thread should already be running, so no need to
    419 		 * signal it.
    420 		 */
    421 		KASSERT(wq0 == wq);
    422 		if (wq->wq_current_work == work && !wq->wq_requeued) {
    423 			/*
    424 			 * It has been taken off the queue to execute,
    425 			 * and it hasn't been put back on the queue
    426 			 * again.  Put it back on the queue.  No need
    427 			 * to signal the worker thread because it will
    428 			 * notice when it reacquires the lock after
    429 			 * doing the work.
    430 			 */
    431 			TAILQ_INSERT_TAIL(&wq->wq_queue, work, work_entry);
    432 			wq->wq_requeued = true;
    433 			newly_queued = true;
    434 		} else {
    435 			/* It is still on the queue; nothing to do.  */
    436 			newly_queued = false;
    437 		}
    438 	}
    439 	mutex_exit(&wq->wq_lock);
    440 
    441 	return newly_queued;
    442 }
    443 
    444 bool
    445 cancel_work(struct work_struct *work)
    446 {
    447 	struct workqueue_struct *wq;
    448 	bool cancelled_p = false;
    449 
    450 	/* If there's no workqueue, nothing to cancel.   */
    451 	if ((wq = work->work_queue) == NULL)
    452 		goto out;
    453 
    454 	mutex_enter(&wq->wq_lock);
    455 	if (__predict_false(work->work_queue != wq)) {
    456 		/*
    457 		 * It has finished execution or been cancelled by
    458 		 * another thread, and has been moved off the
    459 		 * workqueue, so it's too to cancel.
    460 		 */
    461 		cancelled_p = false;
    462 	} else if (wq->wq_current_work == work) {
    463 		/*
    464 		 * It has already begun execution, so it's too late to
    465 		 * cancel now.
    466 		 */
    467 		cancelled_p = false;
    468 	} else {
    469 		/*
    470 		 * It is still on the queue.  Take it off the queue and
    471 		 * report successful cancellation.
    472 		 */
    473 		TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
    474 		cancelled_p = true;
    475 	}
    476 	mutex_exit(&wq->wq_lock);
    477 
    478 out:	return cancelled_p;
    479 }
    480 
    481 bool
    482 cancel_work_sync(struct work_struct *work)
    483 {
    484 	struct workqueue_struct *wq;
    485 	bool cancelled_p = false;
    486 
    487 	/* If there's no workqueue, nothing to cancel.   */
    488 	if ((wq = work->work_queue) == NULL)
    489 		goto out;
    490 
    491 	mutex_enter(&wq->wq_lock);
    492 	if (__predict_false(work->work_queue != wq)) {
    493 		/*
    494 		 * It has finished execution or been cancelled by
    495 		 * another thread, and has been moved off the
    496 		 * workqueue, so it's too to cancel.
    497 		 */
    498 		cancelled_p = false;
    499 	} else if (wq->wq_current_work == work) {
    500 		/*
    501 		 * It has already begun execution, so it's too late to
    502 		 * cancel now.  Wait for it to complete.  Don't wait
    503 		 * more than one generation in case it gets requeued.
    504 		 */
    505 		uint64_t gen = wq->wq_gen;
    506 		do {
    507 			cv_wait(&wq->wq_cv, &wq->wq_lock);
    508 		} while (wq->wq_current_work == work && wq->wq_gen == gen);
    509 		cancelled_p = false;
    510 	} else {
    511 		/*
    512 		 * It is still on the queue.  Take it off the queue and
    513 		 * report successful cancellation.
    514 		 */
    515 		TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
    516 		cancelled_p = true;
    517 	}
    518 	mutex_exit(&wq->wq_lock);
    519 
    520 out:	return cancelled_p;
    521 }
    522 
    523 /*
    525  * Delayed work
    526  */
    527 
    528 void
    529 INIT_DELAYED_WORK(struct delayed_work *dw, void (*fn)(struct work_struct *))
    530 {
    531 
    532 	INIT_WORK(&dw->work, fn);
    533 	dw->dw_state = DELAYED_WORK_IDLE;
    534 
    535 	/*
    536 	 * Defer callout_init until we are going to schedule the
    537 	 * callout, which can then callout_destroy it, because
    538 	 * otherwise since there's no DESTROY_DELAYED_WORK or anything
    539 	 * we have no opportunity to call callout_destroy.
    540 	 */
    541 }
    542 
    543 bool
    544 schedule_delayed_work(struct delayed_work *dw, unsigned long ticks)
    545 {
    546 
    547 	return queue_delayed_work(system_wq, dw, ticks);
    548 }
    549 
    550 /*
    551  * dw_callout_init(wq, dw)
    552  *
    553  *	Initialize the callout of dw and transition to
    554  *	DELAYED_WORK_SCHEDULED.  Caller must use callout_schedule.
    555  */
    556 static void
    557 dw_callout_init(struct workqueue_struct *wq, struct delayed_work *dw)
    558 {
    559 
    560 	KASSERT(mutex_owned(&wq->wq_lock));
    561 	KASSERT(dw->work.work_queue == wq);
    562 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
    563 
    564 	callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
    565 	callout_setfunc(&dw->dw_callout, &linux_workqueue_timeout, dw);
    566 	TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
    567 	dw->dw_state = DELAYED_WORK_SCHEDULED;
    568 }
    569 
    570 /*
    571  * dw_callout_destroy(wq, dw)
    572  *
    573  *	Destroy the callout of dw and transition to DELAYED_WORK_IDLE.
    574  */
    575 static void
    576 dw_callout_destroy(struct workqueue_struct *wq, struct delayed_work *dw)
    577 {
    578 
    579 	KASSERT(mutex_owned(&wq->wq_lock));
    580 	KASSERT(dw->work.work_queue == wq);
    581 	KASSERT(dw->dw_state == DELAYED_WORK_SCHEDULED ||
    582 	    dw->dw_state == DELAYED_WORK_RESCHEDULED ||
    583 	    dw->dw_state == DELAYED_WORK_CANCELLED);
    584 
    585 	TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
    586 	callout_destroy(&dw->dw_callout);
    587 	dw->dw_state = DELAYED_WORK_IDLE;
    588 }
    589 
    590 /*
    591  * cancel_delayed_work_done(wq, dw)
    592  *
    593  *	Complete cancellation of a delayed work: transition from
    594  *	DELAYED_WORK_CANCELLED to DELAYED_WORK_IDLE and off the
    595  *	workqueue.  Caller must not touch dw after this returns.
    596  */
    597 static void
    598 cancel_delayed_work_done(struct workqueue_struct *wq, struct delayed_work *dw)
    599 {
    600 
    601 	KASSERT(mutex_owned(&wq->wq_lock));
    602 	KASSERT(dw->work.work_queue == wq);
    603 	KASSERT(dw->dw_state == DELAYED_WORK_CANCELLED);
    604 
    605 	dw_callout_destroy(wq, dw);
    606 	release_work(&dw->work, wq);
    607 	/* Can't touch dw after this point.  */
    608 }
    609 
    610 /*
    611  * queue_delayed_work(wq, dw, ticks)
    612  *
    613  *	If it is not currently scheduled, schedule dw to run after
    614  *	ticks.  If currently executing and not already rescheduled,
    615  *	reschedule it.  If ticks == 0, run without delay.
    616  */
    617 bool
    618 queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    619     unsigned long ticks)
    620 {
    621 	struct workqueue_struct *wq0;
    622 	bool newly_queued;
    623 
    624 	mutex_enter(&wq->wq_lock);
    625 	if (__predict_true((wq0 = acquire_work(&dw->work, wq)) == NULL)) {
    626 		/*
    627 		 * It wasn't on any workqueue at all.  Schedule it to
    628 		 * run on this one.
    629 		 */
    630 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
    631 		if (ticks == 0) {
    632 			TAILQ_INSERT_TAIL(&wq->wq_queue, &dw->work,
    633 			    work_entry);
    634 			cv_broadcast(&wq->wq_cv);
    635 		} else {
    636 			/*
    637 			 * Initialize a callout and schedule to run
    638 			 * after a delay.
    639 			 */
    640 			dw_callout_init(wq, dw);
    641 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
    642 		}
    643 		newly_queued = true;
    644 	} else {
    645 		/*
    646 		 * It was on a workqueue, which had better be this one.
    647 		 *
    648 		 * - If it has already begun to run, and it is not yet
    649 		 *   scheduled to run again, schedule it again.
    650 		 *
    651 		 * - If the callout is cancelled, reschedule it.
    652 		 *
    653 		 * - Otherwise, leave it alone.
    654 		 */
    655 		KASSERT(wq0 == wq);
    656 		if (wq->wq_current_work != &dw->work || !wq->wq_requeued) {
    657 			/*
    658 			 * It is either scheduled, on the queue but not
    659 			 * in progress, or in progress but not on the
    660 			 * queue.
    661 			 */
    662 			switch (dw->dw_state) {
    663 			case DELAYED_WORK_IDLE:
    664 				/*
    665 				 * It is not scheduled to run, and it
    666 				 * is not on the queue if it is
    667 				 * running.
    668 				 */
    669 				if (ticks == 0) {
    670 					/*
    671 					 * If it's in progress, put it
    672 					 * on the queue to run as soon
    673 					 * as the worker thread gets to
    674 					 * it.  No need for a wakeup
    675 					 * because either the worker
    676 					 * thread already knows it is
    677 					 * on the queue, or will check
    678 					 * once it is done executing.
    679 					 */
    680 					if (wq->wq_current_work == &dw->work) {
    681 						KASSERT(!wq->wq_requeued);
    682 						TAILQ_INSERT_TAIL(&wq->wq_queue,
    683 						    &dw->work, work_entry);
    684 						wq->wq_requeued = true;
    685 					}
    686 				} else {
    687 					/*
    688 					 * Initialize a callout and
    689 					 * schedule it to run after the
    690 					 * specified delay.
    691 					 */
    692 					dw_callout_init(wq, dw);
    693 					callout_schedule(&dw->dw_callout,
    694 					    MIN(INT_MAX, ticks));
    695 				}
    696 				break;
    697 			case DELAYED_WORK_SCHEDULED:
    698 			case DELAYED_WORK_RESCHEDULED:
    699 				/*
    700 				 * It is already scheduled to run after
    701 				 * a delay.  Leave it be.
    702 				 */
    703 				break;
    704 			case DELAYED_WORK_CANCELLED:
    705 				/*
    706 				 * It was scheduled and the callout has
    707 				 * begun to execute, but it was
    708 				 * cancelled.  Reschedule it.
    709 				 */
    710 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
    711 				callout_schedule(&dw->dw_callout,
    712 				    MIN(INT_MAX, ticks));
    713 				break;
    714 			default:
    715 				panic("invalid delayed work state: %d",
    716 				    dw->dw_state);
    717 			}
    718 		} else {
    719 			/*
    720 			 * It is in progress and it has been requeued.
    721 			 * It cannot be scheduled to run after a delay
    722 			 * at this point.  We just leave it be.
    723 			 */
    724 			KASSERTMSG((dw->dw_state == DELAYED_WORK_IDLE),
    725 			    "delayed work %p in wrong state: %d",
    726 			    dw, dw->dw_state);
    727 		}
    728 	}
    729 	mutex_exit(&wq->wq_lock);
    730 
    731 	return newly_queued;
    732 }
    733 
    734 /*
    735  * mod_delayed_work(wq, dw, ticks)
    736  *
    737  *	Schedule dw to run after ticks.  If currently scheduled,
    738  *	reschedule it.  If currently executing, reschedule it.  If
    739  *	ticks == 0, run without delay.
    740  */
    741 bool
    742 mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    743     unsigned long ticks)
    744 {
    745 	struct workqueue_struct *wq0;
    746 	bool timer_modified;
    747 
    748 	mutex_enter(&wq->wq_lock);
    749 	if ((wq0 = acquire_work(&dw->work, wq)) == NULL) {
    750 		/*
    751 		 * It wasn't on any workqueue at all.  Schedule it to
    752 		 * run on this one.
    753 		 */
    754 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
    755 		if (ticks == 0) {
    756 			/*
    757 			 * Run immediately: put it on the queue and
    758 			 * signal the worker thread.
    759 			 */
    760 			TAILQ_INSERT_TAIL(&wq->wq_queue, &dw->work,
    761 			    work_entry);
    762 			cv_broadcast(&wq->wq_cv);
    763 		} else {
    764 			/*
    765 			 * Initialize a callout and schedule to run
    766 			 * after a delay.
    767 			 */
    768 			dw_callout_init(wq, dw);
    769 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
    770 		}
    771 		timer_modified = false;
    772 	} else {
    773 		/* It was on a workqueue, which had better be this one.  */
    774 		KASSERT(wq0 == wq);
    775 		switch (dw->dw_state) {
    776 		case DELAYED_WORK_IDLE:
    777 			/*
    778 			 * It is not scheduled: it is on the queue or
    779 			 * it is running or both.
    780 			 */
    781 			if (wq->wq_current_work != &dw->work) {
    782 				/* It is on the queue and not yet running.  */
    783 				if (ticks == 0) {
    784 					/*
    785 					 * We ask it to run
    786 					 * immediately.  Leave it on
    787 					 * the queue.
    788 					 */
    789 				} else {
    790 					/*
    791 					 * Take it off the queue and
    792 					 * schedule a callout to run it
    793 					 * after a delay.
    794 					 */
    795 					TAILQ_REMOVE(&wq->wq_queue, &dw->work,
    796 					    work_entry);
    797 					dw_callout_init(wq, dw);
    798 					callout_schedule(&dw->dw_callout,
    799 					    MIN(INT_MAX, ticks));
    800 				}
    801 				timer_modified = true;
    802 			} else if (wq->wq_requeued) {
    803 				/*
    804 				 * It is currently running _and_ it is
    805 				 * on the queue again.
    806 				 */
    807 				if (ticks == 0) {
    808 					/*
    809 					 * We ask it to run
    810 					 * immediately.  Leave it on
    811 					 * the queue.
    812 					 */
    813 				} else {
    814 					/*
    815 					 * Take it off the queue and
    816 					 * schedule a callout to run it
    817 					 * after a delay.
    818 					 */
    819 					wq->wq_requeued = false;
    820 					TAILQ_REMOVE(&wq->wq_queue, &dw->work,
    821 					    work_entry);
    822 					dw_callout_init(wq, dw);
    823 					callout_schedule(&dw->dw_callout,
    824 					    MIN(INT_MAX, ticks));
    825 				}
    826 				timer_modified = true;
    827 			} else {
    828 				/*
    829 				 * It is currently running and has not
    830 				 * been requeued.
    831 				 */
    832 				if (ticks == 0) {
    833 					/*
    834 					 * We ask it to run
    835 					 * immediately.  Put it on the
    836 					 * queue again.
    837 					 */
    838 					wq->wq_requeued = true;
    839 					TAILQ_INSERT_TAIL(&wq->wq_queue,
    840 					    &dw->work, work_entry);
    841 				} else {
    842 					/*
    843 					 * Schedule a callout to run it
    844 					 * after a delay.
    845 					 */
    846 					dw_callout_init(wq, dw);
    847 					callout_schedule(&dw->dw_callout,
    848 					    MIN(INT_MAX, ticks));
    849 				}
    850 				timer_modified = false;
    851 			}
    852 			break;
    853 		case DELAYED_WORK_SCHEDULED:
    854 			/*
    855 			 * It is scheduled to run after a delay.  Try
    856 			 * to stop it and reschedule it; if we can't,
    857 			 * either reschedule it or cancel it to put it
    858 			 * on the queue, and inform the callout.
    859 			 */
    860 			if (callout_stop(&dw->dw_callout)) {
    861 				/* Can't stop, callout has begun.  */
    862 				if (ticks == 0) {
    863 					/*
    864 					 * We don't actually need to do
    865 					 * anything.  The callout will
    866 					 * queue it as soon as it gets
    867 					 * the lock.
    868 					 */
    869 				} else {
    870 					/*
    871 					 * Schedule callout and tell
    872 					 * the instance that's running
    873 					 * now that it's been
    874 					 * rescheduled.
    875 					 */
    876 					dw->dw_state = DELAYED_WORK_RESCHEDULED;
    877 					callout_schedule(&dw->dw_callout,
    878 					    MIN(INT_MAX, ticks));
    879 				}
    880 			} else {
    881 				if (ticks == 0) {
    882 					/*
    883 					 * Run immediately: destroy the
    884 					 * callout, put it on the
    885 					 * queue, and signal the worker
    886 					 * thread.
    887 					 */
    888 					dw_callout_destroy(wq, dw);
    889 					TAILQ_INSERT_TAIL(&wq->wq_queue,
    890 					    &dw->work, work_entry);
    891 					cv_broadcast(&wq->wq_cv);
    892 				} else {
    893 					/*
    894 					 * Reschedule the callout.  No
    895 					 * state change.
    896 					 */
    897 					callout_schedule(&dw->dw_callout,
    898 					    MIN(INT_MAX, ticks));
    899 				}
    900 			}
    901 			timer_modified = true;
    902 			break;
    903 		case DELAYED_WORK_RESCHEDULED:
    904 		case DELAYED_WORK_CANCELLED:
    905 			/*
    906 			 * Someone modified the timer _again_, or
    907 			 * cancelled it, after the callout started but
    908 			 * before the poor thing even had a chance to
    909 			 * acquire the lock.
    910 			 */
    911 			if (ticks == 0) {
    912 				/*
    913 				 * We can just switch back to
    914 				 * DELAYED_WORK_SCHEDULED so that the
    915 				 * callout will queue the work as soon
    916 				 * as it gets the lock.
    917 				 */
    918 				dw->dw_state = DELAYED_WORK_SCHEDULED;
    919 			} else {
    920 				/* Reschedule it.  */
    921 				callout_schedule(&dw->dw_callout,
    922 				    MIN(INT_MAX, ticks));
    923 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
    924 			}
    925 			timer_modified = true;
    926 			break;
    927 		default:
    928 			panic("invalid delayed work state: %d", dw->dw_state);
    929 		}
    930 	}
    931 	mutex_exit(&wq->wq_lock);
    932 
    933 	return timer_modified;
    934 }
    935 
    936 bool
    937 cancel_delayed_work(struct delayed_work *dw)
    938 {
    939 	struct workqueue_struct *wq;
    940 	bool cancelled_p;
    941 
    942 	/* If there's no workqueue, nothing to cancel.   */
    943 	if ((wq = dw->work.work_queue) == NULL)
    944 		return false;
    945 
    946 	mutex_enter(&wq->wq_lock);
    947 	if (__predict_false(dw->work.work_queue != wq)) {
    948 		cancelled_p = false;
    949 	} else {
    950 		switch (dw->dw_state) {
    951 		case DELAYED_WORK_IDLE:
    952 			if (wq->wq_current_work == &dw->work) {
    953 				/*
    954 				 * Too late, it's already running.  If
    955 				 * it's been requeued, tough -- it'll
    956 				 * run again.
    957 				 */
    958 				cancelled_p = false;
    959 			} else {
    960 				/* Got in before it started.  Remove it.  */
    961 				TAILQ_REMOVE(&wq->wq_queue, &dw->work,
    962 				    work_entry);
    963 				cancelled_p = true;
    964 			}
    965 			break;
    966 		case DELAYED_WORK_SCHEDULED:
    967 		case DELAYED_WORK_RESCHEDULED:
    968 		case DELAYED_WORK_CANCELLED:
    969 			/*
    970 			 * If it is scheduled, mark it cancelled and
    971 			 * try to stop the callout before it starts.
    972 			 *
    973 			 * If it's too late and the callout has already
    974 			 * begun to execute, tough.
    975 			 *
    976 			 * If we stopped the callout before it started,
    977 			 * however, then destroy the callout and
    978 			 * dissociate it from the workqueue ourselves.
    979 			 */
    980 			dw->dw_state = DELAYED_WORK_CANCELLED;
    981 			cancelled_p = true;
    982 			if (!callout_stop(&dw->dw_callout))
    983 				cancel_delayed_work_done(wq, dw);
    984 			break;
    985 		default:
    986 			panic("invalid delayed work state: %d",
    987 			    dw->dw_state);
    988 		}
    989 	}
    990 	mutex_exit(&wq->wq_lock);
    991 
    992 	return cancelled_p;
    993 }
    994 
    995 bool
    996 cancel_delayed_work_sync(struct delayed_work *dw)
    997 {
    998 	struct workqueue_struct *wq;
    999 	bool cancelled_p;
   1000 
   1001 	/* If there's no workqueue, nothing to cancel.  */
   1002 	if ((wq = dw->work.work_queue) == NULL)
   1003 		return false;
   1004 
   1005 	mutex_enter(&wq->wq_lock);
   1006 	if (__predict_false(dw->work.work_queue != wq)) {
   1007 		cancelled_p = false;
   1008 	} else {
   1009 		switch (dw->dw_state) {
   1010 		case DELAYED_WORK_IDLE:
   1011 			if (wq->wq_current_work == &dw->work) {
   1012 				/*
   1013 				 * Too late, it's already running.
   1014 				 * First, make sure it's not requeued.
   1015 				 * Then wait for it to complete, at
   1016 				 * most one generation.
   1017 				 */
   1018 				uint64_t gen = wq->wq_gen;
   1019 				if (wq->wq_requeued) {
   1020 					TAILQ_REMOVE(&wq->wq_queue, &dw->work,
   1021 					    work_entry);
   1022 					wq->wq_requeued = false;
   1023 				}
   1024 				do {
   1025 					cv_wait(&wq->wq_cv, &wq->wq_lock);
   1026 				} while (wq->wq_current_work == &dw->work &&
   1027 				    wq->wq_gen == gen);
   1028 				cancelled_p = false;
   1029 			} else {
   1030 				/* Got in before it started.  Remove it.  */
   1031 				TAILQ_REMOVE(&wq->wq_queue, &dw->work,
   1032 				    work_entry);
   1033 				cancelled_p = true;
   1034 			}
   1035 			break;
   1036 		case DELAYED_WORK_SCHEDULED:
   1037 		case DELAYED_WORK_RESCHEDULED:
   1038 		case DELAYED_WORK_CANCELLED:
   1039 			/*
   1040 			 * If it is scheduled, mark it cancelled and
   1041 			 * try to stop the callout before it starts.
   1042 			 *
   1043 			 * If it's too late and the callout has already
   1044 			 * begun to execute, we must wait for it to
   1045 			 * complete.  But we got in soon enough to ask
   1046 			 * the callout not to run, so we successfully
   1047 			 * cancelled it in that case.
   1048 			 *
   1049 			 * If we stopped the callout before it started,
   1050 			 * however, then destroy the callout and
   1051 			 * dissociate it from the workqueue ourselves.
   1052 			 */
   1053 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1054 			cancelled_p = true;
   1055 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
   1056 				cancel_delayed_work_done(wq, dw);
   1057 			break;
   1058 		default:
   1059 			panic("invalid delayed work state: %d",
   1060 			    dw->dw_state);
   1061 		}
   1062 	}
   1063 	mutex_exit(&wq->wq_lock);
   1064 
   1065 	return cancelled_p;
   1066 }
   1067 
   1068 /*
   1070  * Flush
   1071  */
   1072 
   1073 void
   1074 flush_scheduled_work(void)
   1075 {
   1076 
   1077 	flush_workqueue(system_wq);
   1078 }
   1079 
   1080 static void
   1081 flush_workqueue_locked(struct workqueue_struct *wq)
   1082 {
   1083 	uint64_t gen;
   1084 
   1085 	KASSERT(mutex_owned(&wq->wq_lock));
   1086 
   1087 	/* Get the current generation number.  */
   1088 	gen = wq->wq_gen;
   1089 
   1090 	/*
   1091 	 * If there's a batch of work in progress, we must wait for the
   1092 	 * worker thread to finish that batch.
   1093 	 */
   1094 	if (wq->wq_current_work != NULL)
   1095 		gen++;
   1096 
   1097 	/*
   1098 	 * If there's any work yet to be claimed from the queue by the
   1099 	 * worker thread, we must wait for it to finish one more batch
   1100 	 * too.
   1101 	 */
   1102 	if (!TAILQ_EMPTY(&wq->wq_queue))
   1103 		gen++;
   1104 
   1105 	/* Wait until the generation number has caught up.  */
   1106 	while (wq->wq_gen < gen)
   1107 		cv_wait(&wq->wq_cv, &wq->wq_lock);
   1108 }
   1109 
   1110 void
   1111 flush_workqueue(struct workqueue_struct *wq)
   1112 {
   1113 
   1114 	mutex_enter(&wq->wq_lock);
   1115 	flush_workqueue_locked(wq);
   1116 	mutex_exit(&wq->wq_lock);
   1117 }
   1118 
   1119 void
   1120 flush_work(struct work_struct *work)
   1121 {
   1122 	struct workqueue_struct *wq;
   1123 
   1124 	/* If there's no workqueue, nothing to flush.  */
   1125 	if ((wq = work->work_queue) == NULL)
   1126 		return;
   1127 
   1128 	flush_workqueue(wq);
   1129 }
   1130 
   1131 void
   1132 flush_delayed_work(struct delayed_work *dw)
   1133 {
   1134 	struct workqueue_struct *wq;
   1135 
   1136 	/* If there's no workqueue, nothing to flush.  */
   1137 	if ((wq = dw->work.work_queue) == NULL)
   1138 		return;
   1139 
   1140 	mutex_enter(&wq->wq_lock);
   1141 	if (__predict_true(dw->work.work_queue == wq)) {
   1142 		switch (dw->dw_state) {
   1143 		case DELAYED_WORK_IDLE:
   1144 			/*
   1145 			 * It has a workqueue assigned and the callout
   1146 			 * is idle, so it must be in progress or on the
   1147 			 * queue.  In that case, wait for it to
   1148 			 * complete.  Waiting for the whole queue to
   1149 			 * flush is overkill, but doesn't hurt.
   1150 			 */
   1151 			flush_workqueue_locked(wq);
   1152 			break;
   1153 		case DELAYED_WORK_SCHEDULED:
   1154 		case DELAYED_WORK_RESCHEDULED:
   1155 		case DELAYED_WORK_CANCELLED:
   1156 			/*
   1157 			 * The callout is still scheduled to run.
   1158 			 * Notify it that we are cancelling, and try to
   1159 			 * stop the callout before it runs.
   1160 			 *
   1161 			 * If we do stop the callout, we are now
   1162 			 * responsible for dissociating the work from
   1163 			 * the queue.
   1164 			 *
   1165 			 * Otherwise, wait for it to complete and
   1166 			 * dissociate itself -- it will not put itself
   1167 			 * on the workqueue once it is cancelled.
   1168 			 */
   1169 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1170 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
   1171 				cancel_delayed_work_done(wq, dw);
   1172 		default:
   1173 			panic("invalid delayed work state: %d",
   1174 			    dw->dw_state);
   1175 		}
   1176 	}
   1177 	mutex_exit(&wq->wq_lock);
   1178 }
   1179