      1 /*	$NetBSD: linux_work.c,v 1.35 2018/08/27 15:05:01 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2018 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Taylor R. Campbell.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: linux_work.c,v 1.35 2018/08/27 15:05:01 riastradh Exp $");
     34 
     35 #include <sys/types.h>
     36 #include <sys/atomic.h>
     37 #include <sys/callout.h>
     38 #include <sys/condvar.h>
     39 #include <sys/errno.h>
     40 #include <sys/kmem.h>
     41 #include <sys/kthread.h>
     42 #include <sys/lwp.h>
     43 #include <sys/mutex.h>
     44 #include <sys/queue.h>
     45 
     46 #include <linux/workqueue.h>
     47 
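        /*
         * struct workqueue_struct
         *
         *	State for one worker thread: a queue of work to run, a
         *	list of delayed work whose callouts are pending, the
         *	work item currently being executed (if any), and a
         *	generation number bumped after each batch of work so
         *	that flush_workqueue can wait for the thread to catch
         *	up.
         */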
     48 struct workqueue_struct {
     49 	kmutex_t			wq_lock;
     50 	kcondvar_t			wq_cv;
     51 	TAILQ_HEAD(, delayed_work)	wq_delayed;
     52 	TAILQ_HEAD(, work_struct)	wq_queue;
     53 	struct work_struct		*wq_current_work;
     54 	int				wq_flags;
     55 	struct lwp			*wq_lwp;
     56 	uint64_t			wq_gen;
     57 	bool				wq_requeued:1;
     58 	bool				wq_dying:1;
     59 };
     60 
     61 static void __dead	linux_workqueue_thread(void *);
     62 static void		linux_workqueue_timeout(void *);
     63 static struct workqueue_struct *
     64 			acquire_work(struct work_struct *,
     65 			    struct workqueue_struct *);
     66 static void		release_work(struct work_struct *,
     67 			    struct workqueue_struct *);
     68 static void		wait_for_current_work(struct work_struct *,
     69 			    struct workqueue_struct *);
     70 static void		dw_callout_init(struct workqueue_struct *,
     71 			    struct delayed_work *);
     72 static void		dw_callout_destroy(struct workqueue_struct *,
     73 			    struct delayed_work *);
     74 static void		cancel_delayed_work_done(struct workqueue_struct *,
     75 			    struct delayed_work *);
     76 
     77 static specificdata_key_t workqueue_key __read_mostly;
     78 
     79 struct workqueue_struct	*system_wq __read_mostly;
     80 struct workqueue_struct	*system_long_wq __read_mostly;
     81 struct workqueue_struct	*system_power_efficient_wq __read_mostly;
     82 
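        /*
         * linux_workqueue_init()
         *
         *	Initialize the Linux workqueue compatibility layer:
         *	create the lwp-specific data key used by current_work
         *	and the global system workqueues.  Return 0 on success,
         *	error code on failure.
         */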
     83 int
     84 linux_workqueue_init(void)
     85 {
     86 	int error;
     87 
     88 	error = lwp_specific_key_create(&workqueue_key, NULL);
     89 	if (error)
     90 		goto fail0;
     91 
     92 	system_wq = alloc_ordered_workqueue("lnxsyswq", 0);
     93 	if (system_wq == NULL) {
     94 		error = ENOMEM;
     95 		goto fail1;
     96 	}
     97 
     98 	system_long_wq = alloc_ordered_workqueue("lnxlngwq", 0);
     99 	if (system_long_wq == NULL) {
    100 		error = ENOMEM;
    101 		goto fail2;
    102 	}
    103 
    104 	system_power_efficient_wq = alloc_ordered_workqueue("lnxpwrwq", 0);
    105 	if (system_power_efficient_wq == NULL) {
    106 		error = ENOMEM;
    107 		goto fail3;
    108 	}
    109 
    110 	return 0;
    111 
    112 fail4: __unused
    113 	destroy_workqueue(system_power_efficient_wq);
    114 fail3:	destroy_workqueue(system_long_wq);
    115 fail2:	destroy_workqueue(system_wq);
    116 fail1:	lwp_specific_key_delete(workqueue_key);
    117 fail0:	KASSERT(error);
    118 	return error;
    119 }
    120 
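        /*
         * linux_workqueue_fini()
         *
         *	Destroy the global system workqueues and the
         *	lwp-specific data key.  Reverses linux_workqueue_init.
         */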
    121 void
    122 linux_workqueue_fini(void)
    123 {
    124 
    125 	destroy_workqueue(system_power_efficient_wq);
    126 	destroy_workqueue(system_long_wq);
    127 	destroy_workqueue(system_wq);
    128 	lwp_specific_key_delete(workqueue_key);
    129 }
    130 
    131 /*
    133  * Workqueues
    134  */
    135 
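        /*
         * alloc_ordered_workqueue(name, flags)
         *
         *	Create a workqueue with a single worker thread named
         *	name, which executes work items one at a time in the
         *	order queued.  flags must be zero.  Return NULL on
         *	failure.
         */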
    136 struct workqueue_struct *
    137 alloc_ordered_workqueue(const char *name, int flags)
    138 {
    139 	struct workqueue_struct *wq;
    140 	int error;
    141 
    142 	KASSERT(flags == 0);
    143 
    144 	wq = kmem_zalloc(sizeof(*wq), KM_SLEEP);
    145 
    146 	mutex_init(&wq->wq_lock, MUTEX_DEFAULT, IPL_NONE);
    147 	cv_init(&wq->wq_cv, name);
    148 	TAILQ_INIT(&wq->wq_delayed);
    149 	TAILQ_INIT(&wq->wq_queue);
    150 	wq->wq_current_work = NULL;
    151 	wq->wq_flags = 0;
    152 	wq->wq_lwp = NULL;
    153 	wq->wq_gen = 0;
    154 	wq->wq_requeued = false;
    155 	wq->wq_dying = false;
    156 
    157 	error = kthread_create(PRI_NONE,
    158 	    KTHREAD_MPSAFE|KTHREAD_TS|KTHREAD_MUSTJOIN, NULL,
    159 	    &linux_workqueue_thread, wq, &wq->wq_lwp, "%s", name);
    160 	if (error)
    161 		goto fail0;
    162 
    163 	return wq;
    164 
    165 fail0:	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
    166 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
    167 	cv_destroy(&wq->wq_cv);
    168 	mutex_destroy(&wq->wq_lock);
    169 	kmem_free(wq, sizeof(*wq));
    170 	return NULL;
    171 }
    172 
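        /*
         * destroy_workqueue(wq)
         *
         *	Cancel all delayed work scheduled on wq, wait for the
         *	worker thread to finish the work already on the queue
         *	and exit, and free wq.  The caller must not queue new
         *	work on wq concurrently.
         */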
    173 void
    174 destroy_workqueue(struct workqueue_struct *wq)
    175 {
    176 
    177 	/*
    178 	 * Cancel all delayed work.  We do this first because any
    179 	 * delayed work that has already timed out, which we can't
    180 	 * cancel, may have queued new work.
    181 	 */
    182 	mutex_enter(&wq->wq_lock);
    183 	while (!TAILQ_EMPTY(&wq->wq_delayed)) {
    184 		struct delayed_work *const dw = TAILQ_FIRST(&wq->wq_delayed);
    185 
    186 		KASSERT(dw->work.work_queue == wq);
    187 		KASSERTMSG((dw->dw_state == DELAYED_WORK_SCHEDULED ||
    188 			dw->dw_state == DELAYED_WORK_RESCHEDULED ||
    189 			dw->dw_state == DELAYED_WORK_CANCELLED),
    190 		    "delayed work %p in bad state: %d",
    191 		    dw, dw->dw_state);
    192 
    193 		/*
    194 		 * Mark it cancelled and try to stop the callout before
    195 		 * it starts.
    196 		 *
    197 		 * If it's too late and the callout has already begun
    198 		 * to execute, then it will notice that we asked to
    199 		 * cancel it and remove itself from the queue before
    200 		 * returning.
    201 		 *
    202 		 * If we stopped the callout before it started,
    203 		 * however, then we can safely destroy the callout and
    204 		 * dissociate it from the workqueue ourselves.
    205 		 */
    206 		dw->dw_state = DELAYED_WORK_CANCELLED;
    207 		if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
    208 			cancel_delayed_work_done(wq, dw);
    209 	}
    210 	mutex_exit(&wq->wq_lock);
    211 
    212 	/*
    213 	 * At this point, no new work can be put on the queue.
    214 	 */
    215 
    216 	/* Tell the thread to exit.  */
    217 	mutex_enter(&wq->wq_lock);
    218 	wq->wq_dying = true;
    219 	cv_broadcast(&wq->wq_cv);
    220 	mutex_exit(&wq->wq_lock);
    221 
    222 	/* Wait for it to exit.  */
    223 	(void)kthread_join(wq->wq_lwp);
    224 
    225 	KASSERT(wq->wq_dying);
    226 	KASSERT(!wq->wq_requeued);
    227 	KASSERT(wq->wq_flags == 0);
    228 	KASSERT(wq->wq_current_work == NULL);
    229 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
    230 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
    231 	cv_destroy(&wq->wq_cv);
    232 	mutex_destroy(&wq->wq_lock);
    233 
    234 	kmem_free(wq, sizeof(*wq));
    235 }
    236 
    237 /*
    239  * Work thread and callout
    240  */
    241 
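        /*
         * linux_workqueue_thread(cookie)
         *
         *	Worker thread for a workqueue: repeatedly grab a batch
         *	of work off the queue and run each item, until the
         *	queue is empty and the workqueue is dying.
         */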
    242 static void __dead
    243 linux_workqueue_thread(void *cookie)
    244 {
    245 	struct workqueue_struct *const wq = cookie;
    246 	TAILQ_HEAD(, work_struct) tmp;
    247 
    248 	lwp_setspecific(workqueue_key, wq);
    249 
    250 	mutex_enter(&wq->wq_lock);
    251 	for (;;) {
    252 		/*
    253 		 * Wait until there's activity.  If there's no work and
    254 		 * we're dying, stop here.
    255 		 */
    256 		while (TAILQ_EMPTY(&wq->wq_queue) && !wq->wq_dying)
    257 			cv_wait(&wq->wq_cv, &wq->wq_lock);
    258 		if (TAILQ_EMPTY(&wq->wq_queue)) {
    259 			KASSERT(wq->wq_dying);
    260 			break;
    261 		}
    262 
    263 		/* Grab a batch of work off the queue.  */
    264 		KASSERT(!TAILQ_EMPTY(&wq->wq_queue));
    265 		TAILQ_INIT(&tmp);
    266 		TAILQ_CONCAT(&tmp, &wq->wq_queue, work_entry);
    267 
    268 		/* Process each work item in the batch.  */
    269 		while (!TAILQ_EMPTY(&tmp)) {
    270 			struct work_struct *const work = TAILQ_FIRST(&tmp);
    271 
    272 			KASSERT(work->work_queue == wq);
    273 			TAILQ_REMOVE(&tmp, work, work_entry);
    274 			KASSERT(wq->wq_current_work == NULL);
    275 			wq->wq_current_work = work;
    276 
    277 			mutex_exit(&wq->wq_lock);
    278 			(*work->func)(work);
    279 			mutex_enter(&wq->wq_lock);
    280 
    281 			KASSERT(wq->wq_current_work == work);
    282 			KASSERT(work->work_queue == wq);
    283 			if (wq->wq_requeued)
    284 				wq->wq_requeued = false;
    285 			else
    286 				release_work(work, wq);
    287 			wq->wq_current_work = NULL;
    288 			cv_broadcast(&wq->wq_cv);
    289 		}
    290 
    291 		/* Notify flush that we've completed a batch of work.  */
    292 		wq->wq_gen++;
    293 		cv_broadcast(&wq->wq_cv);
    294 	}
    295 	mutex_exit(&wq->wq_lock);
    296 
    297 	kthread_exit(0);
    298 }
    299 
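        /*
         * linux_workqueue_timeout(cookie)
         *
         *	Callout for delayed work: put the work on the queue if
         *	it is still scheduled, reschedule the callout if it has
         *	been rescheduled, or finish cancelling it if it has
         *	been cancelled.
         */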
    300 static void
    301 linux_workqueue_timeout(void *cookie)
    302 {
    303 	struct delayed_work *const dw = cookie;
    304 	struct workqueue_struct *const wq = dw->work.work_queue;
    305 
    306 	KASSERT(wq != NULL);
    307 
    308 	mutex_enter(&wq->wq_lock);
    309 	KASSERT(dw->work.work_queue == wq);
    310 	switch (dw->dw_state) {
    311 	case DELAYED_WORK_IDLE:
    312 		panic("delayed work callout uninitialized: %p", dw);
    313 	case DELAYED_WORK_SCHEDULED:
    314 		dw_callout_destroy(wq, dw);
    315 		TAILQ_INSERT_TAIL(&wq->wq_queue, &dw->work, work_entry);
    316 		cv_broadcast(&wq->wq_cv);
    317 		break;
    318 	case DELAYED_WORK_RESCHEDULED:
    319 		KASSERT(dw->dw_resched >= 0);
    320 		callout_schedule(&dw->dw_callout, dw->dw_resched);
    321 		dw->dw_state = DELAYED_WORK_SCHEDULED;
    322 		dw->dw_resched = -1;
    323 		break;
    324 	case DELAYED_WORK_CANCELLED:
    325 		cancel_delayed_work_done(wq, dw);
    326 		/* Can't touch dw any more.  */
    327 		goto out;
    328 	default:
    329 		panic("delayed work callout in bad state: %p", dw);
    330 	}
    331 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE ||
    332 	    dw->dw_state == DELAYED_WORK_SCHEDULED);
    333 out:	mutex_exit(&wq->wq_lock);
    334 }
    335 
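        /*
         * current_work()
         *
         *	If called from a workqueue worker thread, return the
         *	work it is currently executing; otherwise return NULL.
         */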
    336 struct work_struct *
    337 current_work(void)
    338 {
    339 	struct workqueue_struct *wq = lwp_getspecific(workqueue_key);
    340 
    341 	/* If we're not a workqueue thread, then there's no work.  */
    342 	if (wq == NULL)
    343 		return NULL;
    344 
    345 	/*
    346 	 * Otherwise, we are in a worker thread, so work must be in
    347 	 * progress.  Return the current work item.
    348 	 */
    349 	KASSERT(wq->wq_current_work != NULL);
    350 	return wq->wq_current_work;
    351 }
    352 
    353 /*
    355  * Work
    356  */
    357 
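        /*
         * INIT_WORK(work, fn)
         *
         *	Initialize work to call fn when run.  It is not
         *	associated with any workqueue until it is queued.
         */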
    358 void
    359 INIT_WORK(struct work_struct *work, void (*fn)(struct work_struct *))
    360 {
    361 
    362 	work->work_queue = NULL;
    363 	work->func = fn;
    364 }
    365 
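        /*
         * acquire_work(work, wq)
         *
         *	Try to associate work with wq by atomically setting its
         *	workqueue pointer.  Return NULL on success, with a
         *	memory barrier, or the workqueue that work already
         *	belongs to on failure.  Caller must hold wq's lock.
         */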
    366 static struct workqueue_struct *
    367 acquire_work(struct work_struct *work, struct workqueue_struct *wq)
    368 {
    369 	struct workqueue_struct *wq0;
    370 
    371 	KASSERT(mutex_owned(&wq->wq_lock));
    372 
    373 	wq0 = atomic_cas_ptr(&work->work_queue, NULL, wq);
    374 	if (wq0 == NULL) {
    375 		membar_enter();
    376 		KASSERT(work->work_queue == wq);
    377 	}
    378 
    379 	return wq0;
    380 }
    381 
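        /*
         * release_work(work, wq)
         *
         *	Dissociate work from wq.  The memory barrier pairs with
         *	the one in acquire_work so that the work's state is
         *	visible before it can be acquired again.  Caller must
         *	hold wq's lock.
         */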
    382 static void
    383 release_work(struct work_struct *work, struct workqueue_struct *wq)
    384 {
    385 
    386 	KASSERT(work->work_queue == wq);
    387 	KASSERT(mutex_owned(&wq->wq_lock));
    388 
    389 	membar_exit();
    390 	work->work_queue = NULL;
    391 }
    392 
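        /*
         * schedule_work(work)
         *
         *	Queue work on the system workqueue.  Equivalent to
         *	queue_work(system_wq, work).
         */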
    393 bool
    394 schedule_work(struct work_struct *work)
    395 {
    396 
    397 	return queue_work(system_wq, work);
    398 }
    399 
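        /*
         * queue_work(wq, work)
         *
         *	Put work on wq's queue unless it is already there, and
         *	wake the worker thread if necessary.  Return true if it
         *	was newly queued, false if it was already on the queue.
         *	work must not be queued on any other workqueue.
         */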
    400 bool
    401 queue_work(struct workqueue_struct *wq, struct work_struct *work)
    402 {
    403 	struct workqueue_struct *wq0;
    404 	bool newly_queued;
    405 
    406 	KASSERT(wq != NULL);
    407 
    408 	mutex_enter(&wq->wq_lock);
    409 	if (__predict_true((wq0 = acquire_work(work, wq)) == NULL)) {
    410 		/*
    411 		 * It wasn't on any workqueue at all.  Put it on this
    412 		 * one, and signal the worker thread that there is work
    413 		 * to do.
    414 		 */
    415 		TAILQ_INSERT_TAIL(&wq->wq_queue, work, work_entry);
    416 		newly_queued = true;
    417 		cv_broadcast(&wq->wq_cv);
    418 	} else {
    419 		/*
    420 		 * It was on a workqueue, which had better be this one.
    421 		 * Requeue it if it has been taken off the queue to
    422 		 * execute and hasn't been requeued yet.  The worker
    423 		 * thread should already be running, so no need to
    424 		 * signal it.
    425 		 */
    426 		KASSERT(wq0 == wq);
    427 		if (wq->wq_current_work == work && !wq->wq_requeued) {
    428 			/*
    429 			 * It has been taken off the queue to execute,
    430 			 * and it hasn't been put back on the queue
    431 			 * again.  Put it back on the queue.  No need
    432 			 * to signal the worker thread because it will
    433 			 * notice when it reacquires the lock after
    434 			 * doing the work.
    435 			 */
    436 			TAILQ_INSERT_TAIL(&wq->wq_queue, work, work_entry);
    437 			wq->wq_requeued = true;
    438 			newly_queued = true;
    439 		} else {
    440 			/* It is still on the queue; nothing to do.  */
    441 			newly_queued = false;
    442 		}
    443 	}
    444 	mutex_exit(&wq->wq_lock);
    445 
    446 	return newly_queued;
    447 }
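
        /*
         * Example usage (illustrative sketch only, not part of the
         * original file; sc and example_task are hypothetical names):
         * embed a work_struct in a driver softc, initialize it once,
         * and queue it from wherever the deferred work is triggered.
         *
         *	INIT_WORK(&sc->sc_task, example_task);
         *	...
         *	(void)queue_work(system_wq, &sc->sc_task);
         *	...
         *	(void)cancel_work_sync(&sc->sc_task);
         */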
    448 
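        /*
         * cancel_work(work)
         *
         *	If work is on a queue and has not yet begun to run,
         *	take it off the queue and return true; otherwise return
         *	false.  Does not wait for work to complete if it is
         *	already running.
         */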
    449 bool
    450 cancel_work(struct work_struct *work)
    451 {
    452 	struct workqueue_struct *wq;
    453 	bool cancelled_p = false;
    454 
    455 	/* If there's no workqueue, nothing to cancel.   */
    456 	if ((wq = work->work_queue) == NULL)
    457 		goto out;
    458 
    459 	mutex_enter(&wq->wq_lock);
    460 	if (__predict_false(work->work_queue != wq)) {
    461 		/*
    462 		 * It has finished execution or been cancelled by
    463 		 * another thread, and has been moved off the
    464 		 * workqueue, so it's too late to cancel.
    465 		 */
    466 		cancelled_p = false;
    467 	} else if (wq->wq_current_work == work) {
    468 		/*
    469 		 * It has already begun execution, so it's too late to
    470 		 * cancel now.
    471 		 */
    472 		cancelled_p = false;
    473 	} else {
    474 		/*
    475 		 * It is still on the queue.  Take it off the queue and
    476 		 * report successful cancellation.
    477 		 */
    478 		TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
    479 		cancelled_p = true;
    480 	}
    481 	mutex_exit(&wq->wq_lock);
    482 
    483 out:	return cancelled_p;
    484 }
    485 
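        /*
         * cancel_work_sync(work)
         *
         *	Like cancel_work, but if work has already begun to run,
         *	also wait for that run to complete before returning.
         */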
    486 bool
    487 cancel_work_sync(struct work_struct *work)
    488 {
    489 	struct workqueue_struct *wq;
    490 	bool cancelled_p = false;
    491 
    492 	/* If there's no workqueue, nothing to cancel.   */
    493 	if ((wq = work->work_queue) == NULL)
    494 		goto out;
    495 
    496 	mutex_enter(&wq->wq_lock);
    497 	if (__predict_false(work->work_queue != wq)) {
    498 		/*
    499 		 * It has finished execution or been cancelled by
    500 		 * another thread, and has been moved off the
    501 		 * workqueue, so it's too late to cancel.
    502 		 */
    503 		cancelled_p = false;
    504 	} else if (wq->wq_current_work == work) {
    505 		/*
    506 		 * It has already begun execution, so it's too late to
    507 		 * cancel now.  Wait for it to complete.
    508 		 */
    509 		wait_for_current_work(work, wq);
    510 		cancelled_p = false;
    511 	} else {
    512 		/*
    513 		 * It is still on the queue.  Take it off the queue and
    514 		 * report successful cancellation.
    515 		 */
    516 		TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
    517 		cancelled_p = true;
    518 	}
    519 	mutex_exit(&wq->wq_lock);
    520 
    521 out:	return cancelled_p;
    522 }
    523 
    524 /*
    525  * wait_for_current_work(work, wq)
    526  *
    527  *	wq must be currently executing work.  Wait for it to finish.
    528  */
    529 static void
    530 wait_for_current_work(struct work_struct *work, struct workqueue_struct *wq)
    531 {
    532 	uint64_t gen;
    533 
    534 	KASSERT(mutex_owned(&wq->wq_lock));
    535 	KASSERT(work->work_queue == wq);
    536 	KASSERT(wq->wq_current_work == work);
    537 
    538 	/* Wait only one generation in case it gets requeued quickly.  */
    539 	gen = wq->wq_gen;
    540 	do {
    541 		cv_wait(&wq->wq_cv, &wq->wq_lock);
    542 	} while (wq->wq_current_work == work && wq->wq_gen == gen);
    543 }
    544 
    545 /*
    547  * Delayed work
    548  */
    549 
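        /*
         * INIT_DELAYED_WORK(dw, fn)
         *
         *	Initialize dw to call fn when run.  It is not
         *	associated with any workqueue and has no callout
         *	scheduled yet.
         */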
    550 void
    551 INIT_DELAYED_WORK(struct delayed_work *dw, void (*fn)(struct work_struct *))
    552 {
    553 
    554 	INIT_WORK(&dw->work, fn);
    555 	dw->dw_state = DELAYED_WORK_IDLE;
    556 	dw->dw_resched = -1;
    557 
    558 	/*
    559 	 * Defer callout_init until we are about to schedule the
    560 	 * callout, whose code paths can then callout_destroy it:
    561 	 * since there is no DESTROY_DELAYED_WORK or the like, we
    562 	 * otherwise have no opportunity to call callout_destroy.
    563 	 */
    564 }
    565 
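        /*
         * schedule_delayed_work(dw, ticks)
         *
         *	Queue dw on the system workqueue after a delay of ticks.
         *	Equivalent to queue_delayed_work(system_wq, dw, ticks).
         */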
    566 bool
    567 schedule_delayed_work(struct delayed_work *dw, unsigned long ticks)
    568 {
    569 
    570 	return queue_delayed_work(system_wq, dw, ticks);
    571 }
    572 
    573 /*
    574  * dw_callout_init(wq, dw)
    575  *
    576  *	Initialize the callout of dw and transition to
    577  *	DELAYED_WORK_SCHEDULED.  Caller must use callout_schedule.
    578  */
    579 static void
    580 dw_callout_init(struct workqueue_struct *wq, struct delayed_work *dw)
    581 {
    582 
    583 	KASSERT(mutex_owned(&wq->wq_lock));
    584 	KASSERT(dw->work.work_queue == wq);
    585 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
    586 
    587 	callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
    588 	callout_setfunc(&dw->dw_callout, &linux_workqueue_timeout, dw);
    589 	TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
    590 	dw->dw_state = DELAYED_WORK_SCHEDULED;
    591 }
    592 
    593 /*
    594  * dw_callout_destroy(wq, dw)
    595  *
    596  *	Destroy the callout of dw and transition to DELAYED_WORK_IDLE.
    597  */
    598 static void
    599 dw_callout_destroy(struct workqueue_struct *wq, struct delayed_work *dw)
    600 {
    601 
    602 	KASSERT(mutex_owned(&wq->wq_lock));
    603 	KASSERT(dw->work.work_queue == wq);
    604 	KASSERT(dw->dw_state == DELAYED_WORK_SCHEDULED ||
    605 	    dw->dw_state == DELAYED_WORK_RESCHEDULED ||
    606 	    dw->dw_state == DELAYED_WORK_CANCELLED);
    607 
    608 	TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
    609 	callout_destroy(&dw->dw_callout);
    610 	dw->dw_resched = -1;
    611 	dw->dw_state = DELAYED_WORK_IDLE;
    612 }
    613 
    614 /*
    615  * cancel_delayed_work_done(wq, dw)
    616  *
    617  *	Complete cancellation of a delayed work: transition from
    618  *	DELAYED_WORK_CANCELLED to DELAYED_WORK_IDLE and off the
    619  *	workqueue.  Caller must not touch dw after this returns.
    620  */
    621 static void
    622 cancel_delayed_work_done(struct workqueue_struct *wq, struct delayed_work *dw)
    623 {
    624 
    625 	KASSERT(mutex_owned(&wq->wq_lock));
    626 	KASSERT(dw->work.work_queue == wq);
    627 	KASSERT(dw->dw_state == DELAYED_WORK_CANCELLED);
    628 
    629 	dw_callout_destroy(wq, dw);
    630 	release_work(&dw->work, wq);
    631 	/* Can't touch dw after this point.  */
    632 }
    633 
    634 /*
    635  * queue_delayed_work(wq, dw, ticks)
    636  *
    637  *	If it is not currently scheduled, schedule dw to run after
    638  *	ticks.  If currently executing and not already rescheduled,
    639  *	reschedule it.  If ticks == 0, run without delay.
    640  */
    641 bool
    642 queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    643     unsigned long ticks)
    644 {
    645 	struct workqueue_struct *wq0;
    646 	bool newly_queued;
    647 
    648 	mutex_enter(&wq->wq_lock);
    649 	if (__predict_true((wq0 = acquire_work(&dw->work, wq)) == NULL)) {
    650 		/*
    651 		 * It wasn't on any workqueue at all.  Schedule it to
    652 		 * run on this one.
    653 		 */
    654 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
    655 		if (ticks == 0) {
    656 			TAILQ_INSERT_TAIL(&wq->wq_queue, &dw->work,
    657 			    work_entry);
    658 			cv_broadcast(&wq->wq_cv);
    659 		} else {
    660 			/*
    661 			 * Initialize a callout and schedule to run
    662 			 * after a delay.
    663 			 */
    664 			dw_callout_init(wq, dw);
    665 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
    666 		}
    667 		newly_queued = true;
    668 	} else {
    669 		/*
    670 		 * It was on a workqueue, which had better be this one.
    671 		 *
    672 		 * - If it has already begun to run, and it is not yet
    673 		 *   scheduled to run again, schedule it again.
    674 		 *
    675 		 * - If the callout is cancelled, reschedule it.
    676 		 *
    677 		 * - Otherwise, leave it alone.
    678 		 */
    679 		KASSERT(wq0 == wq);
    680 		if (wq->wq_current_work != &dw->work || !wq->wq_requeued) {
    681 			/*
    682 			 * It is either scheduled, on the queue but not
    683 			 * in progress, or in progress but not on the
    684 			 * queue.
    685 			 */
    686 			switch (dw->dw_state) {
    687 			case DELAYED_WORK_IDLE:
    688 				/*
    689 				 * It is not scheduled to run, and it
    690 				 * is not on the queue if it is
    691 				 * running.
    692 				 */
    693 				if (ticks == 0) {
    694 					/*
    695 					 * If it's in progress, put it
    696 					 * on the queue to run as soon
    697 					 * as the worker thread gets to
    698 					 * it.  No need for a wakeup
    699 					 * because either the worker
    700 					 * thread already knows it is
    701 					 * on the queue, or will check
    702 					 * once it is done executing.
    703 					 */
    704 					if (wq->wq_current_work == &dw->work) {
    705 						KASSERT(!wq->wq_requeued);
    706 						TAILQ_INSERT_TAIL(&wq->wq_queue,
    707 						    &dw->work, work_entry);
    708 						wq->wq_requeued = true;
    709 					}
    710 				} else {
    711 					/*
    712 					 * Initialize a callout and
    713 					 * schedule it to run after the
    714 					 * specified delay.
    715 					 */
    716 					dw_callout_init(wq, dw);
    717 					callout_schedule(&dw->dw_callout,
    718 					    MIN(INT_MAX, ticks));
    719 				}
    720 				break;
    721 			case DELAYED_WORK_SCHEDULED:
    722 			case DELAYED_WORK_RESCHEDULED:
    723 				/*
    724 				 * It is already scheduled to run after
    725 				 * a delay.  Leave it be.
    726 				 */
    727 				break;
    728 			case DELAYED_WORK_CANCELLED:
    729 				/*
    730 				 * It was scheduled and the callout has
    731 				 * begun to execute, but it was
    732 				 * cancelled.  Reschedule it.
    733 				 */
    734 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
    735 				dw->dw_resched = MIN(INT_MAX, ticks);
    736 				break;
    737 			default:
    738 				panic("invalid delayed work state: %d",
    739 				    dw->dw_state);
    740 			}
    741 		} else {
    742 			/*
    743 			 * It is in progress and it has been requeued.
    744 			 * It cannot be scheduled to run after a delay
    745 			 * at this point.  We just leave it be.
    746 			 */
    747 			KASSERTMSG((dw->dw_state == DELAYED_WORK_IDLE),
    748 			    "delayed work %p in wrong state: %d",
    749 			    dw, dw->dw_state);
    750 		}
    751 	}
    752 	mutex_exit(&wq->wq_lock);
    753 
    754 	return newly_queued;
    755 }
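
        /*
         * Example usage (illustrative sketch only, not part of the
         * original file; sc and example_poll are hypothetical names):
         * arm a periodic poll roughly once per second on the system
         * workqueue, and tear it down synchronously on detach.
         *
         *	INIT_DELAYED_WORK(&sc->sc_poll, example_poll);
         *	(void)schedule_delayed_work(&sc->sc_poll, hz);
         *	...
         *	(void)cancel_delayed_work_sync(&sc->sc_poll);
         */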
    756 
    757 /*
    758  * mod_delayed_work(wq, dw, ticks)
    759  *
    760  *	Schedule dw to run after ticks.  If currently scheduled,
    761  *	reschedule it.  If currently executing, reschedule it.  If
    762  *	ticks == 0, run without delay.
    763  */
    764 bool
    765 mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    766     unsigned long ticks)
    767 {
    768 	struct workqueue_struct *wq0;
    769 	bool timer_modified;
    770 
    771 	mutex_enter(&wq->wq_lock);
    772 	if ((wq0 = acquire_work(&dw->work, wq)) == NULL) {
    773 		/*
    774 		 * It wasn't on any workqueue at all.  Schedule it to
    775 		 * run on this one.
    776 		 */
    777 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
    778 		if (ticks == 0) {
    779 			/*
    780 			 * Run immediately: put it on the queue and
    781 			 * signal the worker thread.
    782 			 */
    783 			TAILQ_INSERT_TAIL(&wq->wq_queue, &dw->work,
    784 			    work_entry);
    785 			cv_broadcast(&wq->wq_cv);
    786 		} else {
    787 			/*
    788 			 * Initialize a callout and schedule to run
    789 			 * after a delay.
    790 			 */
    791 			dw_callout_init(wq, dw);
    792 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
    793 		}
    794 		timer_modified = false;
    795 	} else {
    796 		/* It was on a workqueue, which had better be this one.  */
    797 		KASSERT(wq0 == wq);
    798 		switch (dw->dw_state) {
    799 		case DELAYED_WORK_IDLE:
    800 			/*
    801 			 * It is not scheduled: it is on the queue or
    802 			 * it is running or both.
    803 			 */
    804 			if (wq->wq_current_work != &dw->work ||
    805 			    wq->wq_requeued) {
    806 				/*
    807 				 * It is on the queue, and it may or
    808 				 * may not be running.
    809 				 */
    810 				if (ticks == 0) {
    811 					/*
    812 					 * We ask it to run
    813 					 * immediately.  Leave it on
    814 					 * the queue.
    815 					 */
    816 				} else {
    817 					/*
    818 					 * Take it off the queue and
    819 					 * schedule a callout to run it
    820 					 * after a delay.
    821 					 */
    822 					if (wq->wq_requeued) {
    823 						wq->wq_requeued = false;
    824 					} else {
    825 						KASSERT(wq->wq_current_work !=
    826 						    &dw->work);
    827 					}
    828 					TAILQ_REMOVE(&wq->wq_queue, &dw->work,
    829 					    work_entry);
    830 					dw_callout_init(wq, dw);
    831 					callout_schedule(&dw->dw_callout,
    832 					    MIN(INT_MAX, ticks));
    833 				}
    834 				timer_modified = true;
    835 			} else {
    836 				/*
    837 				 * It is currently running and has not
    838 				 * been requeued.
    839 				 */
    840 				if (ticks == 0) {
    841 					/*
    842 					 * We ask it to run
    843 					 * immediately.  Put it on the
    844 					 * queue again.
    845 					 */
    846 					wq->wq_requeued = true;
    847 					TAILQ_INSERT_TAIL(&wq->wq_queue,
    848 					    &dw->work, work_entry);
    849 				} else {
    850 					/*
    851 					 * Schedule a callout to run it
    852 					 * after a delay.
    853 					 */
    854 					dw_callout_init(wq, dw);
    855 					callout_schedule(&dw->dw_callout,
    856 					    MIN(INT_MAX, ticks));
    857 				}
    858 				timer_modified = false;
    859 			}
    860 			break;
    861 		case DELAYED_WORK_SCHEDULED:
    862 			/*
    863 			 * It is scheduled to run after a delay.  Try
    864 			 * to stop it and reschedule it; if we can't,
    865 			 * either reschedule it or cancel it to put it
    866 			 * on the queue, and inform the callout.
    867 			 */
    868 			if (callout_stop(&dw->dw_callout)) {
    869 				/* Can't stop, callout has begun.  */
    870 				if (ticks == 0) {
    871 					/*
    872 					 * We don't actually need to do
    873 					 * anything.  The callout will
    874 					 * queue it as soon as it gets
    875 					 * the lock.
    876 					 */
    877 				} else {
    878 					/* Ask the callout to reschedule.  */
    879 					dw->dw_state = DELAYED_WORK_RESCHEDULED;
    880 					dw->dw_resched = MIN(INT_MAX, ticks);
    881 				}
    882 			} else {
    883 				/* We stopped the callout before it began.  */
    884 				if (ticks == 0) {
    885 					/*
    886 					 * Run immediately: destroy the
    887 					 * callout, put it on the
    888 					 * queue, and signal the worker
    889 					 * thread.
    890 					 */
    891 					dw_callout_destroy(wq, dw);
    892 					TAILQ_INSERT_TAIL(&wq->wq_queue,
    893 					    &dw->work, work_entry);
    894 					cv_broadcast(&wq->wq_cv);
    895 				} else {
    896 					/*
    897 					 * Reschedule the callout.  No
    898 					 * state change.
    899 					 */
    900 					callout_schedule(&dw->dw_callout,
    901 					    MIN(INT_MAX, ticks));
    902 				}
    903 			}
    904 			timer_modified = true;
    905 			break;
    906 		case DELAYED_WORK_RESCHEDULED:
    907 			/*
    908 			 * Someone rescheduled it after the callout
    909 			 * started but before the poor thing even had a
    910 			 * chance to acquire the lock.
    911 			 */
    912 			if (ticks == 0) {
    913 				/*
    914 				 * We can just switch back to
    915 				 * DELAYED_WORK_SCHEDULED so that the
    916 				 * callout will queue the work as soon
    917 				 * as it gets the lock.
    918 				 */
    919 				dw->dw_state = DELAYED_WORK_SCHEDULED;
    920 				dw->dw_resched = -1;
    921 			} else {
    922 				/* Change the rescheduled time.  */
    923 				dw->dw_resched = ticks;
    924 			}
    925 			timer_modified = true;
    926 			break;
    927 		case DELAYED_WORK_CANCELLED:
    928 			/*
    929 			 * Someone cancelled it after the callout
    930 			 * started but before the poor thing even had a
    931 			 * chance to acquire the lock.
    932 			 */
    933 			if (ticks == 0) {
    934 				/*
    935 				 * We can just switch back to
    936 				 * DELAYED_WORK_SCHEDULED so that the
    937 				 * callout will queue the work as soon
    938 				 * as it gets the lock.
    939 				 */
    940 				dw->dw_state = DELAYED_WORK_SCHEDULED;
    941 			} else {
    942 				/* Reschedule it.  */
    943 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
    944 				dw->dw_resched = MIN(INT_MAX, ticks);
    945 			}
    946 			timer_modified = true;
    947 			break;
    948 		default:
    949 			panic("invalid delayed work state: %d", dw->dw_state);
    950 		}
    951 	}
    952 	mutex_exit(&wq->wq_lock);
    953 
    954 	return timer_modified;
    955 }
    956 
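        /*
         * cancel_delayed_work(dw)
         *
         *	If dw is waiting on its callout or sitting on the
         *	queue, cancel that pending run and return true;
         *	otherwise return false.  Does not wait for a run that
         *	has already begun.
         */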
    957 bool
    958 cancel_delayed_work(struct delayed_work *dw)
    959 {
    960 	struct workqueue_struct *wq;
    961 	bool cancelled_p;
    962 
    963 	/* If there's no workqueue, nothing to cancel.   */
    964 	if ((wq = dw->work.work_queue) == NULL)
    965 		return false;
    966 
    967 	mutex_enter(&wq->wq_lock);
    968 	if (__predict_false(dw->work.work_queue != wq)) {
    969 		cancelled_p = false;
    970 	} else {
    971 		switch (dw->dw_state) {
    972 		case DELAYED_WORK_IDLE:
    973 			/*
    974 			 * It is either on the queue or already running
    975 			 * or both.
    976 			 */
    977 			if (wq->wq_current_work != &dw->work ||
    978 			    wq->wq_requeued) {
    979 				/*
    980 				 * It is on the queue, and it may or
    981 				 * may not be running.  Remove it from
    982 				 * the queue.
    983 				 */
    984 				TAILQ_REMOVE(&wq->wq_queue, &dw->work,
    985 				    work_entry);
    986 				if (wq->wq_current_work == &dw->work) {
    987 					/*
    988 					 * If it is running, then it
    989 					 * must have been requeued in
    990 					 * this case, so mark it no
    991 					 * longer requeued.
    992 					 */
    993 					KASSERT(wq->wq_requeued);
    994 					wq->wq_requeued = false;
    995 				}
    996 				cancelled_p = true;
    997 			} else {
    998 				/*
    999 				 * Too late, it's already running, but
   1000 				 * at least it hasn't been requeued.
   1001 				 */
   1002 				cancelled_p = false;
   1003 			}
   1004 			break;
   1005 		case DELAYED_WORK_SCHEDULED:
   1006 			/*
   1007 			 * If it is scheduled, mark it cancelled and
   1008 			 * try to stop the callout before it starts.
   1009 			 *
   1010 			 * If it's too late and the callout has already
   1011 			 * begun to execute, tough.
   1012 			 *
   1013 			 * If we stopped the callout before it started,
   1014 			 * however, then destroy the callout and
   1015 			 * dissociate it from the workqueue ourselves.
   1016 			 */
   1017 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1018 			cancelled_p = true;
   1019 			if (!callout_stop(&dw->dw_callout))
   1020 				cancel_delayed_work_done(wq, dw);
   1021 			break;
   1022 		case DELAYED_WORK_RESCHEDULED:
   1023 			/*
   1024 			 * If it is being rescheduled, the callout has
   1025 			 * already fired.  We must ask it to cancel.
   1026 			 */
   1027 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1028 			dw->dw_resched = -1;
   1029 			cancelled_p = true;
   1030 			break;
   1031 		case DELAYED_WORK_CANCELLED:
   1032 			/*
   1033 			 * If it is being cancelled, the callout has
   1034 			 * already fired.  There is nothing more for us
   1035 			 * to do.  Someone else claims credit for
   1036 			 * cancelling it.
   1037 			 */
   1038 			cancelled_p = false;
   1039 			break;
   1040 		default:
   1041 			panic("invalid delayed work state: %d",
   1042 			    dw->dw_state);
   1043 		}
   1044 	}
   1045 	mutex_exit(&wq->wq_lock);
   1046 
   1047 	return cancelled_p;
   1048 }
   1049 
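        /*
         * cancel_delayed_work_sync(dw)
         *
         *	Like cancel_delayed_work, but also wait for any run of
         *	dw, or of its callout, that has already begun to
         *	complete before returning.
         */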
   1050 bool
   1051 cancel_delayed_work_sync(struct delayed_work *dw)
   1052 {
   1053 	struct workqueue_struct *wq;
   1054 	bool cancelled_p;
   1055 
   1056 	/* If there's no workqueue, nothing to cancel.  */
   1057 	if ((wq = dw->work.work_queue) == NULL)
   1058 		return false;
   1059 
   1060 	mutex_enter(&wq->wq_lock);
   1061 	if (__predict_false(dw->work.work_queue != wq)) {
   1062 		cancelled_p = false;
   1063 	} else {
   1064 		switch (dw->dw_state) {
   1065 		case DELAYED_WORK_IDLE:
   1066 			/*
   1067 			 * It is either on the queue or already running
   1068 			 * or both.
   1069 			 */
   1070 			if (wq->wq_current_work == &dw->work) {
   1071 				/*
   1072 				 * Too late, it's already running.
   1073 				 * First, make sure it's not requeued.
   1074 				 * Then wait for it to complete.
   1075 				 */
   1076 				if (wq->wq_requeued) {
   1077 					TAILQ_REMOVE(&wq->wq_queue, &dw->work,
   1078 					    work_entry);
   1079 					wq->wq_requeued = false;
   1080 				}
   1081 				wait_for_current_work(&dw->work, wq);
   1082 				cancelled_p = false;
   1083 			} else {
   1084 				/* Got in before it started.  Remove it.  */
   1085 				TAILQ_REMOVE(&wq->wq_queue, &dw->work,
   1086 				    work_entry);
   1087 				cancelled_p = true;
   1088 			}
   1089 			break;
   1090 		case DELAYED_WORK_SCHEDULED:
   1091 			/*
   1092 			 * If it is scheduled, mark it cancelled and
   1093 			 * try to stop the callout before it starts.
   1094 			 *
   1095 			 * If it's too late and the callout has already
   1096 			 * begun to execute, we must wait for it to
   1097 			 * complete.  But we got in soon enough to ask
   1098 			 * the callout not to run, so we successfully
   1099 			 * cancelled it in that case.
   1100 			 *
   1101 			 * If we stopped the callout before it started,
   1102 			 * then we must destroy the callout and
   1103 			 * dissociate it from the workqueue ourselves.
   1104 			 */
   1105 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1106 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
   1107 				cancel_delayed_work_done(wq, dw);
   1108 			cancelled_p = true;
   1109 			break;
   1110 		case DELAYED_WORK_RESCHEDULED:
   1111 			/*
   1112 			 * If it is being rescheduled, the callout has
   1113 			 * already fired.  We must ask it to cancel and
   1114 			 * wait for it to complete.
   1115 			 */
   1116 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1117 			dw->dw_resched = -1;
   1118 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
   1119 			cancelled_p = true;
   1120 			break;
   1121 		case DELAYED_WORK_CANCELLED:
   1122 			/*
   1123 			 * If it is being cancelled, the callout has
   1124 			 * already fired.  We need only wait for it to
   1125 			 * complete.  Someone else, however, claims
   1126 			 * credit for cancelling it.
   1127 			 */
   1128 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
   1129 			cancelled_p = false;
   1130 			break;
   1131 		default:
   1132 			panic("invalid delayed work state: %d",
   1133 			    dw->dw_state);
   1134 		}
   1135 	}
   1136 	mutex_exit(&wq->wq_lock);
   1137 
   1138 	return cancelled_p;
   1139 }
   1140 
   1141 /*
   1143  * Flush
   1144  */
   1145 
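        /*
         * flush_scheduled_work()
         *
         *	Wait for all work queued on the system workqueue so far
         *	to complete.  Equivalent to flush_workqueue(system_wq).
         */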
   1146 void
   1147 flush_scheduled_work(void)
   1148 {
   1149 
   1150 	flush_workqueue(system_wq);
   1151 }
   1152 
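        /*
         * flush_workqueue_locked(wq)
         *
         *	Wait until the worker thread has finished both the
         *	batch it is currently running and everything already on
         *	the queue.  Caller must hold wq's lock.
         */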
   1153 static void
   1154 flush_workqueue_locked(struct workqueue_struct *wq)
   1155 {
   1156 	uint64_t gen;
   1157 
   1158 	KASSERT(mutex_owned(&wq->wq_lock));
   1159 
   1160 	/* Get the current generation number.  */
   1161 	gen = wq->wq_gen;
   1162 
   1163 	/*
   1164 	 * If there's a batch of work in progress, we must wait for the
   1165 	 * worker thread to finish that batch.
   1166 	 */
   1167 	if (wq->wq_current_work != NULL)
   1168 		gen++;
   1169 
   1170 	/*
   1171 	 * If there's any work yet to be claimed from the queue by the
   1172 	 * worker thread, we must wait for it to finish one more batch
   1173 	 * too.
   1174 	 */
   1175 	if (!TAILQ_EMPTY(&wq->wq_queue))
   1176 		gen++;
   1177 
   1178 	/* Wait until the generation number has caught up.  */
   1179 	while (wq->wq_gen < gen)
   1180 		cv_wait(&wq->wq_cv, &wq->wq_lock);
   1181 }
   1182 
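        /*
         * flush_workqueue(wq)
         *
         *	Wait for all work queued on wq so far to complete.
         */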
   1183 void
   1184 flush_workqueue(struct workqueue_struct *wq)
   1185 {
   1186 
   1187 	mutex_enter(&wq->wq_lock);
   1188 	flush_workqueue_locked(wq);
   1189 	mutex_exit(&wq->wq_lock);
   1190 }
   1191 
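        /*
         * flush_work(work)
         *
         *	If work is on a workqueue, wait for it to complete.
         *	Implemented by flushing the whole workqueue, which is
         *	more than necessary but harmless.
         */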
   1192 void
   1193 flush_work(struct work_struct *work)
   1194 {
   1195 	struct workqueue_struct *wq;
   1196 
   1197 	/* If there's no workqueue, nothing to flush.  */
   1198 	if ((wq = work->work_queue) == NULL)
   1199 		return;
   1200 
   1201 	flush_workqueue(wq);
   1202 }
   1203 
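        /*
         * flush_delayed_work(dw)
         *
         *	If dw is on the queue or in progress, wait for it to
         *	complete.  If its callout has not fired yet, cancel the
         *	callout rather than wait out the delay.
         */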
   1204 void
   1205 flush_delayed_work(struct delayed_work *dw)
   1206 {
   1207 	struct workqueue_struct *wq;
   1208 
   1209 	/* If there's no workqueue, nothing to flush.  */
   1210 	if ((wq = dw->work.work_queue) == NULL)
   1211 		return;
   1212 
   1213 	mutex_enter(&wq->wq_lock);
   1214 	if (__predict_true(dw->work.work_queue == wq)) {
   1215 		switch (dw->dw_state) {
   1216 		case DELAYED_WORK_IDLE:
   1217 			/*
   1218 			 * It has a workqueue assigned and the callout
   1219 			 * is idle, so it must be in progress or on the
   1220 			 * queue.  In that case, wait for it to
   1221 			 * complete.  Waiting for the whole queue to
   1222 			 * flush is overkill, but doesn't hurt.
   1223 			 */
   1224 			flush_workqueue_locked(wq);
   1225 			break;
   1226 		case DELAYED_WORK_SCHEDULED:
   1227 			/*
   1228 			 * If it is scheduled, mark it cancelled and
   1229 			 * try to stop the callout before it starts.
   1230 			 *
   1231 			 * If it's too late and the callout has already
   1232 			 * begun to execute, we must wait for it to
   1233 			 * complete.  But we got in soon enough to ask
   1234 			 * the callout not to run.
   1235 			 *
   1236 			 * If we stopped the callout before it started,
   1237 			 * then we must destroy the callout and
   1238 			 * dissociate it from the workqueue ourselves.
   1239 			 */
   1240 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1241 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
   1242 				cancel_delayed_work_done(wq, dw);
   1243 			break;
   1244 		case DELAYED_WORK_RESCHEDULED:
   1245 			/*
   1246 			 * If it is being rescheduled, the callout has
   1247 			 * already fired.  We must ask it to cancel and
   1248 			 * wait for it to complete.
   1249 			 */
   1250 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1251 			dw->dw_resched = -1;
   1252 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
   1253 			break;
   1254 		case DELAYED_WORK_CANCELLED:
   1255 			/*
   1256 			 * If it is being cancelled, the callout has
   1257 			 * already fired.  We need only wait for it to
   1258 			 * complete.
   1259 			 */
   1260 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
   1261 			break;
   1262 		default:
   1263 			panic("invalid delayed work state: %d",
   1264 			    dw->dw_state);
   1265 		}
   1266 	}
   1267 	mutex_exit(&wq->wq_lock);
   1268 }
   1269