Home | History | Annotate | Line # | Download | only in linux
linux_work.c revision 1.30
      1 /*	$NetBSD: linux_work.c,v 1.30 2018/08/27 15:03:45 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2018 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Taylor R. Campbell.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: linux_work.c,v 1.30 2018/08/27 15:03:45 riastradh Exp $");
     34 
     35 #include <sys/types.h>
     36 #include <sys/atomic.h>
     37 #include <sys/callout.h>
     38 #include <sys/condvar.h>
     39 #include <sys/errno.h>
     40 #include <sys/kmem.h>
     41 #include <sys/kthread.h>
     42 #include <sys/lwp.h>
     43 #include <sys/mutex.h>
     44 #include <sys/queue.h>
     45 
     46 #include <linux/workqueue.h>
     47 
/*
 * struct workqueue_struct
 *
 *	State of one workqueue.  Each workqueue is served by the single
 *	worker thread created in alloc_ordered_workqueue.  The queues
 *	and flags below are manipulated with wq_lock held.
 */
struct workqueue_struct {
	/* Lock serializing access to the queues and flags.  */
	kmutex_t			wq_lock;
	/*
	 * Broadcast when work is queued, when the worker finishes an
	 * item or a batch, and when the workqueue starts dying.
	 */
	kcondvar_t			wq_cv;
	/* Delayed work whose callout has been initialized.  */
	TAILQ_HEAD(, delayed_work)	wq_delayed;
	/* Work waiting for the worker thread to pick it up.  */
	TAILQ_HEAD(, work_struct)	wq_queue;
	/* Item the worker thread is executing, or NULL if none.  */
	struct work_struct		*wq_current_work;
	/* Set to zero at creation and asserted zero at destruction.  */
	int				wq_flags;
	/* The worker thread, joined by destroy_workqueue.  */
	struct lwp			*wq_lwp;
	/* Incremented each time the worker completes a batch.  */
	uint64_t			wq_gen;
	/* True if the current work was put back on wq_queue.  */
	bool				wq_requeued:1;
	/* True once destroy_workqueue has asked the worker to exit.  */
	bool				wq_dying:1;
};
     60 
/* Worker thread body and delayed-work callout handler.  */
static void __dead	linux_workqueue_thread(void *);
static void		linux_workqueue_timeout(void *);
/* Claim and release of a work item's work_queue association.  */
static struct workqueue_struct *
			acquire_work(struct work_struct *,
			    struct workqueue_struct *);
static void		release_work(struct work_struct *,
			    struct workqueue_struct *);
/* Delayed-work callout setup and cancellation teardown.  */
static void		dw_callout_init(struct workqueue_struct *,
			    struct delayed_work *);
static void		cancel_delayed_work_done(struct workqueue_struct *,
			    struct delayed_work *);

/*
 * lwp-specific key under which each worker thread records the
 * workqueue it serves; read back by current_work.
 */
static specificdata_key_t workqueue_key __read_mostly;

/* Global workqueues, created by linux_workqueue_init.  */
struct workqueue_struct	*system_wq __read_mostly;
struct workqueue_struct	*system_long_wq __read_mostly;
struct workqueue_struct	*system_power_efficient_wq __read_mostly;
     78 
     79 int
     80 linux_workqueue_init(void)
     81 {
     82 	int error;
     83 
     84 	error = lwp_specific_key_create(&workqueue_key, NULL);
     85 	if (error)
     86 		goto fail0;
     87 
     88 	system_wq = alloc_ordered_workqueue("lnxsyswq", 0);
     89 	if (system_wq == NULL) {
     90 		error = ENOMEM;
     91 		goto fail1;
     92 	}
     93 
     94 	system_long_wq = alloc_ordered_workqueue("lnxlngwq", 0);
     95 	if (system_long_wq == NULL) {
     96 		error = ENOMEM;
     97 		goto fail2;
     98 	}
     99 
    100 	system_power_efficient_wq = alloc_ordered_workqueue("lnxpwrwq", 0);
    101 	if (system_long_wq == NULL) {
    102 		error = ENOMEM;
    103 		goto fail3;
    104 	}
    105 
    106 	return 0;
    107 
    108 fail4: __unused
    109 	destroy_workqueue(system_power_efficient_wq);
    110 fail3:	destroy_workqueue(system_long_wq);
    111 fail2:	destroy_workqueue(system_wq);
    112 fail1:	lwp_specific_key_delete(workqueue_key);
    113 fail0:	KASSERT(error);
    114 	return error;
    115 }
    116 
/*
 * linux_workqueue_fini()
 *
 *	Destroy the global workqueues in the reverse of the order in
 *	which linux_workqueue_init created them, then delete the
 *	lwp-specific key.
 */
void
linux_workqueue_fini(void)
{

	destroy_workqueue(system_power_efficient_wq);
	destroy_workqueue(system_long_wq);
	destroy_workqueue(system_wq);
	lwp_specific_key_delete(workqueue_key);
}
    126 
    127 /*
    129  * Workqueues
    130  */
    131 
    132 struct workqueue_struct *
    133 alloc_ordered_workqueue(const char *name, int flags)
    134 {
    135 	struct workqueue_struct *wq;
    136 	int error;
    137 
    138 	KASSERT(flags == 0);
    139 
    140 	wq = kmem_zalloc(sizeof(*wq), KM_SLEEP);
    141 
    142 	mutex_init(&wq->wq_lock, MUTEX_DEFAULT, IPL_NONE);
    143 	cv_init(&wq->wq_cv, name);
    144 	TAILQ_INIT(&wq->wq_delayed);
    145 	TAILQ_INIT(&wq->wq_queue);
    146 	wq->wq_current_work = NULL;
    147 	wq->wq_flags = 0;
    148 	wq->wq_lwp = NULL;
    149 	wq->wq_gen = 0;
    150 	wq->wq_requeued = false;
    151 	wq->wq_dying = false;
    152 
    153 	error = kthread_create(PRI_NONE,
    154 	    KTHREAD_MPSAFE|KTHREAD_TS|KTHREAD_MUSTJOIN, NULL,
    155 	    &linux_workqueue_thread, wq, &wq->wq_lwp, "%s", name);
    156 	if (error)
    157 		goto fail0;
    158 
    159 	return wq;
    160 
    161 fail0:	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
    162 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
    163 	cv_destroy(&wq->wq_cv);
    164 	mutex_destroy(&wq->wq_lock);
    165 	kmem_free(wq, sizeof(*wq));
    166 	return NULL;
    167 }
    168 
/*
 * destroy_workqueue(wq)
 *
 *	Cancel every delayed work still scheduled on wq, tell the
 *	worker thread to exit, wait for it to do so, and free wq.
 *	Work already on the run queue is still executed: the worker
 *	thread exits only once the queue is empty.
 */
void
destroy_workqueue(struct workqueue_struct *wq)
{

	/*
	 * Cancel all delayed work.  We do this first because any
	 * delayed work that has already timed out, which we can't
	 * cancel, may have queued new work.
	 */
	mutex_enter(&wq->wq_lock);
	while (!TAILQ_EMPTY(&wq->wq_delayed)) {
		struct delayed_work *const dw = TAILQ_FIRST(&wq->wq_delayed);

		KASSERT(dw->work.work_queue == wq);
		KASSERTMSG((dw->dw_state == DELAYED_WORK_SCHEDULED ||
			dw->dw_state == DELAYED_WORK_RESCHEDULED ||
			dw->dw_state == DELAYED_WORK_CANCELLED),
		    "delayed work %p in bad state: %d",
		    dw, dw->dw_state);

		/*
		 * Mark it cancelled and try to stop the callout before
		 * it starts.
		 *
		 * If it's too late and the callout has already begun
		 * to execute, then it will notice that we asked to
		 * cancel it and remove itself from the queue before
		 * returning.
		 *
		 * If we stopped the callout before it started,
		 * however, then we can safely destroy the callout and
		 * dissociate it from the workqueue ourselves.
		 */
		dw->dw_state = DELAYED_WORK_CANCELLED;
		if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
			cancel_delayed_work_done(wq, dw);
	}
	mutex_exit(&wq->wq_lock);

	/*
	 * At this point, no new work can be put on the queue.
	 */

	/* Tell the thread to exit.  */
	mutex_enter(&wq->wq_lock);
	wq->wq_dying = true;
	cv_broadcast(&wq->wq_cv);
	mutex_exit(&wq->wq_lock);

	/* Wait for it to exit.  */
	(void)kthread_join(wq->wq_lwp);

	/* The worker is gone; the workqueue must now be quiescent.  */
	KASSERT(wq->wq_dying);
	KASSERT(!wq->wq_requeued);
	KASSERT(wq->wq_flags == 0);
	KASSERT(wq->wq_current_work == NULL);
	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
	cv_destroy(&wq->wq_cv);
	mutex_destroy(&wq->wq_lock);

	kmem_free(wq, sizeof(*wq));
}
    232 
    233 /*
    235  * Work thread and callout
    236  */
    237 
/*
 * linux_workqueue_thread(cookie)
 *
 *	Main loop of the worker thread for the workqueue passed as
 *	cookie.  Repeatedly waits for work, moves everything queued
 *	onto a private batch list, and runs each item in order with
 *	the workqueue lock dropped.  Exits once the queue is empty and
 *	the workqueue is dying.
 */
static void __dead
linux_workqueue_thread(void *cookie)
{
	struct workqueue_struct *const wq = cookie;
	TAILQ_HEAD(, work_struct) tmp;

	/* Record which workqueue this thread serves, for current_work.  */
	lwp_setspecific(workqueue_key, wq);

	mutex_enter(&wq->wq_lock);
	for (;;) {
		/*
		 * Wait until there's activity.  If there's no work and
		 * we're dying, stop here.
		 */
		while (TAILQ_EMPTY(&wq->wq_queue) && !wq->wq_dying)
			cv_wait(&wq->wq_cv, &wq->wq_lock);
		if (TAILQ_EMPTY(&wq->wq_queue)) {
			KASSERT(wq->wq_dying);
			break;
		}

		/*
		 * Grab a batch of work off the queue.
		 *
		 * NOTE(review): items in the batch live on the local
		 * list tmp, not on wq->wq_queue, yet the cancel paths
		 * take not-yet-running items off with
		 * TAILQ_REMOVE(&wq->wq_queue, ...).  Confirm this is
		 * safe when the item is the last one on tmp.
		 */
		KASSERT(!TAILQ_EMPTY(&wq->wq_queue));
		TAILQ_INIT(&tmp);
		TAILQ_CONCAT(&tmp, &wq->wq_queue, work_entry);

		/* Process each work item in the batch.  */
		while (!TAILQ_EMPTY(&tmp)) {
			struct work_struct *const work = TAILQ_FIRST(&tmp);

			KASSERT(work->work_queue == wq);
			TAILQ_REMOVE(&tmp, work, work_entry);
			KASSERT(wq->wq_current_work == NULL);
			wq->wq_current_work = work;

			/* Run the work function without the lock held.  */
			mutex_exit(&wq->wq_lock);
			(*work->func)(work);
			mutex_enter(&wq->wq_lock);

			/*
			 * If the work was requeued while it ran, it
			 * stays associated with wq; otherwise release
			 * it.  Either way wake anyone waiting for the
			 * current work to finish.
			 */
			KASSERT(wq->wq_current_work == work);
			KASSERT(work->work_queue == wq);
			if (wq->wq_requeued)
				wq->wq_requeued = false;
			else
				release_work(work, wq);
			wq->wq_current_work = NULL;
			cv_broadcast(&wq->wq_cv);
		}

		/* Notify flush that we've completed a batch of work.  */
		wq->wq_gen++;
		cv_broadcast(&wq->wq_cv);
	}
	mutex_exit(&wq->wq_lock);

	kthread_exit(0);
}
    295 
/*
 * linux_workqueue_timeout(cookie)
 *
 *	Callout handler for the delayed work passed as cookie.  Under
 *	the workqueue lock, acts on the state left by whoever last
 *	touched the delayed work:
 *
 *	- SCHEDULED: the delay has elapsed; destroy the callout and
 *	  move the work onto the run queue.
 *	- RESCHEDULED: the callout was rescheduled after this instance
 *	  began; just go back to SCHEDULED.
 *	- CANCELLED: finish the cancellation.
 */
static void
linux_workqueue_timeout(void *cookie)
{
	struct delayed_work *const dw = cookie;
	struct workqueue_struct *const wq = dw->work.work_queue;

	KASSERT(wq != NULL);

	mutex_enter(&wq->wq_lock);
	KASSERT(dw->work.work_queue == wq);
	switch (dw->dw_state) {
	case DELAYED_WORK_IDLE:
		panic("delayed work callout uninitialized: %p", dw);
	case DELAYED_WORK_SCHEDULED:
		/* Hand the work to the worker thread and wake it.  */
		dw->dw_state = DELAYED_WORK_IDLE;
		callout_destroy(&dw->dw_callout);
		TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
		TAILQ_INSERT_TAIL(&wq->wq_queue, &dw->work, work_entry);
		cv_broadcast(&wq->wq_cv);
		break;
	case DELAYED_WORK_RESCHEDULED:
		dw->dw_state = DELAYED_WORK_SCHEDULED;
		break;
	case DELAYED_WORK_CANCELLED:
		cancel_delayed_work_done(wq, dw);
		/* Can't touch dw any more.  */
		goto out;
	default:
		panic("delayed work callout in bad state: %p", dw);
	}
	KASSERT(dw->dw_state == DELAYED_WORK_IDLE ||
	    dw->dw_state == DELAYED_WORK_SCHEDULED);
out:	mutex_exit(&wq->wq_lock);
}
    330 
    331 struct work_struct *
    332 current_work(void)
    333 {
    334 	struct workqueue_struct *wq = lwp_getspecific(workqueue_key);
    335 
    336 	/* If we're not a workqueue thread, then there's no work.  */
    337 	if (wq == NULL)
    338 		return NULL;
    339 
    340 	/*
    341 	 * Otherwise, this should be possible only while work is in
    342 	 * progress.  Return the current work item.
    343 	 */
    344 	KASSERT(wq->wq_current_work != NULL);
    345 	return wq->wq_current_work;
    346 }
    347 
    348 /*
    350  * Work
    351  */
    352 
    353 void
    354 INIT_WORK(struct work_struct *work, void (*fn)(struct work_struct *))
    355 {
    356 
    357 	work->work_queue = NULL;
    358 	work->func = fn;
    359 }
    360 
    361 static struct workqueue_struct *
    362 acquire_work(struct work_struct *work, struct workqueue_struct *wq)
    363 {
    364 	struct workqueue_struct *wq0;
    365 
    366 	KASSERT(mutex_owned(&wq->wq_lock));
    367 
    368 	wq0 = atomic_cas_ptr(&work->work_queue, NULL, wq);
    369 	if (wq0 == NULL) {
    370 		membar_enter();
    371 		KASSERT(work->work_queue == wq);
    372 	}
    373 
    374 	return wq0;
    375 }
    376 
    377 static void
    378 release_work(struct work_struct *work, struct workqueue_struct *wq)
    379 {
    380 
    381 	KASSERT(work->work_queue == wq);
    382 	KASSERT(mutex_owned(&wq->wq_lock));
    383 
    384 	membar_exit();
    385 	work->work_queue = NULL;
    386 }
    387 
    388 bool
    389 schedule_work(struct work_struct *work)
    390 {
    391 
    392 	return queue_work(system_wq, work);
    393 }
    394 
    395 bool
    396 queue_work(struct workqueue_struct *wq, struct work_struct *work)
    397 {
    398 	struct workqueue_struct *wq0;
    399 	bool newly_queued;
    400 
    401 	KASSERT(wq != NULL);
    402 
    403 	mutex_enter(&wq->wq_lock);
    404 	if (__predict_true((wq0 = acquire_work(work, wq)) == NULL)) {
    405 		/*
    406 		 * It wasn't on any workqueue at all.  Put it on this
    407 		 * one, and signal the worker thread that there is work
    408 		 * to do.
    409 		 */
    410 		TAILQ_INSERT_TAIL(&wq->wq_queue, work, work_entry);
    411 		newly_queued = true;
    412 		cv_broadcast(&wq->wq_cv);
    413 	} else {
    414 		/*
    415 		 * It was on a workqueue, which had better be this one.
    416 		 * Requeue it if it has been taken off the queue to
    417 		 * execute and hasn't been requeued yet.  The worker
    418 		 * thread should already be running, so no need to
    419 		 * signal it.
    420 		 */
    421 		KASSERT(wq0 == wq);
    422 		if (wq->wq_current_work == work && !wq->wq_requeued) {
    423 			/*
    424 			 * It has been taken off the queue to execute,
    425 			 * and it hasn't been put back on the queue
    426 			 * again.  Put it back on the queue.  No need
    427 			 * to signal the worker thread because it will
    428 			 * notice when it reacquires the lock after
    429 			 * doing the work.
    430 			 */
    431 			TAILQ_INSERT_TAIL(&wq->wq_queue, work, work_entry);
    432 			wq->wq_requeued = true;
    433 			newly_queued = true;
    434 		} else {
    435 			/* It is still on the queue; nothing to do.  */
    436 			newly_queued = false;
    437 		}
    438 	}
    439 	mutex_exit(&wq->wq_lock);
    440 
    441 	return newly_queued;
    442 }
    443 
    444 bool
    445 cancel_work(struct work_struct *work)
    446 {
    447 	struct workqueue_struct *wq;
    448 	bool cancelled_p = false;
    449 
    450 	/* If there's no workqueue, nothing to cancel.   */
    451 	if ((wq = work->work_queue) == NULL)
    452 		goto out;
    453 
    454 	mutex_enter(&wq->wq_lock);
    455 	if (__predict_false(work->work_queue != wq)) {
    456 		/*
    457 		 * It has finished execution or been cancelled by
    458 		 * another thread, and has been moved off the
    459 		 * workqueue, so it's too to cancel.
    460 		 */
    461 		cancelled_p = false;
    462 	} else if (wq->wq_current_work == work) {
    463 		/*
    464 		 * It has already begun execution, so it's too late to
    465 		 * cancel now.
    466 		 */
    467 		cancelled_p = false;
    468 	} else {
    469 		/*
    470 		 * It is still on the queue.  Take it off the queue and
    471 		 * report successful cancellation.
    472 		 */
    473 		TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
    474 		cancelled_p = true;
    475 	}
    476 	mutex_exit(&wq->wq_lock);
    477 
    478 out:	return cancelled_p;
    479 }
    480 
    481 bool
    482 cancel_work_sync(struct work_struct *work)
    483 {
    484 	struct workqueue_struct *wq;
    485 	bool cancelled_p = false;
    486 
    487 	/* If there's no workqueue, nothing to cancel.   */
    488 	if ((wq = work->work_queue) == NULL)
    489 		goto out;
    490 
    491 	mutex_enter(&wq->wq_lock);
    492 	if (__predict_false(work->work_queue != wq)) {
    493 		/*
    494 		 * It has finished execution or been cancelled by
    495 		 * another thread, and has been moved off the
    496 		 * workqueue, so it's too to cancel.
    497 		 */
    498 		cancelled_p = false;
    499 	} else if (wq->wq_current_work == work) {
    500 		/*
    501 		 * It has already begun execution, so it's too late to
    502 		 * cancel now.  Wait for it to complete.  Don't wait
    503 		 * more than one generation in case it gets requeued.
    504 		 */
    505 		uint64_t gen = wq->wq_gen;
    506 		do {
    507 			cv_wait(&wq->wq_cv, &wq->wq_lock);
    508 		} while (wq->wq_current_work == work && wq->wq_gen == gen);
    509 		cancelled_p = false;
    510 	} else {
    511 		/*
    512 		 * It is still on the queue.  Take it off the queue and
    513 		 * report successful cancellation.
    514 		 */
    515 		TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
    516 		cancelled_p = true;
    517 	}
    518 	mutex_exit(&wq->wq_lock);
    519 
    520 out:	return cancelled_p;
    521 }
    522 
    523 /*
    525  * Delayed work
    526  */
    527 
    528 void
    529 INIT_DELAYED_WORK(struct delayed_work *dw, void (*fn)(struct work_struct *))
    530 {
    531 
    532 	INIT_WORK(&dw->work, fn);
    533 	dw->dw_state = DELAYED_WORK_IDLE;
    534 
    535 	/*
    536 	 * Defer callout_init until we are going to schedule the
    537 	 * callout, which can then callout_destroy it, because
    538 	 * otherwise since there's no DESTROY_DELAYED_WORK or anything
    539 	 * we have no opportunity to call callout_destroy.
    540 	 */
    541 }
    542 
    543 bool
    544 schedule_delayed_work(struct delayed_work *dw, unsigned long ticks)
    545 {
    546 
    547 	return queue_delayed_work(system_wq, dw, ticks);
    548 }
    549 
    550 /*
    551  * dw_callout_init(wq, dw)
    552  *
    553  *	Initialize the callout of dw and transition to
    554  *	DELAYED_WORK_SCHEDULED.  Caller must use callout_schedule.
    555  */
    556 static void
    557 dw_callout_init(struct workqueue_struct *wq, struct delayed_work *dw)
    558 {
    559 
    560 	KASSERT(mutex_owned(&wq->wq_lock));
    561 	KASSERT(dw->work.work_queue == wq);
    562 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
    563 
    564 	callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
    565 	callout_setfunc(&dw->dw_callout, &linux_workqueue_timeout, dw);
    566 	TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
    567 	dw->dw_state = DELAYED_WORK_SCHEDULED;
    568 }
    569 
    570 /*
    571  * cancel_delayed_work_done(wq, dw)
    572  *
    573  *	Complete cancellation of a delayed work: transition from
    574  *	DELAYED_WORK_CANCELLED to DELAYED_WORK_IDLE and off the
    575  *	workqueue.  Caller must not touch dw after this returns.
    576  */
    577 static void
    578 cancel_delayed_work_done(struct workqueue_struct *wq, struct delayed_work *dw)
    579 {
    580 
    581 	KASSERT(mutex_owned(&wq->wq_lock));
    582 	KASSERT(dw->work.work_queue == wq);
    583 	KASSERT(dw->dw_state == DELAYED_WORK_CANCELLED);
    584 	dw->dw_state = DELAYED_WORK_IDLE;
    585 	callout_destroy(&dw->dw_callout);
    586 	TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
    587 	release_work(&dw->work, wq);
    588 	/* Can't touch dw after this point.  */
    589 }
    590 
    591 /*
    592  * queue_delayed_work(wq, dw, ticks)
    593  *
    594  *	If it is not currently scheduled, schedule dw to run after
    595  *	ticks.  If currently executing and not already rescheduled,
    596  *	reschedule it.  If ticks == 0, run without delay.
    597  */
    598 bool
    599 queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    600     unsigned long ticks)
    601 {
    602 	struct workqueue_struct *wq0;
    603 	bool newly_queued;
    604 
    605 	mutex_enter(&wq->wq_lock);
    606 	if (__predict_true((wq0 = acquire_work(&dw->work, wq)) == NULL)) {
    607 		/*
    608 		 * It wasn't on any workqueue at all.  Schedule it to
    609 		 * run on this one.
    610 		 */
    611 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
    612 		if (ticks == 0) {
    613 			TAILQ_INSERT_TAIL(&wq->wq_queue, &dw->work,
    614 			    work_entry);
    615 			cv_broadcast(&wq->wq_cv);
    616 		} else {
    617 			/*
    618 			 * Initialize a callout and schedule to run
    619 			 * after a delay.
    620 			 */
    621 			dw_callout_init(wq, dw);
    622 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
    623 		}
    624 		newly_queued = true;
    625 	} else {
    626 		/*
    627 		 * It was on a workqueue, which had better be this one.
    628 		 *
    629 		 * - If it has already begun to run, and it is not yet
    630 		 *   scheduled to run again, schedule it again.
    631 		 *
    632 		 * - If the callout is cancelled, reschedule it.
    633 		 *
    634 		 * - Otherwise, leave it alone.
    635 		 */
    636 		KASSERT(wq0 == wq);
    637 		if (wq->wq_current_work != &dw->work || !wq->wq_requeued) {
    638 			/*
    639 			 * It is either scheduled, on the queue but not
    640 			 * in progress, or in progress but not on the
    641 			 * queue.
    642 			 */
    643 			switch (dw->dw_state) {
    644 			case DELAYED_WORK_IDLE:
    645 				/*
    646 				 * It is not scheduled to run, and it
    647 				 * is not on the queue if it is
    648 				 * running.
    649 				 */
    650 				if (ticks == 0) {
    651 					/*
    652 					 * If it's in progress, put it
    653 					 * on the queue to run as soon
    654 					 * as the worker thread gets to
    655 					 * it.  No need for a wakeup
    656 					 * because either the worker
    657 					 * thread already knows it is
    658 					 * on the queue, or will check
    659 					 * once it is done executing.
    660 					 */
    661 					if (wq->wq_current_work == &dw->work) {
    662 						KASSERT(!wq->wq_requeued);
    663 						TAILQ_INSERT_TAIL(&wq->wq_queue,
    664 						    &dw->work, work_entry);
    665 						wq->wq_requeued = true;
    666 					}
    667 				} else {
    668 					/*
    669 					 * Initialize a callout and
    670 					 * schedule it to run after the
    671 					 * specified delay.
    672 					 */
    673 					dw_callout_init(wq, dw);
    674 					callout_schedule(&dw->dw_callout,
    675 					    MIN(INT_MAX, ticks));
    676 				}
    677 				break;
    678 			case DELAYED_WORK_SCHEDULED:
    679 			case DELAYED_WORK_RESCHEDULED:
    680 				/*
    681 				 * It is already scheduled to run after
    682 				 * a delay.  Leave it be.
    683 				 */
    684 				break;
    685 			case DELAYED_WORK_CANCELLED:
    686 				/*
    687 				 * It was scheduled and the callout has
    688 				 * begun to execute, but it was
    689 				 * cancelled.  Reschedule it.
    690 				 */
    691 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
    692 				callout_schedule(&dw->dw_callout,
    693 				    MIN(INT_MAX, ticks));
    694 				break;
    695 			default:
    696 				panic("invalid delayed work state: %d",
    697 				    dw->dw_state);
    698 			}
    699 		} else {
    700 			/*
    701 			 * It is in progress and it has been requeued.
    702 			 * It cannot be scheduled to run after a delay
    703 			 * at this point.  We just leave it be.
    704 			 */
    705 			KASSERTMSG((dw->dw_state == DELAYED_WORK_IDLE),
    706 			    "delayed work %p in wrong state: %d",
    707 			    dw, dw->dw_state);
    708 		}
    709 	}
    710 	mutex_exit(&wq->wq_lock);
    711 
    712 	return newly_queued;
    713 }
    714 
/*
 * mod_delayed_work(wq, dw, ticks)
 *
 *	Schedule dw to run after ticks.  If currently scheduled,
 *	reschedule it.  If currently executing, reschedule it.  If
 *	ticks == 0, run without delay.
 *
 *	Returns true if dw was already scheduled or waiting on the
 *	queue when called, false if it was on no workqueue or was
 *	executing without having been requeued.
 */
bool
mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    unsigned long ticks)
{
	struct workqueue_struct *wq0;
	bool timer_modified;

	mutex_enter(&wq->wq_lock);
	if ((wq0 = acquire_work(&dw->work, wq)) == NULL) {
		/*
		 * It wasn't on any workqueue at all.  Schedule it to
		 * run on this one.
		 */
		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
		if (ticks == 0) {
			/*
			 * Run immediately: put it on the queue and
			 * signal the worker thread.
			 */
			TAILQ_INSERT_TAIL(&wq->wq_queue, &dw->work,
			    work_entry);
			cv_broadcast(&wq->wq_cv);
		} else {
			/*
			 * Initialize a callout and schedule to run
			 * after a delay.
			 */
			dw_callout_init(wq, dw);
			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
		}
		timer_modified = false;
	} else {
		/* It was on a workqueue, which had better be this one.  */
		KASSERT(wq0 == wq);
		switch (dw->dw_state) {
		case DELAYED_WORK_IDLE:
			/*
			 * It is not scheduled: it is on the queue or
			 * it is running or both.
			 */
			if (wq->wq_current_work != &dw->work) {
				/* It is on the queue and not yet running.  */
				if (ticks == 0) {
					/*
					 * We ask it to run
					 * immediately.  Leave it on
					 * the queue.
					 */
				} else {
					/*
					 * Take it off the queue and
					 * schedule a callout to run it
					 * after a delay.
					 */
					TAILQ_REMOVE(&wq->wq_queue, &dw->work,
					    work_entry);
					dw_callout_init(wq, dw);
					callout_schedule(&dw->dw_callout,
					    MIN(INT_MAX, ticks));
				}
				timer_modified = true;
			} else if (wq->wq_requeued) {
				/*
				 * It is currently running _and_ it is
				 * on the queue again.
				 */
				if (ticks == 0) {
					/*
					 * We ask it to run
					 * immediately.  Leave it on
					 * the queue.
					 */
				} else {
					/*
					 * Take it off the queue and
					 * schedule a callout to run it
					 * after a delay.
					 */
					wq->wq_requeued = false;
					TAILQ_REMOVE(&wq->wq_queue, &dw->work,
					    work_entry);
					dw_callout_init(wq, dw);
					callout_schedule(&dw->dw_callout,
					    MIN(INT_MAX, ticks));
				}
				timer_modified = true;
			} else {
				/*
				 * It is currently running and has not
				 * been requeued.
				 */
				if (ticks == 0) {
					/*
					 * We ask it to run
					 * immediately.  Put it on the
					 * queue again.
					 */
					wq->wq_requeued = true;
					TAILQ_INSERT_TAIL(&wq->wq_queue,
					    &dw->work, work_entry);
				} else {
					/*
					 * Schedule a callout to run it
					 * after a delay.
					 */
					dw_callout_init(wq, dw);
					callout_schedule(&dw->dw_callout,
					    MIN(INT_MAX, ticks));
				}
				timer_modified = false;
			}
			break;
		case DELAYED_WORK_SCHEDULED:
			/*
			 * It is scheduled to run after a delay.  Try
			 * to stop it and reschedule it; if we can't,
			 * either reschedule it or cancel it to put it
			 * on the queue, and inform the callout.
			 */
			if (callout_stop(&dw->dw_callout)) {
				/* Can't stop, callout has begun.  */
				if (ticks == 0) {
					/*
					 * We don't actually need to do
					 * anything.  The callout will
					 * queue it as soon as it gets
					 * the lock.
					 */
				} else {
					/*
					 * Schedule callout and tell
					 * the instance that's running
					 * now that it's been
					 * rescheduled.
					 */
					dw->dw_state = DELAYED_WORK_RESCHEDULED;
					callout_schedule(&dw->dw_callout,
					    MIN(INT_MAX, ticks));
				}
			} else {
				/* Stopped the callout before it began.  */
				if (ticks == 0) {
					/*
					 * Run immediately: destroy the
					 * callout, put it on the
					 * queue, and signal the worker
					 * thread.
					 */
					dw->dw_state = DELAYED_WORK_IDLE;
					callout_destroy(&dw->dw_callout);
					TAILQ_REMOVE(&wq->wq_delayed, dw,
					    dw_entry);
					TAILQ_INSERT_TAIL(&wq->wq_queue,
					    &dw->work, work_entry);
					cv_broadcast(&wq->wq_cv);
				} else {
					/*
					 * Reschedule the callout.  No
					 * state change.
					 */
					callout_schedule(&dw->dw_callout,
					    MIN(INT_MAX, ticks));
				}
			}
			timer_modified = true;
			break;
		case DELAYED_WORK_RESCHEDULED:
		case DELAYED_WORK_CANCELLED:
			/*
			 * Someone modified the timer _again_, or
			 * cancelled it, after the callout started but
			 * before the poor thing even had a chance to
			 * acquire the lock.
			 */
			if (ticks == 0) {
				/*
				 * We can just switch back to
				 * DELAYED_WORK_SCHEDULED so that the
				 * callout will queue the work as soon
				 * as it gets the lock.
				 */
				dw->dw_state = DELAYED_WORK_SCHEDULED;
			} else {
				/* Reschedule it.  */
				callout_schedule(&dw->dw_callout,
				    MIN(INT_MAX, ticks));
				dw->dw_state = DELAYED_WORK_RESCHEDULED;
			}
			timer_modified = true;
			break;
		default:
			panic("invalid delayed work state: %d", dw->dw_state);
		}
	}
	mutex_exit(&wq->wq_lock);

	return timer_modified;
}
    919 
    920 bool
    921 cancel_delayed_work(struct delayed_work *dw)
    922 {
    923 	struct workqueue_struct *wq;
    924 	bool cancelled_p;
    925 
    926 	/* If there's no workqueue, nothing to cancel.   */
    927 	if ((wq = dw->work.work_queue) == NULL)
    928 		return false;
    929 
    930 	mutex_enter(&wq->wq_lock);
    931 	if (__predict_false(dw->work.work_queue != wq)) {
    932 		cancelled_p = false;
    933 	} else {
    934 		switch (dw->dw_state) {
    935 		case DELAYED_WORK_IDLE:
    936 			if (wq->wq_current_work == &dw->work) {
    937 				/*
    938 				 * Too late, it's already running.  If
    939 				 * it's been requeued, tough -- it'll
    940 				 * run again.
    941 				 */
    942 				cancelled_p = false;
    943 			} else {
    944 				/* Got in before it started.  Remove it.  */
    945 				TAILQ_REMOVE(&wq->wq_queue, &dw->work,
    946 				    work_entry);
    947 				cancelled_p = true;
    948 			}
    949 			break;
    950 		case DELAYED_WORK_SCHEDULED:
    951 		case DELAYED_WORK_RESCHEDULED:
    952 		case DELAYED_WORK_CANCELLED:
    953 			/*
    954 			 * If it is scheduled, mark it cancelled and
    955 			 * try to stop the callout before it starts.
    956 			 *
    957 			 * If it's too late and the callout has already
    958 			 * begun to execute, tough.
    959 			 *
    960 			 * If we stopped the callout before it started,
    961 			 * however, then destroy the callout and
    962 			 * dissociate it from the workqueue ourselves.
    963 			 */
    964 			dw->dw_state = DELAYED_WORK_CANCELLED;
    965 			cancelled_p = true;
    966 			if (!callout_stop(&dw->dw_callout))
    967 				cancel_delayed_work_done(wq, dw);
    968 			break;
    969 		default:
    970 			panic("invalid delayed work state: %d",
    971 			    dw->dw_state);
    972 		}
    973 	}
    974 	mutex_exit(&wq->wq_lock);
    975 
    976 	return cancelled_p;
    977 }
    978 
    979 bool
    980 cancel_delayed_work_sync(struct delayed_work *dw)
    981 {
    982 	struct workqueue_struct *wq;
    983 	bool cancelled_p;
    984 
    985 	/* If there's no workqueue, nothing to cancel.  */
    986 	if ((wq = dw->work.work_queue) == NULL)
    987 		return false;
    988 
    989 	mutex_enter(&wq->wq_lock);
    990 	if (__predict_false(dw->work.work_queue != wq)) {
    991 		cancelled_p = false;
    992 	} else {
    993 		switch (dw->dw_state) {
    994 		case DELAYED_WORK_IDLE:
    995 			if (wq->wq_current_work == &dw->work) {
    996 				/*
    997 				 * Too late, it's already running.
    998 				 * First, make sure it's not requeued.
    999 				 * Then wait for it to complete, at
   1000 				 * most one generation.
   1001 				 */
   1002 				uint64_t gen = wq->wq_gen;
   1003 				if (wq->wq_requeued) {
   1004 					TAILQ_REMOVE(&wq->wq_queue, &dw->work,
   1005 					    work_entry);
   1006 					wq->wq_requeued = false;
   1007 				}
   1008 				do {
   1009 					cv_wait(&wq->wq_cv, &wq->wq_lock);
   1010 				} while (wq->wq_current_work == &dw->work &&
   1011 				    wq->wq_gen == gen);
   1012 				cancelled_p = false;
   1013 			} else {
   1014 				/* Got in before it started.  Remove it.  */
   1015 				TAILQ_REMOVE(&wq->wq_queue, &dw->work,
   1016 				    work_entry);
   1017 				cancelled_p = true;
   1018 			}
   1019 			break;
   1020 		case DELAYED_WORK_SCHEDULED:
   1021 		case DELAYED_WORK_RESCHEDULED:
   1022 		case DELAYED_WORK_CANCELLED:
   1023 			/*
   1024 			 * If it is scheduled, mark it cancelled and
   1025 			 * try to stop the callout before it starts.
   1026 			 *
   1027 			 * If it's too late and the callout has already
   1028 			 * begun to execute, we must wait for it to
   1029 			 * complete.  But we got in soon enough to ask
   1030 			 * the callout not to run, so we successfully
   1031 			 * cancelled it in that case.
   1032 			 *
   1033 			 * If we stopped the callout before it started,
   1034 			 * however, then destroy the callout and
   1035 			 * dissociate it from the workqueue ourselves.
   1036 			 */
   1037 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1038 			cancelled_p = true;
   1039 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
   1040 				cancel_delayed_work_done(wq, dw);
   1041 			break;
   1042 		default:
   1043 			panic("invalid delayed work state: %d",
   1044 			    dw->dw_state);
   1045 		}
   1046 	}
   1047 	mutex_exit(&wq->wq_lock);
   1048 
   1049 	return cancelled_p;
   1050 }
   1051 
   1052 /*
   1054  * Flush
   1055  */
   1056 
   1057 void
   1058 flush_scheduled_work(void)
   1059 {
   1060 
   1061 	flush_workqueue(system_wq);
   1062 }
   1063 
   1064 static void
   1065 flush_workqueue_locked(struct workqueue_struct *wq)
   1066 {
   1067 	uint64_t gen;
   1068 
   1069 	KASSERT(mutex_owned(&wq->wq_lock));
   1070 
   1071 	/* Get the current generation number.  */
   1072 	gen = wq->wq_gen;
   1073 
   1074 	/*
   1075 	 * If there's a batch of work in progress, we must wait for the
   1076 	 * worker thread to finish that batch.
   1077 	 */
   1078 	if (wq->wq_current_work != NULL)
   1079 		gen++;
   1080 
   1081 	/*
   1082 	 * If there's any work yet to be claimed from the queue by the
   1083 	 * worker thread, we must wait for it to finish one more batch
   1084 	 * too.
   1085 	 */
   1086 	if (!TAILQ_EMPTY(&wq->wq_queue))
   1087 		gen++;
   1088 
   1089 	/* Wait until the generation number has caught up.  */
   1090 	while (wq->wq_gen < gen)
   1091 		cv_wait(&wq->wq_cv, &wq->wq_lock);
   1092 }
   1093 
   1094 void
   1095 flush_workqueue(struct workqueue_struct *wq)
   1096 {
   1097 
   1098 	mutex_enter(&wq->wq_lock);
   1099 	flush_workqueue_locked(wq);
   1100 	mutex_exit(&wq->wq_lock);
   1101 }
   1102 
   1103 void
   1104 flush_work(struct work_struct *work)
   1105 {
   1106 	struct workqueue_struct *wq;
   1107 
   1108 	/* If there's no workqueue, nothing to flush.  */
   1109 	if ((wq = work->work_queue) == NULL)
   1110 		return;
   1111 
   1112 	flush_workqueue(wq);
   1113 }
   1114 
   1115 void
   1116 flush_delayed_work(struct delayed_work *dw)
   1117 {
   1118 	struct workqueue_struct *wq;
   1119 
   1120 	/* If there's no workqueue, nothing to flush.  */
   1121 	if ((wq = dw->work.work_queue) == NULL)
   1122 		return;
   1123 
   1124 	mutex_enter(&wq->wq_lock);
   1125 	if (__predict_true(dw->work.work_queue == wq)) {
   1126 		switch (dw->dw_state) {
   1127 		case DELAYED_WORK_IDLE:
   1128 			/*
   1129 			 * It has a workqueue assigned and the callout
   1130 			 * is idle, so it must be in progress or on the
   1131 			 * queue.  In that case, wait for it to
   1132 			 * complete.  Waiting for the whole queue to
   1133 			 * flush is overkill, but doesn't hurt.
   1134 			 */
   1135 			flush_workqueue_locked(wq);
   1136 			break;
   1137 		case DELAYED_WORK_SCHEDULED:
   1138 		case DELAYED_WORK_RESCHEDULED:
   1139 		case DELAYED_WORK_CANCELLED:
   1140 			/*
   1141 			 * The callout is still scheduled to run.
   1142 			 * Notify it that we are cancelling, and try to
   1143 			 * stop the callout before it runs.
   1144 			 *
   1145 			 * If we do stop the callout, we are now
   1146 			 * responsible for dissociating the work from
   1147 			 * the queue.
   1148 			 *
   1149 			 * Otherwise, wait for it to complete and
   1150 			 * dissociate itself -- it will not put itself
   1151 			 * on the workqueue once it is cancelled.
   1152 			 */
   1153 			dw->dw_state = DELAYED_WORK_CANCELLED;
   1154 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
   1155 				cancel_delayed_work_done(wq, dw);
   1156 		default:
   1157 			panic("invalid delayed work state: %d",
   1158 			    dw->dw_state);
   1159 		}
   1160 	}
   1161 	mutex_exit(&wq->wq_lock);
   1162 }
   1163