linux_work.c revision 1.57 1 /* $NetBSD: linux_work.c,v 1.57 2021/12/19 12:11:28 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 2018 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Taylor R. Campbell.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: linux_work.c,v 1.57 2021/12/19 12:11:28 riastradh Exp $");
34
35 #include <sys/types.h>
36 #include <sys/atomic.h>
37 #include <sys/callout.h>
38 #include <sys/condvar.h>
39 #include <sys/errno.h>
40 #include <sys/kmem.h>
41 #include <sys/kthread.h>
42 #include <sys/lwp.h>
43 #include <sys/mutex.h>
44 #ifndef _MODULE
45 #include <sys/once.h>
46 #endif
47 #include <sys/queue.h>
48 #include <sys/sdt.h>
49
50 #include <linux/workqueue.h>
51
TAILQ_HEAD(work_head, work_struct);
TAILQ_HEAD(dwork_head, delayed_work);

/*
 * struct workqueue_struct
 *
 *	State of one workqueue: a single worker thread (wq_lwp) serving
 *	the queues and lists below.  All fields are serialized by
 *	wq_lock; wq_cv is broadcast whenever anything a waiter might be
 *	waiting for changes.
 */
struct workqueue_struct {
	kmutex_t		wq_lock;	/* serializes everything below */
	kcondvar_t		wq_cv;		/* signalled on any state change */
	struct dwork_head	wq_delayed;	/* delayed work scheduled */
	struct work_head	wq_rcu;		/* RCU work scheduled */
	struct work_head	wq_queue;	/* work to run */
	struct work_head	wq_dqueue;	/* delayed work to run now */
	struct work_struct	*wq_current_work; /* item the thread is running */
	int			wq_flags;
	bool			wq_dying;	/* set once by destroy_workqueue */
	uint64_t		wq_gen;		/* batch generation, for waiters */
	struct lwp		*wq_lwp;	/* the worker thread */
	const char		*wq_name;
};

/* Worker thread, callout handler, and internal helpers. */
static void __dead	linux_workqueue_thread(void *);
static void		linux_workqueue_timeout(void *);
static bool		work_claimed(struct work_struct *,
			    struct workqueue_struct *);
static struct workqueue_struct *
			work_queue(struct work_struct *);
static bool		acquire_work(struct work_struct *,
			    struct workqueue_struct *);
static void		release_work(struct work_struct *,
			    struct workqueue_struct *);
static void		wait_for_current_work(struct work_struct *,
			    struct workqueue_struct *);
static void		dw_callout_init(struct workqueue_struct *,
			    struct delayed_work *);
static void		dw_callout_destroy(struct workqueue_struct *,
			    struct delayed_work *);
static void		cancel_delayed_work_done(struct workqueue_struct *,
			    struct delayed_work *);

/* DTrace (SDT) probes marking every transition in a work item's life. */
SDT_PROBE_DEFINE2(sdt, linux, work, acquire,
    "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE2(sdt, linux, work, release,
    "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE2(sdt, linux, work, queue,
    "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE2(sdt, linux, work, rcu,
    "struct rcu_work *"/*work*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE2(sdt, linux, work, cancel,
    "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE3(sdt, linux, work, schedule,
    "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/,
    "unsigned long"/*ticks*/);
SDT_PROBE_DEFINE2(sdt, linux, work, timer,
    "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE2(sdt, linux, work, wait__start,
    "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE2(sdt, linux, work, wait__done,
    "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE2(sdt, linux, work, run,
    "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE2(sdt, linux, work, done,
    "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE1(sdt, linux, work, batch__start,
    "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE1(sdt, linux, work, batch__done,
    "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE1(sdt, linux, work, flush__start,
    "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE1(sdt, linux, work, flush__done,
    "struct workqueue_struct *"/*wq*/);

/* lwp-specific key: maps a worker LWP back to the workqueue it serves. */
static specificdata_key_t workqueue_key __read_mostly;

/* System-wide workqueues; each is created by linux_workqueue_init0. */
struct workqueue_struct *system_highpri_wq __read_mostly;
struct workqueue_struct *system_long_wq __read_mostly;
struct workqueue_struct *system_power_efficient_wq __read_mostly;
struct workqueue_struct *system_unbound_wq __read_mostly;
struct workqueue_struct *system_wq __read_mostly;
128
129 static inline uintptr_t
130 atomic_cas_uintptr(volatile uintptr_t *p, uintptr_t old, uintptr_t new)
131 {
132
133 return (uintptr_t)atomic_cas_ptr(p, (void *)old, (void *)new);
134 }
135
136 /*
137 * linux_workqueue_init()
138 *
139 * Initialize the Linux workqueue subsystem. Return 0 on success,
140 * NetBSD error on failure.
141 */
142 static int
143 linux_workqueue_init0(void)
144 {
145 int error;
146
147 error = lwp_specific_key_create(&workqueue_key, NULL);
148 if (error)
149 goto out;
150
151 system_highpri_wq = alloc_ordered_workqueue("lnxhipwq", 0);
152 if (system_highpri_wq == NULL) {
153 error = ENOMEM;
154 goto out;
155 }
156
157 system_long_wq = alloc_ordered_workqueue("lnxlngwq", 0);
158 if (system_long_wq == NULL) {
159 error = ENOMEM;
160 goto out;
161 }
162
163 system_power_efficient_wq = alloc_ordered_workqueue("lnxpwrwq", 0);
164 if (system_power_efficient_wq == NULL) {
165 error = ENOMEM;
166 goto out;
167 }
168
169 system_unbound_wq = alloc_ordered_workqueue("lnxubdwq", 0);
170 if (system_unbound_wq == NULL) {
171 error = ENOMEM;
172 goto out;
173 }
174
175 system_wq = alloc_ordered_workqueue("lnxsyswq", 0);
176 if (system_wq == NULL) {
177 error = ENOMEM;
178 goto out;
179 }
180
181 /* Success! */
182 error = 0;
183
184 out: if (error) {
185 if (system_highpri_wq)
186 destroy_workqueue(system_highpri_wq);
187 if (system_long_wq)
188 destroy_workqueue(system_long_wq);
189 if (system_power_efficient_wq)
190 destroy_workqueue(system_power_efficient_wq);
191 if (system_unbound_wq)
192 destroy_workqueue(system_unbound_wq);
193 if (system_wq)
194 destroy_workqueue(system_wq);
195 if (workqueue_key)
196 lwp_specific_key_delete(workqueue_key);
197 }
198
199 return error;
200 }
201
202 /*
203 * linux_workqueue_fini()
204 *
205 * Destroy the Linux workqueue subsystem. Never fails.
206 */
207 static void
208 linux_workqueue_fini0(void)
209 {
210
211 destroy_workqueue(system_power_efficient_wq);
212 destroy_workqueue(system_long_wq);
213 destroy_workqueue(system_wq);
214 lwp_specific_key_delete(workqueue_key);
215 }
216
#ifndef _MODULE
/* Built-in kernel: multiple callers may race to init; run init0 once. */
static ONCE_DECL(linux_workqueue_init_once);
#endif

/*
 * linux_workqueue_init()
 *
 *	Public entry point.  As a module, the module framework
 *	serializes load/unload, so call init0 directly; built in, guard
 *	with the once control so repeated calls are idempotent.
 */
int
linux_workqueue_init(void)
{
#ifdef _MODULE
	return linux_workqueue_init0();
#else
	return INIT_ONCE(&linux_workqueue_init_once, &linux_workqueue_init0);
#endif
}
230
/*
 * linux_workqueue_fini()
 *
 *	Public teardown entry point, mirroring linux_workqueue_init:
 *	direct call as a module, FINI_ONCE (paired with the INIT_ONCE
 *	above) when built in.
 */
void
linux_workqueue_fini(void)
{
#ifdef _MODULE
	return linux_workqueue_fini0();
#else
	return FINI_ONCE(&linux_workqueue_init_once, &linux_workqueue_fini0);
#endif
}
240
241 /*
243 * Workqueues
244 */
245
/*
 * alloc_workqueue(name, flags, max_active)
 *
 *	Create a workqueue of the given name.  max_active is the
 *	maximum number of work items in flight, or 0 for the default.
 *	Return NULL on failure, pointer to struct workqueue_struct
 *	object on success.
 *
 *	Only ordered queues (max_active 0 or 1) are supported; the
 *	KASSERT below enforces that.  May sleep (KM_SLEEP allocation
 *	and kthread creation).
 */
struct workqueue_struct *
alloc_workqueue(const char *name, int flags, unsigned max_active)
{
	struct workqueue_struct *wq;
	int error;

	KASSERT(max_active == 0 || max_active == 1);

	wq = kmem_zalloc(sizeof(*wq), KM_SLEEP);

	mutex_init(&wq->wq_lock, MUTEX_DEFAULT, IPL_VM);
	cv_init(&wq->wq_cv, name);
	TAILQ_INIT(&wq->wq_delayed);
	TAILQ_INIT(&wq->wq_rcu);
	TAILQ_INIT(&wq->wq_queue);
	TAILQ_INIT(&wq->wq_dqueue);
	wq->wq_current_work = NULL;
	wq->wq_flags = 0;
	wq->wq_dying = false;
	wq->wq_gen = 0;
	wq->wq_lwp = NULL;
	wq->wq_name = name;

	/*
	 * Create the worker thread last, once the queue state is fully
	 * initialized, so the failure path never has to stop a thread.
	 * KTHREAD_MUSTJOIN lets destroy_workqueue() kthread_join() it.
	 */
	error = kthread_create(PRI_NONE,
	    KTHREAD_MPSAFE|KTHREAD_TS|KTHREAD_MUSTJOIN, NULL,
	    &linux_workqueue_thread, wq, &wq->wq_lwp, "%s", name);
	if (error)
		goto fail0;

	return wq;

fail0:	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
	KASSERT(TAILQ_EMPTY(&wq->wq_rcu));
	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
	cv_destroy(&wq->wq_cv);
	mutex_destroy(&wq->wq_lock);
	kmem_free(wq, sizeof(*wq));
	return NULL;
}
294
/*
 * alloc_ordered_workqueue(name, flags)
 *
 *	Convenience wrapper: create a workqueue that runs at most one
 *	work item at a time, i.e. alloc_workqueue(name, flags, 1).
 */
struct workqueue_struct *
alloc_ordered_workqueue(const char *name, int flags)
{
	const unsigned max_active = 1;

	return alloc_workqueue(name, flags, max_active);
}
306
/*
 * destroy_workqueue(wq)
 *
 *	Destroy a workqueue created with wq.  Cancel any pending
 *	delayed work.  Wait for all queued work to complete.
 *
 *	May sleep.
 *
 *	Proceeds in phases: (1) cancel or drain all delayed work, (2)
 *	wait out RCU work, (3) tell the worker thread to die and join
 *	it, (4) free everything.  The phase order matters: a timed-out
 *	delayed work may queue new work, so delayed work must be dealt
 *	with before the queues are drained.
 */
void
destroy_workqueue(struct workqueue_struct *wq)
{

	/*
	 * Cancel all delayed work.  We do this first because any
	 * delayed work that that has already timed out, which we can't
	 * cancel, may have queued new work.
	 */
	mutex_enter(&wq->wq_lock);
	while (!TAILQ_EMPTY(&wq->wq_delayed)) {
		struct delayed_work *const dw = TAILQ_FIRST(&wq->wq_delayed);

		KASSERT(work_queue(&dw->work) == wq);
		KASSERTMSG((dw->dw_state == DELAYED_WORK_SCHEDULED ||
			dw->dw_state == DELAYED_WORK_RESCHEDULED ||
			dw->dw_state == DELAYED_WORK_CANCELLED),
		    "delayed work %p in bad state: %d",
		    dw, dw->dw_state);

		/*
		 * Mark it cancelled and try to stop the callout before
		 * it starts.
		 *
		 * If it's too late and the callout has already begun
		 * to execute, then it will notice that we asked to
		 * cancel it and remove itself from the queue before
		 * returning.
		 *
		 * If we stopped the callout before it started,
		 * however, then we can safely destroy the callout and
		 * dissociate it from the workqueue ourselves.
		 */
		SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
		dw->dw_state = DELAYED_WORK_CANCELLED;
		/* callout_halt drops and retakes wq_lock while waiting. */
		if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
			cancel_delayed_work_done(wq, dw);
	}
	mutex_exit(&wq->wq_lock);

	/* Wait for all scheduled RCU work to complete. */
	mutex_enter(&wq->wq_lock);
	while (!TAILQ_EMPTY(&wq->wq_rcu))
		cv_wait(&wq->wq_cv, &wq->wq_lock);
	mutex_exit(&wq->wq_lock);

	/*
	 * At this point, no new work can be put on the queue.
	 */

	/* Tell the thread to exit.  It drains the queues before dying. */
	mutex_enter(&wq->wq_lock);
	wq->wq_dying = true;
	cv_broadcast(&wq->wq_cv);
	mutex_exit(&wq->wq_lock);

	/* Wait for it to exit. */
	(void)kthread_join(wq->wq_lwp);

	/* Everything must be idle and empty before we free the memory. */
	KASSERT(wq->wq_dying);
	KASSERT(wq->wq_flags == 0);
	KASSERT(wq->wq_current_work == NULL);
	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
	KASSERT(TAILQ_EMPTY(&wq->wq_rcu));
	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
	cv_destroy(&wq->wq_cv);
	mutex_destroy(&wq->wq_lock);

	kmem_free(wq, sizeof(*wq));
}
386
387 /*
389 * Work thread and callout
390 */
391
/*
 * linux_workqueue_thread(cookie)
 *
 *	Main function for a workqueue's worker thread.  Waits until
 *	there is work queued, grabs a batch of work off the queue,
 *	executes it all, bumps the generation number, and repeats,
 *	until dying.
 *
 *	cookie is the struct workqueue_struct * this thread serves.
 */
static void __dead
linux_workqueue_thread(void *cookie)
{
	struct workqueue_struct *const wq = cookie;
	/* Both queues: ordinary work first, then expired delayed work. */
	struct work_head *const q[2] = { &wq->wq_queue, &wq->wq_dqueue };
	struct work_struct marker, *work;
	unsigned i;

	/* Let current_work() find this workqueue from our LWP. */
	lwp_setspecific(workqueue_key, wq);

	mutex_enter(&wq->wq_lock);
	for (;;) {
		/*
		 * Wait until there's activity.  If there's no work and
		 * we're dying, stop here.
		 */
		if (TAILQ_EMPTY(&wq->wq_queue) &&
		    TAILQ_EMPTY(&wq->wq_dqueue)) {
			if (wq->wq_dying)
				break;
			cv_wait(&wq->wq_cv, &wq->wq_lock);
			continue;
		}

		/*
		 * Start a batch of work.  Use a marker to delimit when
		 * the batch ends so we can advance the generation
		 * after the batch.  Work queued while we run this
		 * batch lands after the marker and waits its turn.
		 */
		SDT_PROBE1(sdt, linux, work, batch__start,  wq);
		for (i = 0; i < 2; i++) {
			if (TAILQ_EMPTY(q[i]))
				continue;
			TAILQ_INSERT_TAIL(q[i], &marker, work_entry);
			while ((work = TAILQ_FIRST(q[i])) != &marker) {
				void (*func)(struct work_struct *);

				KASSERT(work_queue(work) == wq);
				KASSERT(work_claimed(work, wq));
				KASSERTMSG((q[i] != &wq->wq_dqueue ||
					container_of(work, struct delayed_work,
					    work)->dw_state ==
					DELAYED_WORK_IDLE),
				    "delayed work %p queued and scheduled",
				    work);

				TAILQ_REMOVE(q[i], work, work_entry);
				KASSERT(wq->wq_current_work == NULL);
				wq->wq_current_work = work;
				/* Save func: release_work frees our claim. */
				func = work->func;
				release_work(work, wq);
				/* Can't dereference work after this point. */

				/* Run the work function unlocked. */
				mutex_exit(&wq->wq_lock);
				SDT_PROBE2(sdt, linux, work, run,  work, wq);
				(*func)(work);
				SDT_PROBE2(sdt, linux, work, done,  work, wq);
				mutex_enter(&wq->wq_lock);

				KASSERT(wq->wq_current_work == work);
				wq->wq_current_work = NULL;
				/* Wake cancel_work_sync waiters. */
				cv_broadcast(&wq->wq_cv);
			}
			TAILQ_REMOVE(q[i], &marker, work_entry);
		}

		/* Notify cancel that we've completed a batch of work. */
		wq->wq_gen++;
		cv_broadcast(&wq->wq_cv);
		SDT_PROBE1(sdt, linux, work, batch__done,  wq);
	}
	mutex_exit(&wq->wq_lock);

	kthread_exit(0);
}
475
/*
 * linux_workqueue_timeout(cookie)
 *
 *	Delayed work timeout callback: fires when a delayed work's
 *	callout expires.  cookie is the struct delayed_work *.
 *
 *	- If scheduled, queue it.
 *	- If rescheduled, callout_schedule ourselves again.
 *	- If cancelled, destroy the callout and release the work from
 *	  the workqueue.
 */
static void
linux_workqueue_timeout(void *cookie)
{
	struct delayed_work *const dw = cookie;
	struct workqueue_struct *const wq = work_queue(&dw->work);

	KASSERTMSG(wq != NULL,
	    "delayed work %p state %d resched %d",
	    dw, dw->dw_state, dw->dw_resched);

	SDT_PROBE2(sdt, linux, work, timer,  dw, wq);

	mutex_enter(&wq->wq_lock);
	KASSERT(work_queue(&dw->work) == wq);
	switch (dw->dw_state) {
	case DELAYED_WORK_IDLE:
		panic("delayed work callout uninitialized: %p", dw);
	case DELAYED_WORK_SCHEDULED:
		/* Timer fired normally: move it to the run-now queue. */
		dw_callout_destroy(wq, dw);
		TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work, work_entry);
		cv_broadcast(&wq->wq_cv);
		SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
		break;
	case DELAYED_WORK_RESCHEDULED:
		/* Someone moved the deadline while we were pending. */
		KASSERT(dw->dw_resched >= 0);
		callout_schedule(&dw->dw_callout, dw->dw_resched);
		dw->dw_state = DELAYED_WORK_SCHEDULED;
		dw->dw_resched = -1;
		break;
	case DELAYED_WORK_CANCELLED:
		cancel_delayed_work_done(wq, dw);
		/* Can't dereference dw after this point. */
		goto out;
	default:
		panic("delayed work callout in bad state: %p", dw);
	}
	KASSERT(dw->dw_state == DELAYED_WORK_IDLE ||
	    dw->dw_state == DELAYED_WORK_SCHEDULED);
out:	mutex_exit(&wq->wq_lock);
}
526
527 /*
528 * current_work()
529 *
530 * If in a workqueue worker thread, return the work it is
531 * currently executing. Otherwise return NULL.
532 */
533 struct work_struct *
534 current_work(void)
535 {
536 struct workqueue_struct *wq = lwp_getspecific(workqueue_key);
537
538 /* If we're not a workqueue thread, then there's no work. */
539 if (wq == NULL)
540 return NULL;
541
542 /*
543 * Otherwise, this should be possible only while work is in
544 * progress. Return the current work item.
545 */
546 KASSERT(wq->wq_current_work != NULL);
547 return wq->wq_current_work;
548 }
549
550 /*
552 * Work
553 */
554
555 /*
556 * INIT_WORK(work, fn)
557 *
558 * Initialize work for use with a workqueue to call fn in a worker
559 * thread. There is no corresponding destruction operation.
560 */
561 void
562 INIT_WORK(struct work_struct *work, void (*fn)(struct work_struct *))
563 {
564
565 work->work_owner = 0;
566 work->func = fn;
567 }
568
569 /*
570 * work_claimed(work, wq)
571 *
572 * True if work is currently claimed by a workqueue, meaning it is
573 * either on the queue or scheduled in a callout. The workqueue
574 * must be wq, and caller must hold wq's lock.
575 */
576 static bool
577 work_claimed(struct work_struct *work, struct workqueue_struct *wq)
578 {
579
580 KASSERT(work_queue(work) == wq);
581 KASSERT(mutex_owned(&wq->wq_lock));
582
583 return atomic_load_relaxed(&work->work_owner) & 1;
584 }
585
586 /*
587 * work_pending(work)
588 *
589 * True if work is currently claimed by any workqueue, scheduled
590 * to run on that workqueue.
591 */
592 bool
593 work_pending(const struct work_struct *work)
594 {
595
596 return atomic_load_relaxed(&work->work_owner) & 1;
597 }
598
599 /*
600 * work_queue(work)
601 *
602 * Return the last queue that work was queued on, or NULL if it
603 * was never queued.
604 */
605 static struct workqueue_struct *
606 work_queue(struct work_struct *work)
607 {
608
609 return (struct workqueue_struct *)
610 (atomic_load_relaxed(&work->work_owner) & ~(uintptr_t)1);
611 }
612
/*
 * acquire_work(work, wq)
 *
 *	Try to claim work for wq.  If work is already claimed, it must
 *	be claimed by wq; return false.  If work is not already
 *	claimed, claim it, issue a memory barrier to match any prior
 *	release_work, and return true.
 *
 *	Caller must hold wq's lock.
 */
static bool
acquire_work(struct work_struct *work, struct workqueue_struct *wq)
{
	uintptr_t owner0, owner;

	KASSERT(mutex_owned(&wq->wq_lock));
	/* The low bit of the owner word is the claim flag. */
	KASSERT(((uintptr_t)wq & 1) == 0);

	owner = (uintptr_t)wq | 1;
	do {
		owner0 = atomic_load_relaxed(&work->work_owner);
		if (owner0 & 1) {
			/* Already claimed -- must be by us. */
			KASSERT((owner0 & ~(uintptr_t)1) == (uintptr_t)wq);
			return false;
		}
		/* Unclaimed: last queue was either none or this one. */
		KASSERT(owner0 == (uintptr_t)NULL || owner0 == (uintptr_t)wq);
	} while (atomic_cas_uintptr(&work->work_owner, owner0, owner) !=
	    owner0);

	KASSERT(work_queue(work) == wq);
	/* Pairs with membar_exit in release_work. */
	membar_enter();
	SDT_PROBE2(sdt, linux, work, acquire,  work, wq);
	return true;
}
647
/*
 * release_work(work, wq)
 *
 *	Issue a memory barrier to match any subsequent acquire_work and
 *	dissociate work from wq by clearing the claim bit (the queue
 *	pointer is left behind for work_queue()).
 *
 *	Caller must hold wq's lock and work must be associated with wq.
 */
static void
release_work(struct work_struct *work, struct workqueue_struct *wq)
{

	KASSERT(work_queue(work) == wq);
	KASSERT(mutex_owned(&wq->wq_lock));

	SDT_PROBE2(sdt, linux, work, release,  work, wq);
	/* Pairs with membar_enter in acquire_work. */
	membar_exit();

	/*
	 * Non-interlocked r/m/w is safe here because nobody else can
	 * write to this while the claimed bit is set and the workqueue
	 * lock is held.
	 */
	atomic_store_relaxed(&work->work_owner,
	    atomic_load_relaxed(&work->work_owner) & ~(uintptr_t)1);
}
674
675 /*
676 * schedule_work(work)
677 *
678 * If work is not already queued on system_wq, queue it to be run
679 * by system_wq's worker thread when it next can. True if it was
680 * newly queued, false if it was already queued. If the work was
681 * already running, queue it to run again.
682 *
683 * Caller must ensure work is not queued to run on a different
684 * workqueue.
685 */
686 bool
687 schedule_work(struct work_struct *work)
688 {
689
690 return queue_work(system_wq, work);
691 }
692
693 /*
694 * queue_work(wq, work)
695 *
696 * If work is not already queued on wq, queue it to be run by wq's
697 * worker thread when it next can. True if it was newly queued,
698 * false if it was already queued. If the work was already
699 * running, queue it to run again.
700 *
701 * Caller must ensure work is not queued to run on a different
702 * workqueue.
703 */
704 bool
705 queue_work(struct workqueue_struct *wq, struct work_struct *work)
706 {
707 bool newly_queued;
708
709 KASSERT(wq != NULL);
710
711 mutex_enter(&wq->wq_lock);
712 if (__predict_true(acquire_work(work, wq))) {
713 /*
714 * It wasn't on any workqueue at all. Put it on this
715 * one, and signal the worker thread that there is work
716 * to do.
717 */
718 TAILQ_INSERT_TAIL(&wq->wq_queue, work, work_entry);
719 cv_broadcast(&wq->wq_cv);
720 SDT_PROBE2(sdt, linux, work, queue, work, wq);
721 newly_queued = true;
722 } else {
723 /*
724 * It was already on this workqueue. Nothing to do
725 * since it is already queued.
726 */
727 newly_queued = false;
728 }
729 mutex_exit(&wq->wq_lock);
730
731 return newly_queued;
732 }
733
/*
 * cancel_work(work)
 *
 *	If work was queued, remove it from the queue and return true.
 *	If work was not queued, return false.  Work may still be
 *	running when this returns.
 */
bool
cancel_work(struct work_struct *work)
{
	struct workqueue_struct *wq;
	bool cancelled_p = false;

	/* If there's no workqueue, nothing to cancel. */
	if ((wq = work_queue(work)) == NULL)
		goto out;

	/*
	 * The unlocked work_queue read above is only a hint; re-check
	 * it under the lock before trusting it.
	 */
	mutex_enter(&wq->wq_lock);
	if (__predict_false(work_queue(work) != wq)) {
		/*
		 * It has finished execution or been cancelled by
		 * another thread, and has been moved off the
		 * workqueue, so it's too late to cancel.
		 */
		cancelled_p = false;
	} else {
		/* Check whether it's on the queue. */
		if (work_claimed(work, wq)) {
			/*
			 * It is still on the queue.  Take it off the
			 * queue and report successful cancellation.
			 */
			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
			release_work(work, wq);
			/* Can't dereference work after this point. */
			cancelled_p = true;
		} else {
			/* Not on the queue.  Couldn't cancel it. */
			cancelled_p = false;
		}
	}
	mutex_exit(&wq->wq_lock);

out:	return cancelled_p;
}
780
/*
 * cancel_work_sync(work)
 *
 *	If work was queued, remove it from the queue and return true.
 *	If work was not queued, return false.  Either way, if work is
 *	currently running, wait for it to complete.
 *
 *	May sleep.
 */
bool
cancel_work_sync(struct work_struct *work)
{
	struct workqueue_struct *wq;
	bool cancelled_p = false;

	/* If there's no workqueue, nothing to cancel. */
	if ((wq = work_queue(work)) == NULL)
		goto out;

	/* Unlocked work_queue read is a hint; re-check under the lock. */
	mutex_enter(&wq->wq_lock);
	if (__predict_false(work_queue(work) != wq)) {
		/*
		 * It has finished execution or been cancelled by
		 * another thread, and has been moved off the
		 * workqueue, so it's too late to cancel.
		 */
		cancelled_p = false;
	} else {
		/* Check whether it's on the queue. */
		if (work_claimed(work, wq)) {
			/*
			 * It is still on the queue.  Take it off the
			 * queue and report successful cancellation.
			 */
			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
			release_work(work, wq);
			/* Can't dereference work after this point. */
			cancelled_p = true;
		} else {
			/* Not on the queue.  Couldn't cancel it. */
			cancelled_p = false;
		}
		/* If it's still running, wait for it to complete. */
		if (wq->wq_current_work == work)
			wait_for_current_work(work, wq);
	}
	mutex_exit(&wq->wq_lock);

out:	return cancelled_p;
}
832
/*
 * wait_for_current_work(work, wq)
 *
 *	wq must be currently executing work.  Wait for it to finish.
 *
 *	Does not dereference work.
 *
 *	Caller must hold wq's lock (cv_wait drops and retakes it).
 */
static void
wait_for_current_work(struct work_struct *work, struct workqueue_struct *wq)
{
	uint64_t gen;

	KASSERT(mutex_owned(&wq->wq_lock));
	KASSERT(wq->wq_current_work == work);

	/*
	 * Wait only one generation in case it gets requeued quickly:
	 * either the work pointer changes, or the worker finishes a
	 * whole batch (wq_gen advances) -- both mean this execution of
	 * work is done.
	 */
	SDT_PROBE2(sdt, linux, work, wait__start,  work, wq);
	gen = wq->wq_gen;
	do {
		cv_wait(&wq->wq_cv, &wq->wq_lock);
	} while (wq->wq_current_work == work && wq->wq_gen == gen);
	SDT_PROBE2(sdt, linux, work, wait__done,  work, wq);
}
856
857 /*
859 * Delayed work
860 */
861
/*
 * INIT_DELAYED_WORK(dw, fn)
 *
 *	Initialize dw for use with a workqueue to call fn in a worker
 *	thread after a delay.  There is no corresponding destruction
 *	operation.
 */
void
INIT_DELAYED_WORK(struct delayed_work *dw, void (*fn)(struct work_struct *))
{

	INIT_WORK(&dw->work, fn);
	dw->dw_state = DELAYED_WORK_IDLE;
	dw->dw_resched = -1;

	/*
	 * Defer callout_init until we are going to schedule the
	 * callout, which can then callout_destroy it, because
	 * otherwise since there's no DESTROY_DELAYED_WORK or anything
	 * we have no opportunity to call callout_destroy.
	 */
}
884
885 /*
886 * schedule_delayed_work(dw, ticks)
887 *
888 * If it is not currently scheduled, schedule dw to run after
889 * ticks on system_wq. If currently executing and not already
890 * rescheduled, reschedule it. True if it was newly scheduled,
891 * false if it was already scheduled.
892 *
893 * If ticks == 0, queue it to run as soon as the worker can,
894 * without waiting for the next callout tick to run.
895 */
896 bool
897 schedule_delayed_work(struct delayed_work *dw, unsigned long ticks)
898 {
899
900 return queue_delayed_work(system_wq, dw, ticks);
901 }
902
/*
 * dw_callout_init(wq, dw)
 *
 *	Initialize the callout of dw, put dw on wq's delayed-work list,
 *	and transition to DELAYED_WORK_SCHEDULED.  Caller must use
 *	callout_schedule to actually arm the timer.
 */
static void
dw_callout_init(struct workqueue_struct *wq, struct delayed_work *dw)
{

	KASSERT(mutex_owned(&wq->wq_lock));
	KASSERT(work_queue(&dw->work) == wq);
	KASSERT(dw->dw_state == DELAYED_WORK_IDLE);

	callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
	callout_setfunc(&dw->dw_callout, &linux_workqueue_timeout, dw);
	TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
	dw->dw_state = DELAYED_WORK_SCHEDULED;
}
922
/*
 * dw_callout_destroy(wq, dw)
 *
 *	Undo dw_callout_init: take dw off wq's delayed-work list,
 *	destroy its callout, and transition to DELAYED_WORK_IDLE.
 *	Callout must already be stopped (not pending, not running).
 */
static void
dw_callout_destroy(struct workqueue_struct *wq, struct delayed_work *dw)
{

	KASSERT(mutex_owned(&wq->wq_lock));
	KASSERT(work_queue(&dw->work) == wq);
	KASSERT(dw->dw_state == DELAYED_WORK_SCHEDULED ||
	    dw->dw_state == DELAYED_WORK_RESCHEDULED ||
	    dw->dw_state == DELAYED_WORK_CANCELLED);

	TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
	callout_destroy(&dw->dw_callout);
	dw->dw_resched = -1;
	dw->dw_state = DELAYED_WORK_IDLE;
}
943
/*
 * cancel_delayed_work_done(wq, dw)
 *
 *	Complete cancellation of a delayed work: transition from
 *	DELAYED_WORK_CANCELLED to DELAYED_WORK_IDLE and off the
 *	workqueue.  Caller must not dereference dw after this returns,
 *	because releasing the work allows its owner to free or reuse it.
 */
static void
cancel_delayed_work_done(struct workqueue_struct *wq, struct delayed_work *dw)
{

	KASSERT(mutex_owned(&wq->wq_lock));
	KASSERT(work_queue(&dw->work) == wq);
	KASSERT(dw->dw_state == DELAYED_WORK_CANCELLED);

	dw_callout_destroy(wq, dw);
	release_work(&dw->work, wq);
	/* Can't dereference dw after this point. */
}
963
/*
 * queue_delayed_work(wq, dw, ticks)
 *
 *	If it is not currently scheduled, schedule dw to run after
 *	ticks on wq.  If currently queued, remove it from the queue
 *	first.  Returns true if dw was newly queued or rescheduled,
 *	false if it was left as already scheduled.
 *
 *	If ticks == 0, queue it to run as soon as the worker can,
 *	without waiting for the next callout tick to run.
 */
bool
queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    unsigned long ticks)
{
	bool newly_queued;

	mutex_enter(&wq->wq_lock);
	if (__predict_true(acquire_work(&dw->work, wq))) {
		/*
		 * It wasn't on any workqueue at all.  Schedule it to
		 * run on this one.
		 */
		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
		if (ticks == 0) {
			/* Skip the callout: straight to the run queue. */
			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
			    work_entry);
			cv_broadcast(&wq->wq_cv);
			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
		} else {
			/*
			 * Initialize a callout and schedule to run
			 * after a delay.
			 */
			dw_callout_init(wq, dw);
			/* dw_resched and callout times are int-sized. */
			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
		}
		newly_queued = true;
	} else {
		/* It was already on this workqueue. */
		switch (dw->dw_state) {
		case DELAYED_WORK_IDLE:
		case DELAYED_WORK_SCHEDULED:
		case DELAYED_WORK_RESCHEDULED:
			/* On the queue or already scheduled.  Leave it. */
			newly_queued = false;
			break;
		case DELAYED_WORK_CANCELLED:
			/*
			 * Scheduled and the callout began, but it was
			 * cancelled.  Reschedule it: the callout is
			 * already running, so just tell it (via the
			 * state and dw_resched) what to do next.
			 */
			if (ticks == 0) {
				dw->dw_state = DELAYED_WORK_SCHEDULED;
				SDT_PROBE2(sdt, linux, work, queue,
				    &dw->work, wq);
			} else {
				dw->dw_state = DELAYED_WORK_RESCHEDULED;
				dw->dw_resched = MIN(INT_MAX, ticks);
				SDT_PROBE3(sdt, linux, work, schedule,
				    dw, wq, ticks);
			}
			newly_queued = true;
			break;
		default:
			panic("invalid delayed work state: %d",
			    dw->dw_state);
		}
	}
	mutex_exit(&wq->wq_lock);

	return newly_queued;
}
1037
1038 /*
1039 * mod_delayed_work(wq, dw, ticks)
1040 *
1041 * Schedule dw to run after ticks. If scheduled or queued,
1042 * reschedule. If ticks == 0, run without delay.
1043 *
1044 * True if it modified the timer of an already scheduled work,
1045 * false if it newly scheduled the work.
1046 */
1047 bool
1048 mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
1049 unsigned long ticks)
1050 {
1051 bool timer_modified;
1052
1053 mutex_enter(&wq->wq_lock);
1054 if (acquire_work(&dw->work, wq)) {
1055 /*
1056 * It wasn't on any workqueue at all. Schedule it to
1057 * run on this one.
1058 */
1059 KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
1060 if (ticks == 0) {
1061 /*
1062 * Run immediately: put it on the queue and
1063 * signal the worker thread.
1064 */
1065 TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
1066 work_entry);
1067 cv_broadcast(&wq->wq_cv);
1068 SDT_PROBE2(sdt, linux, work, queue, &dw->work, wq);
1069 } else {
1070 /*
1071 * Initialize a callout and schedule to run
1072 * after a delay.
1073 */
1074 dw_callout_init(wq, dw);
1075 callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
1076 SDT_PROBE3(sdt, linux, work, schedule, dw, wq, ticks);
1077 }
1078 timer_modified = false;
1079 } else {
1080 /* It was already on this workqueue. */
1081 switch (dw->dw_state) {
1082 case DELAYED_WORK_IDLE:
1083 /* On the queue. */
1084 if (ticks == 0) {
1085 /* Leave it be. */
1086 SDT_PROBE2(sdt, linux, work, cancel,
1087 &dw->work, wq);
1088 SDT_PROBE2(sdt, linux, work, queue,
1089 &dw->work, wq);
1090 } else {
1091 /* Remove from the queue and schedule. */
1092 TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
1093 work_entry);
1094 dw_callout_init(wq, dw);
1095 callout_schedule(&dw->dw_callout,
1096 MIN(INT_MAX, ticks));
1097 SDT_PROBE2(sdt, linux, work, cancel,
1098 &dw->work, wq);
1099 SDT_PROBE3(sdt, linux, work, schedule,
1100 dw, wq, ticks);
1101 }
1102 timer_modified = true;
1103 break;
1104 case DELAYED_WORK_SCHEDULED:
1105 /*
1106 * It is scheduled to run after a delay. Try
1107 * to stop it and reschedule it; if we can't,
1108 * either reschedule it or cancel it to put it
1109 * on the queue, and inform the callout.
1110 */
1111 if (callout_stop(&dw->dw_callout)) {
1112 /* Can't stop, callout has begun. */
1113 if (ticks == 0) {
1114 /*
1115 * We don't actually need to do
1116 * anything. The callout will
1117 * queue it as soon as it gets
1118 * the lock.
1119 */
1120 SDT_PROBE2(sdt, linux, work, cancel,
1121 &dw->work, wq);
1122 SDT_PROBE2(sdt, linux, work, queue,
1123 &dw->work, wq);
1124 } else {
1125 /* Ask the callout to reschedule. */
1126 dw->dw_state = DELAYED_WORK_RESCHEDULED;
1127 dw->dw_resched = MIN(INT_MAX, ticks);
1128 SDT_PROBE2(sdt, linux, work, cancel,
1129 &dw->work, wq);
1130 SDT_PROBE3(sdt, linux, work, schedule,
1131 dw, wq, ticks);
1132 }
1133 } else {
1134 /* We stopped the callout before it began. */
1135 if (ticks == 0) {
1136 /*
1137 * Run immediately: destroy the
1138 * callout, put it on the
1139 * queue, and signal the worker
1140 * thread.
1141 */
1142 dw_callout_destroy(wq, dw);
1143 TAILQ_INSERT_TAIL(&wq->wq_dqueue,
1144 &dw->work, work_entry);
1145 cv_broadcast(&wq->wq_cv);
1146 SDT_PROBE2(sdt, linux, work, cancel,
1147 &dw->work, wq);
1148 SDT_PROBE2(sdt, linux, work, queue,
1149 &dw->work, wq);
1150 } else {
1151 /*
1152 * Reschedule the callout. No
1153 * state change.
1154 */
1155 callout_schedule(&dw->dw_callout,
1156 MIN(INT_MAX, ticks));
1157 SDT_PROBE2(sdt, linux, work, cancel,
1158 &dw->work, wq);
1159 SDT_PROBE3(sdt, linux, work, schedule,
1160 dw, wq, ticks);
1161 }
1162 }
1163 timer_modified = true;
1164 break;
1165 case DELAYED_WORK_RESCHEDULED:
1166 /*
1167 * Someone rescheduled it after the callout
1168 * started but before the poor thing even had a
1169 * chance to acquire the lock.
1170 */
1171 if (ticks == 0) {
1172 /*
1173 * We can just switch back to
1174 * DELAYED_WORK_SCHEDULED so that the
1175 * callout will queue the work as soon
1176 * as it gets the lock.
1177 */
1178 dw->dw_state = DELAYED_WORK_SCHEDULED;
1179 dw->dw_resched = -1;
1180 SDT_PROBE2(sdt, linux, work, cancel,
1181 &dw->work, wq);
1182 SDT_PROBE2(sdt, linux, work, queue,
1183 &dw->work, wq);
1184 } else {
1185 /* Change the rescheduled time. */
1186 dw->dw_resched = ticks;
1187 SDT_PROBE2(sdt, linux, work, cancel,
1188 &dw->work, wq);
1189 SDT_PROBE3(sdt, linux, work, schedule,
1190 dw, wq, ticks);
1191 }
1192 timer_modified = true;
1193 break;
1194 case DELAYED_WORK_CANCELLED:
1195 /*
1196 * Someone cancelled it after the callout
1197 * started but before the poor thing even had a
1198 * chance to acquire the lock.
1199 */
1200 if (ticks == 0) {
1201 /*
1202 * We can just switch back to
1203 * DELAYED_WORK_SCHEDULED so that the
1204 * callout will queue the work as soon
1205 * as it gets the lock.
1206 */
1207 dw->dw_state = DELAYED_WORK_SCHEDULED;
1208 SDT_PROBE2(sdt, linux, work, queue,
1209 &dw->work, wq);
1210 } else {
1211 /* Ask it to reschedule. */
1212 dw->dw_state = DELAYED_WORK_RESCHEDULED;
1213 dw->dw_resched = MIN(INT_MAX, ticks);
1214 SDT_PROBE3(sdt, linux, work, schedule,
1215 dw, wq, ticks);
1216 }
1217 timer_modified = false;
1218 break;
1219 default:
1220 panic("invalid delayed work state: %d", dw->dw_state);
1221 }
1222 }
1223 mutex_exit(&wq->wq_lock);
1224
1225 return timer_modified;
1226 }
1227
/*
 * cancel_delayed_work(dw)
 *
 *	If work was scheduled or queued, remove it from the schedule or
 *	queue and return true.  If work was not scheduled or queued,
 *	return false.  Note that work may already be running; if it
 *	hasn't been rescheduled or requeued, then cancel_delayed_work
 *	will return false, and either way, cancel_delayed_work will NOT
 *	wait for the work to complete.
 */
bool
cancel_delayed_work(struct delayed_work *dw)
{
	struct workqueue_struct *wq;
	bool cancelled_p;

	/* If there's no workqueue, nothing to cancel.   */
	if ((wq = work_queue(&dw->work)) == NULL)
		return false;

	mutex_enter(&wq->wq_lock);
	if (__predict_false(work_queue(&dw->work) != wq)) {
		/*
		 * The unlocked work_queue read above raced with the
		 * work being released (or moved); nothing left for us
		 * to cancel.
		 */
		cancelled_p = false;
	} else {
		switch (dw->dw_state) {
		case DELAYED_WORK_IDLE:
			/*
			 * It is either on the queue or already running
			 * or both.
			 */
			if (work_claimed(&dw->work, wq)) {
				/* On the queue.  Remove and release.  */
				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
				    work_entry);
				SDT_PROBE2(sdt, linux, work, cancel,
				    &dw->work, wq);
				release_work(&dw->work, wq);
				/* Can't dereference dw after this point.  */
				cancelled_p = true;
			} else {
				/* Not on the queue, so didn't cancel.  */
				cancelled_p = false;
			}
			break;
		case DELAYED_WORK_SCHEDULED:
			/*
			 * If it is scheduled, mark it cancelled and
			 * try to stop the callout before it starts.
			 *
			 * If it's too late and the callout has already
			 * begun to execute, tough.
			 *
			 * If we stopped the callout before it started,
			 * however, then destroy the callout and
			 * dissociate it from the workqueue ourselves.
			 */
			dw->dw_state = DELAYED_WORK_CANCELLED;
			cancelled_p = true;
			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
			/* callout_stop returns nonzero if it had fired. */
			if (!callout_stop(&dw->dw_callout))
				cancel_delayed_work_done(wq, dw);
			break;
		case DELAYED_WORK_RESCHEDULED:
			/*
			 * If it is being rescheduled, the callout has
			 * already fired.  We must ask it to cancel.
			 */
			dw->dw_state = DELAYED_WORK_CANCELLED;
			dw->dw_resched = -1;
			cancelled_p = true;
			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
			break;
		case DELAYED_WORK_CANCELLED:
			/*
			 * If it is being cancelled, the callout has
			 * already fired.  There is nothing more for us
			 * to do.  Someone else claims credit for
			 * cancelling it.
			 */
			cancelled_p = false;
			break;
		default:
			panic("invalid delayed work state: %d",
			    dw->dw_state);
		}
	}
	mutex_exit(&wq->wq_lock);

	return cancelled_p;
}
1318
/*
 * cancel_delayed_work_sync(dw)
 *
 *	If work was scheduled or queued, remove it from the schedule or
 *	queue and return true.  If work was not scheduled or queued,
 *	return false.  Note that work may already be running; if it
 *	hasn't been rescheduled or requeued, then cancel_delayed_work
 *	will return false; either way, wait for it to complete.
 */
bool
cancel_delayed_work_sync(struct delayed_work *dw)
{
	struct workqueue_struct *wq;
	bool cancelled_p;

	/* If there's no workqueue, nothing to cancel.   */
	if ((wq = work_queue(&dw->work)) == NULL)
		return false;

	mutex_enter(&wq->wq_lock);
	if (__predict_false(work_queue(&dw->work) != wq)) {
		/*
		 * The unlocked work_queue read above raced with the
		 * work being released (or moved); nothing left for us
		 * to cancel or wait for.
		 */
		cancelled_p = false;
	} else {
		switch (dw->dw_state) {
		case DELAYED_WORK_IDLE:
			/*
			 * It is either on the queue or already running
			 * or both.
			 */
			if (work_claimed(&dw->work, wq)) {
				/* On the queue.  Remove and release.  */
				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
				    work_entry);
				SDT_PROBE2(sdt, linux, work, cancel,
				    &dw->work, wq);
				release_work(&dw->work, wq);
				/* Can't dereference dw after this point.  */
				cancelled_p = true;
			} else {
				/* Not on the queue, so didn't cancel.  */
				cancelled_p = false;
			}
			/* If it's still running, wait for it to complete.  */
			if (wq->wq_current_work == &dw->work)
				wait_for_current_work(&dw->work, wq);
			break;
		case DELAYED_WORK_SCHEDULED:
			/*
			 * If it is scheduled, mark it cancelled and
			 * try to stop the callout before it starts.
			 *
			 * If it's too late and the callout has already
			 * begun to execute, we must wait for it to
			 * complete.  But we got in soon enough to ask
			 * the callout not to run, so we successfully
			 * cancelled it in that case.
			 *
			 * If we stopped the callout before it started,
			 * then we must destroy the callout and
			 * dissociate it from the workqueue ourselves.
			 */
			dw->dw_state = DELAYED_WORK_CANCELLED;
			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
			/*
			 * callout_halt waits for the callout to finish
			 * if it has fired, dropping and reacquiring
			 * wq_lock while it waits.
			 */
			if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
				cancel_delayed_work_done(wq, dw);
			cancelled_p = true;
			break;
		case DELAYED_WORK_RESCHEDULED:
			/*
			 * If it is being rescheduled, the callout has
			 * already fired.  We must ask it to cancel and
			 * wait for it to complete.
			 */
			dw->dw_state = DELAYED_WORK_CANCELLED;
			dw->dw_resched = -1;
			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
			cancelled_p = true;
			break;
		case DELAYED_WORK_CANCELLED:
			/*
			 * If it is being cancelled, the callout has
			 * already fired.  We need only wait for it to
			 * complete.  Someone else, however, claims
			 * credit for cancelling it.
			 */
			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
			cancelled_p = false;
			break;
		default:
			panic("invalid delayed work state: %d",
			    dw->dw_state);
		}
	}
	mutex_exit(&wq->wq_lock);

	return cancelled_p;
}
1417
1418 /*
1420 * Flush
1421 */
1422
1423 /*
1424 * flush_scheduled_work()
1425 *
1426 * Wait for all work queued on system_wq to complete. This does
1427 * not include delayed work.
1428 */
1429 void
1430 flush_scheduled_work(void)
1431 {
1432
1433 flush_workqueue(system_wq);
1434 }
1435
/*
 * struct flush_work: marker work item used by flush_workqueue.  The
 * worker runs flush_work_cb, which sets fw_done and wakes the flusher
 * sleeping on fw_cv.
 */
struct flush_work {
	kmutex_t fw_lock;		/* serializes fw_done */
	kcondvar_t fw_cv;		/* broadcast when fw_done is set */
	struct work_struct fw_work;	/* the marker work itself */
	bool fw_done;			/* true once fw_work has run */
};
1442
1443 static void
1444 flush_work_cb(struct work_struct *work)
1445 {
1446 struct flush_work *fw = container_of(work, struct flush_work, fw_work);
1447
1448 mutex_enter(&fw->fw_lock);
1449 fw->fw_done = true;
1450 cv_broadcast(&fw->fw_cv);
1451 mutex_exit(&fw->fw_lock);
1452 }
1453
1454 /*
1455 * flush_workqueue(wq)
1456 *
1457 * Wait for all work queued on wq to complete. This does not
1458 * include delayed work.
1459 */
1460 void
1461 flush_workqueue(struct workqueue_struct *wq)
1462 {
1463 struct flush_work fw;
1464
1465 mutex_init(&fw.fw_lock, MUTEX_DEFAULT, IPL_VM);
1466 cv_init(&fw.fw_cv, "lxwqflsh");
1467 INIT_WORK(&fw.fw_work, &flush_work_cb);
1468 fw.fw_done = false;
1469
1470 SDT_PROBE1(sdt, linux, work, flush__start, wq);
1471 queue_work(wq, &fw.fw_work);
1472
1473 mutex_enter(&fw.fw_lock);
1474 while (!fw.fw_done)
1475 cv_wait(&fw.fw_cv, &fw.fw_lock);
1476 mutex_exit(&fw.fw_lock);
1477 SDT_PROBE1(sdt, linux, work, flush__done, wq);
1478
1479 KASSERT(fw.fw_done);
1480 /* no DESTROY_WORK */
1481 cv_destroy(&fw.fw_cv);
1482 mutex_destroy(&fw.fw_lock);
1483 }
1484
1485 /*
1486 * drain_workqueue(wq)
1487 *
1488 * Repeatedly flush wq until there is no more work.
1489 */
1490 void
1491 drain_workqueue(struct workqueue_struct *wq)
1492 {
1493 unsigned ntries = 0;
1494 bool done;
1495
1496 do {
1497 if (ntries++ == 10 || (ntries % 100) == 0)
1498 printf("linux workqueue %s"
1499 ": still clogged after %u flushes",
1500 wq->wq_name, ntries);
1501 flush_workqueue(wq);
1502 mutex_enter(&wq->wq_lock);
1503 done = wq->wq_current_work == NULL;
1504 done &= TAILQ_EMPTY(&wq->wq_queue);
1505 done &= TAILQ_EMPTY(&wq->wq_dqueue);
1506 mutex_exit(&wq->wq_lock);
1507 } while (!done);
1508 }
1509
1510 /*
1511 * flush_work(work)
1512 *
1513 * If work is queued or currently executing, wait for it to
1514 * complete.
1515 *
1516 * Return true if we waited to flush it, false if it was already
1517 * idle.
1518 */
1519 bool
1520 flush_work(struct work_struct *work)
1521 {
1522 struct workqueue_struct *wq;
1523
1524 /* If there's no workqueue, nothing to flush. */
1525 if ((wq = work_queue(work)) == NULL)
1526 return false;
1527
1528 flush_workqueue(wq);
1529 return true;
1530 }
1531
/*
 * flush_delayed_work(dw)
 *
 *	If dw is scheduled to run after a delay, queue it immediately
 *	instead.  Then, if dw is queued or currently executing, wait
 *	for it to complete.
 *
 *	Returns true if we waited, false if there was nothing to wait
 *	for.
 */
bool
flush_delayed_work(struct delayed_work *dw)
{
	struct workqueue_struct *wq;
	bool waited = false;

	/* If there's no workqueue, nothing to flush.  */
	if ((wq = work_queue(&dw->work)) == NULL)
		return false;

	mutex_enter(&wq->wq_lock);
	if (__predict_false(work_queue(&dw->work) != wq)) {
		/*
		 * Moved off the queue already (and possibly to another
		 * queue, though that would be ill-advised), so it must
		 * have completed, and we have nothing more to do.
		 */
		waited = false;
	} else {
		switch (dw->dw_state) {
		case DELAYED_WORK_IDLE:
			/*
			 * It has a workqueue assigned and the callout
			 * is idle, so it must be in progress or on the
			 * queue.  In that case, we'll wait for it to
			 * complete.
			 */
			break;
		case DELAYED_WORK_SCHEDULED:
		case DELAYED_WORK_RESCHEDULED:
		case DELAYED_WORK_CANCELLED:
			/*
			 * The callout is scheduled, and may have even
			 * started.  Mark it as scheduled so that if
			 * the callout has fired it will queue the work
			 * itself.  Try to stop the callout -- if we
			 * can, queue the work now; if we can't, wait
			 * for the callout to complete, which entails
			 * queueing it.
			 */
			dw->dw_state = DELAYED_WORK_SCHEDULED;
			if (!callout_halt(&dw->dw_callout, &wq->wq_lock)) {
				/*
				 * We stopped it before it ran.  No
				 * state change in the interim is
				 * possible.  Destroy the callout and
				 * queue it ourselves.
				 */
				KASSERT(dw->dw_state ==
				    DELAYED_WORK_SCHEDULED);
				dw_callout_destroy(wq, dw);
				TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
				    work_entry);
				cv_broadcast(&wq->wq_cv);
				SDT_PROBE2(sdt, linux, work, queue,
				    &dw->work, wq);
			}
			break;
		default:
			panic("invalid delayed work state: %d", dw->dw_state);
		}
		/*
		 * Waiting for the whole queue to flush is overkill,
		 * but doesn't hurt.  Note we must drop wq_lock while
		 * flushing since the worker needs it.
		 */
		mutex_exit(&wq->wq_lock);
		flush_workqueue(wq);
		mutex_enter(&wq->wq_lock);
		waited = true;
	}
	mutex_exit(&wq->wq_lock);

	return waited;
}
1613
1614 /*
1615 * delayed_work_pending(dw)
1616 *
1617 * True if dw is currently scheduled to execute, false if not.
1618 */
1619 bool
1620 delayed_work_pending(const struct delayed_work *dw)
1621 {
1622
1623 return work_pending(&dw->work);
1624 }
1625
1626 /*
1627 * INIT_RCU_WORK(rw, fn)
1628 *
1629 * Initialize rw for use with a workqueue to call fn in a worker
1630 * thread after an RCU grace period. There is no corresponding
1631 * destruction operation.
1632 */
1633 void
1634 INIT_RCU_WORK(struct rcu_work *rw, void (*fn)(struct work_struct *))
1635 {
1636
1637 INIT_WORK(&rw->work, fn);
1638 }
1639
1640 static void
1641 queue_rcu_work_cb(struct rcu_head *r)
1642 {
1643 struct rcu_work *rw = container_of(r, struct rcu_work, rw_rcu);
1644 struct workqueue_struct *wq = work_queue(&rw->work);
1645
1646 mutex_enter(&wq->wq_lock);
1647 KASSERT(work_pending(&rw->work));
1648 KASSERT(work_queue(&rw->work) == wq);
1649 destroy_rcu_head(&rw->rw_rcu);
1650 TAILQ_REMOVE(&wq->wq_rcu, &rw->work, work_entry);
1651 TAILQ_INSERT_TAIL(&wq->wq_queue, &rw->work, work_entry);
1652 cv_broadcast(&wq->wq_cv);
1653 SDT_PROBE2(sdt, linux, work, queue, &rw->work, wq);
1654 mutex_exit(&wq->wq_lock);
1655 }
1656
/*
 * queue_rcu_work(wq, rw)
 *
 *	Schedule rw to run on wq after an RCU grace period.
 */
void
queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rw)
{

	mutex_enter(&wq->wq_lock);
	if (acquire_work(&rw->work, wq)) {
		/*
		 * Newly acquired: park the work on wq_rcu and arrange
		 * for queue_rcu_work_cb to move it to the run queue
		 * once a grace period has elapsed.  If acquire_work
		 * returned false the work is already associated with
		 * a workqueue, so there is nothing to do.
		 */
		init_rcu_head(&rw->rw_rcu);
		SDT_PROBE2(sdt, linux, work, rcu,  rw, wq);
		TAILQ_INSERT_TAIL(&wq->wq_rcu, &rw->work, work_entry);
		call_rcu(&rw->rw_rcu, &queue_rcu_work_cb);
	}
	mutex_exit(&wq->wq_lock);
}
1675