1 /* $NetBSD: sys_aio.c,v 1.52 2025/10/10 17:08:01 kre Exp $ */
2
3 /*
4 * Copyright (c) 2025 The NetBSD Foundation, Inc.
5 * Copyright (c) 2007 Mindaugas Rasiukevicius <rmind at NetBSD org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 /*
31 * NetBSD asynchronous I/O service pool implementation
32 *
33 * Design overview
34 *
35 * Thread pool architecture:
36 * Each process owns an aiosp (service pool) with worker threads (aiost).
37 * Workers are reused via the freelist/active lists to avoid churn.
38 * Workers sleep on service_cv until a job is assigned.
39 * On process teardown, outstanding work is quiesced and threads are
40 * destroyed.
41 *
42 * Job distribution:
43 * Jobs are appended to aiosp->jobs and then distributed to worker
44 * threads.
45 * Regular files: jobs are grouped together by file handle to allow for
46 * future optimisation.
47 * Non-regular files: no grouping. Each job is handled directly by a
48 * discrete worker thread.
49 * Only regular files are candidates for non-blocking operation; however,
50 * the non-blocking path is not implemented yet and everything currently
51 * falls back to blocking I/O.
52 * Distribution is triggered by aiosp_distribute_jobs().
53 *
54 * Job tracking:
55 * A hash table (keyed by userspace aiocb pointer) maps aiocb -> kernel job.
56 * This gives near-O(1) lookup for aio_error/aio_return/aio_suspend.
57 * Resubmission of the same aiocb updates the mapping, which allows
58 * userspace to reuse aiocb storage liberally.
59 *
60 * File group management:
61 * An RB tree (aiost_file_tree) maintains active file groups.
62 * Groups are created on demand when regular file jobs are distributed.
63 * Groups are destroyed when all jobs for that fp complete.
64 * Enables future enhancements like dynamic job appending during processing.
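 *
 * Illustrative userspace view (a sketch only, assuming an open regular
 * file descriptor fd and a buffer buf; not part of this file): a read
 * submitted with aio_read() is enqueued by aio_enqueue_job(), handed to a
 * worker by aiosp_distribute_jobs(), and later collected through the hash
 * table by aio_error()/aio_return():
 *
 *	struct aiocb cb;
 *	memset(&cb, 0, sizeof(cb));
 *	cb.aio_fildes = fd;			// open regular file
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof(buf);
 *	cb.aio_offset = 0;
 *	if (aio_read(&cb) == -1)
 *		err(EXIT_FAILURE, "aio_read");
 *	const struct aiocb *const list[1] = { &cb };
 *	(void)aio_suspend(list, 1, NULL);	// blocks via aiowaitgroup
 *	if (aio_error(&cb) == 0)
 *		(void)aio_return(&cb);		// detaches and frees the job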
65 * 66 * Implementation notes 67 * io_read/io_write currently use fallback implementations 68 */ 69 70 #include <sys/cdefs.h> 71 __KERNEL_RCSID(0, "$NetBSD: sys_aio.c,v 1.52 2025/10/10 17:08:01 kre Exp $"); 72 73 #ifdef _KERNEL_OPT 74 #include "opt_ddb.h" 75 #endif 76 77 #include <sys/param.h> 78 #include <sys/types.h> 79 #include <sys/bitops.h> 80 #include <sys/hash.h> 81 #include <sys/uio.h> 82 83 #include <sys/atomic.h> 84 #include <sys/buf.h> 85 #include <sys/condvar.h> 86 #include <sys/file.h> 87 #include <sys/filedesc.h> 88 #include <sys/kernel.h> 89 #include <sys/kmem.h> 90 #include <sys/lwp.h> 91 #include <sys/module.h> 92 #include <sys/mutex.h> 93 #include <sys/kthread.h> 94 #include <sys/pool.h> 95 #include <sys/proc.h> 96 #include <sys/queue.h> 97 #include <sys/sdt.h> 98 #include <sys/signal.h> 99 #include <sys/signalvar.h> 100 #include <sys/syscall.h> 101 #include <sys/syscallargs.h> 102 #include <sys/syscallvar.h> 103 #include <sys/sysctl.h> 104 #include <sys/systm.h> 105 #include <sys/types.h> 106 #include <sys/vnode.h> 107 108 #include <uvm/uvm_extern.h> 109 110 MODULE(MODULE_CLASS_MISC, aio, NULL); 111 112 /* 113 * System-wide limits and counter of AIO operations. 114 */ 115 u_int aio_listio_max = AIO_LISTIO_MAX; 116 static u_int aio_max = AIO_MAX; 117 static u_int aio_jobs_count; 118 119 static struct pool aio_job_pool; 120 static struct pool aio_lio_pool; 121 static void * aio_ehook; 122 123 static int aio_enqueue_job(int, void *, struct lio_req *); 124 static void aio_exit(proc_t *, void *); 125 126 static int sysctl_aio_listio_max(SYSCTLFN_PROTO); 127 static int sysctl_aio_max(SYSCTLFN_PROTO); 128 129 /* Service pool functions */ 130 static int aiost_create(struct aiosp *, struct aiost **); 131 static int aiost_terminate(struct aiost *); 132 static void aiost_entry(void *); 133 static void aiost_sigsend(struct proc *, struct sigevent *); 134 static int aiosp_worker_extract(struct aiosp *, struct aiost **); 135 136 static int io_write(struct aio_job *); 137 static int io_read(struct aio_job *); 138 static int io_sync(struct aio_job *); 139 static int uio_construct(struct aio_job *, struct file **, 140 struct iovec *, struct uio *); 141 static int io_write_fallback(struct aio_job *); 142 static int io_read_fallback(struct aio_job *); 143 144 static void aio_job_fini(struct aio_job *); 145 static void aio_job_mark_complete(struct aio_job *); 146 static void aio_file_hold(struct file *); 147 static void aio_file_release(struct file *); 148 149 static void aiocbp_destroy(struct aiosp *); 150 static int aiocbp_init(struct aiosp *, u_int); 151 static int aiocbp_insert(struct aiosp *, struct aiocbp *); 152 static int aiocbp_lookup_job(struct aiosp *, const void *, 153 struct aio_job **); 154 static int aiocbp_remove_job(struct aiosp *, const void *, 155 struct aio_job **, struct aiocbp **); 156 157 static const struct syscall_package aio_syscalls[] = { 158 { SYS_aio_cancel, 0, (sy_call_t *)sys_aio_cancel }, 159 { SYS_aio_error, 0, (sy_call_t *)sys_aio_error }, 160 { SYS_aio_fsync, 0, (sy_call_t *)sys_aio_fsync }, 161 { SYS_aio_read, 0, (sy_call_t *)sys_aio_read }, 162 { SYS_aio_return, 0, (sy_call_t *)sys_aio_return }, 163 { SYS___aio_suspend50, 0, (sy_call_t *)sys___aio_suspend50 }, 164 { SYS_aio_write, 0, (sy_call_t *)sys_aio_write }, 165 { SYS_lio_listio, 0, (sy_call_t *)sys_lio_listio }, 166 { 0, 0, NULL }, 167 }; 168 169 /* 170 * Order RB with respect to fp 171 */ 172 static int 173 aiost_file_group_cmp(struct aiost_file_group *a, struct aiost_file_group *b) 174 { 175 if 
(a == NULL || b == NULL) { 176 return (a == b) ? 0 : (a ? 1 : -1); 177 } 178 179 uintptr_t ap = (uintptr_t)a->fp; 180 uintptr_t bp = (uintptr_t)b->fp; 181 182 return (ap < bp) ? -1 : (ap > bp) ? 1 : 0; 183 } 184 185 RB_HEAD(aiost_file_tree, aiost_file_group); 186 RB_PROTOTYPE(aiost_file_tree, aiost_file_group, tree, aiost_file_group_cmp); 187 RB_GENERATE(aiost_file_tree, aiost_file_group, tree, aiost_file_group_cmp); 188 189 /* 190 * Tear down all AIO state. 191 */ 192 static int 193 aio_fini(bool interface) 194 { 195 int error; 196 proc_t *p; 197 198 if (interface) { 199 /* Stop syscall activity. */ 200 error = syscall_disestablish(NULL, aio_syscalls); 201 if (error != 0) 202 return error; 203 /* Abort if any processes are using AIO. */ 204 mutex_enter(&proc_lock); 205 PROCLIST_FOREACH(p, &allproc) { 206 if (p->p_aio != NULL) 207 break; 208 } 209 mutex_exit(&proc_lock); 210 if (p != NULL) { 211 error = syscall_establish(NULL, aio_syscalls); 212 KASSERT(error == 0); 213 return SET_ERROR(EBUSY); 214 } 215 } 216 217 KASSERT(aio_jobs_count == 0); 218 exithook_disestablish(aio_ehook); 219 pool_destroy(&aio_job_pool); 220 pool_destroy(&aio_lio_pool); 221 return 0; 222 } 223 224 /* 225 * Initialize global AIO state. 226 */ 227 static int 228 aio_init(void) 229 { 230 int error; 231 232 pool_init(&aio_job_pool, sizeof(struct aio_job), 0, 0, 0, 233 "aio_jobs_pool", &pool_allocator_nointr, IPL_NONE); 234 pool_init(&aio_lio_pool, sizeof(struct lio_req), 0, 0, 0, 235 "aio_lio_pool", &pool_allocator_nointr, IPL_NONE); 236 aio_ehook = exithook_establish(aio_exit, NULL); 237 238 error = syscall_establish(NULL, aio_syscalls); 239 if (error != 0) { 240 aio_fini(false); 241 } 242 return error; 243 } 244 245 /* 246 * Module interface. 247 */ 248 static int 249 aio_modcmd(modcmd_t cmd, void *arg) 250 { 251 switch (cmd) { 252 case MODULE_CMD_INIT: 253 return aio_init(); 254 case MODULE_CMD_FINI: 255 return aio_fini(true); 256 default: 257 return SET_ERROR(ENOTTY); 258 } 259 } 260 261 /* 262 * Initialize Asynchronous I/O data structures for the process. 263 */ 264 static int 265 aio_procinit(struct proc *p) 266 { 267 struct aioproc *aio; 268 int error; 269 270 /* Allocate and initialize AIO structure */ 271 aio = kmem_zalloc(sizeof(*aio), KM_SLEEP); 272 273 /* Initialize the service pool */ 274 error = aiosp_initialize(&aio->aiosp); 275 if (error) { 276 kmem_free(aio, sizeof(*aio)); 277 return error; 278 } 279 280 error = aiocbp_init(&aio->aiosp, 256); 281 if (error) { 282 aiosp_destroy(&aio->aiosp, NULL); 283 kmem_free(aio, sizeof(*aio)); 284 return error; 285 } 286 287 /* Initialize queue and their synchronization structures */ 288 mutex_init(&aio->aio_mtx, MUTEX_DEFAULT, IPL_NONE); 289 290 /* Recheck if we are really first */ 291 mutex_enter(p->p_lock); 292 if (p->p_aio) { 293 mutex_exit(p->p_lock); 294 aio_exit(p, aio); 295 return 0; 296 } 297 p->p_aio = aio; 298 mutex_exit(p->p_lock); 299 300 return 0; 301 } 302 303 /* 304 * Exit of Asynchronous I/O subsystem of process. 
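 * Called from the process exit hook (cookie == NULL, use p->p_aio) or from
 * aio_procinit() when another thread won the initialization race (cookie is
 * the aioproc structure that was never installed).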
305 */ 306 static void 307 aio_exit(struct proc *p, void *cookie) 308 { 309 struct aioproc *aio; 310 311 if (cookie != NULL) { 312 aio = cookie; 313 } else if ((aio = p->p_aio) == NULL) { 314 return; 315 } 316 317 aiocbp_destroy(&aio->aiosp); 318 aiosp_destroy(&aio->aiosp, NULL); 319 mutex_destroy(&aio->aio_mtx); 320 kmem_free(aio, sizeof(*aio)); 321 } 322 323 /* 324 * Destroy job structure 325 */ 326 static void 327 aio_job_fini(struct aio_job *job) 328 { 329 mutex_enter(&job->mtx); 330 aiowaitgrouplk_fini(&job->lk); 331 mutex_exit(&job->mtx); 332 mutex_destroy(&job->mtx); 333 } 334 335 /* 336 * Mark job as complete 337 */ 338 static void 339 aio_job_mark_complete(struct aio_job *job) 340 { 341 mutex_enter(&job->mtx); 342 job->completed = true; 343 aio_file_release(job->fp); 344 job->fp = NULL; 345 346 aiowaitgrouplk_flush(&job->lk); 347 mutex_exit(&job->mtx); 348 349 aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); 350 } 351 352 /* 353 * Acquire a file reference for async ops 354 */ 355 static void 356 aio_file_hold(struct file *fp) 357 { 358 mutex_enter(&fp->f_lock); 359 fp->f_count++; 360 mutex_exit(&fp->f_lock); 361 } 362 363 /* 364 * Release a file reference for async ops 365 */ 366 static void 367 aio_file_release(struct file *fp) 368 { 369 mutex_enter(&fp->f_lock); 370 fp->f_count--; 371 if (!fp->f_count) { 372 mutex_exit(&fp->f_lock); 373 closef(fp); 374 return; 375 } 376 mutex_exit(&fp->f_lock); 377 } 378 379 /* 380 * Release a job back to the pool 381 */ 382 static inline void 383 aio_job_release(struct aio_job *job) 384 { 385 if (job->fp) { 386 aio_file_release(job->fp); 387 job->fp = NULL; 388 } 389 390 aio_job_fini(job); 391 pool_put(&aio_job_pool, job); 392 atomic_dec_uint(&aio_jobs_count); 393 } 394 395 /* 396 * Cancel a job pending on aiosp->jobs 397 */ 398 static inline void 399 aio_job_cancel(struct aiosp *aiosp, struct aio_job *job) 400 { 401 mutex_enter(&job->mtx); 402 TAILQ_REMOVE(&aiosp->jobs, job, list); 403 aiosp->jobs_pending--; 404 job->on_queue = false; 405 job->aiocbp._errno = ECANCELED; 406 mutex_exit(&job->mtx); 407 } 408 409 /* 410 * Remove file group from tree locked 411 */ 412 static inline void 413 aiosp_fg_teardown_locked(struct aiosp *sp, struct aiost_file_group *fg) 414 { 415 if (fg == NULL) { 416 return; 417 } 418 419 RB_REMOVE(aiost_file_tree, sp->fg_root, fg); 420 mutex_destroy(&fg->mtx); 421 kmem_free(fg, sizeof(*fg)); 422 } 423 424 /* 425 * Remove file group from tree 426 */ 427 static inline void 428 aiosp_fg_teardown(struct aiosp *sp, struct aiost_file_group *fg) 429 { 430 if (fg == NULL) { 431 return; 432 } 433 434 mutex_enter(&sp->mtx); 435 aiosp_fg_teardown_locked(sp, fg); 436 mutex_exit(&sp->mtx); 437 } 438 439 /* 440 * Group jobs by file descriptor and distribute to service threads. 441 * Regular files are coalesced per-fp, others get individual threads. 
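 * Takes sp->mtx internally, so the caller must not hold it.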
442 * Must be called with jobs queued in sp->jobs 443 */ 444 int 445 aiosp_distribute_jobs(struct aiosp *sp) 446 { 447 struct aio_job *job, *tmp; 448 struct file *fp; 449 int error = 0; 450 451 mutex_enter(&sp->mtx); 452 if (!sp->jobs_pending) { 453 mutex_exit(&sp->mtx); 454 return 0; 455 } 456 457 TAILQ_FOREACH_SAFE(job, &sp->jobs, list, tmp) { 458 fp = job->fp; 459 KASSERT(fp); 460 461 struct aiost_file_group *fg = NULL; 462 struct aiost *aiost = NULL; 463 464 if (fp->f_vnode != NULL && fp->f_vnode->v_type == VREG) { 465 struct aiost_file_group key = { .fp = fp }; 466 fg = RB_FIND(aiost_file_tree, sp->fg_root, &key); 467 468 if (fg == NULL) { 469 fg = kmem_zalloc(sizeof(*fg), KM_SLEEP); 470 fg->fp = fp; 471 fg->queue_size = 0; 472 mutex_init(&fg->mtx, MUTEX_DEFAULT, IPL_NONE); 473 TAILQ_INIT(&fg->queue); 474 475 error = aiosp_worker_extract(sp, &aiost); 476 if (error) { 477 kmem_free(fg, sizeof(*fg)); 478 mutex_exit(&sp->mtx); 479 return error; 480 } 481 RB_INSERT(aiost_file_tree, sp->fg_root, fg); 482 fg->aiost = aiost; 483 484 aiost->fg = fg; 485 aiost->job = NULL; 486 } else { 487 aiost = fg->aiost; 488 } 489 } else { 490 error = aiosp_worker_extract(sp, &aiost); 491 if (error) { 492 mutex_exit(&sp->mtx); 493 return error; 494 } 495 aiost->fg = NULL; 496 aiost->job = job; 497 } 498 499 TAILQ_REMOVE(&sp->jobs, job, list); 500 sp->jobs_pending--; 501 job->on_queue = false; 502 503 if (fg) { 504 mutex_enter(&fg->mtx); 505 TAILQ_INSERT_TAIL(&fg->queue, job, list); 506 fg->queue_size++; 507 mutex_exit(&fg->mtx); 508 } 509 510 mutex_enter(&aiost->mtx); 511 aiost->freelist = false; 512 aiost->state = AIOST_STATE_OPERATION; 513 mutex_exit(&aiost->mtx); 514 cv_signal(&aiost->service_cv); 515 } 516 517 mutex_exit(&sp->mtx); 518 return error; 519 } 520 521 /* 522 * Wait for specified AIO operations to complete 523 * Create a waitgroup to monitor the specified aiocb list. 524 * Returns when timeout expires or completion criteria met 525 * 526 * AIOSP_SUSPEND_ANY return when any job completes 527 * AIOSP_SUSPEND_ALL return when all jobs complete 528 */ 529 int 530 aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent, 531 struct timespec *ts, int flags) 532 { 533 struct aio_job *job; 534 struct aiowaitgroup *wg; 535 int error = 0, timo = 0; 536 size_t joined = 0; 537 538 if (ts) { 539 timo = tstohz(ts); 540 if (timo <= 0) { 541 error = SET_ERROR(EAGAIN); 542 return error; 543 } 544 } 545 546 wg = kmem_zalloc(sizeof(*wg), KM_SLEEP); 547 aiowaitgroup_init(wg); 548 549 for (int i = 0; i < nent; i++) { 550 if (aiocbp_list[i] == NULL) { 551 continue; 552 } 553 554 error = aiocbp_lookup_job(aiosp, aiocbp_list[i], &job); 555 if (error) { 556 goto done; 557 } 558 if (job == NULL) { 559 continue; 560 } 561 562 if (job->completed) { 563 mutex_enter(&wg->mtx); 564 wg->completed++; 565 wg->total++; 566 mutex_exit(&wg->mtx); 567 mutex_exit(&job->mtx); 568 continue; 569 } 570 571 aiowaitgroup_join(wg, &job->lk); 572 joined++; 573 mutex_exit(&job->mtx); 574 } 575 576 if (!joined) { 577 goto done; 578 } 579 580 mutex_enter(&wg->mtx); 581 const size_t target = (flags & AIOSP_SUSPEND_ANY) ? 
1 : wg->total; 582 while (wg->completed < target) { 583 error = aiowaitgroup_wait(wg, timo); 584 if (error) { 585 break; 586 } 587 } 588 mutex_exit(&wg->mtx); 589 done: 590 mutex_enter(&wg->mtx); 591 wg->active = false; 592 if (--wg->refcnt == 0) { 593 mutex_exit(&wg->mtx); 594 aiowaitgroup_fini(wg); 595 } else { 596 mutex_exit(&wg->mtx); 597 } 598 return error; 599 } 600 601 int 602 aio_suspend1(struct lwp *l, struct aiocb **aiocbp_list, int nent, 603 struct timespec *ts) 604 { 605 struct proc *p = l->l_proc; 606 struct aioproc *aio = p->p_aio; 607 struct aiosp *aiosp = &aio->aiosp; 608 609 return aiosp_suspend(aiosp, aiocbp_list, nent, ts, AIOSP_SUSPEND_ANY); 610 } 611 612 /* 613 * Initializes a servicing pool. 614 */ 615 int 616 aiosp_initialize(struct aiosp *sp) 617 { 618 mutex_init(&sp->mtx, MUTEX_DEFAULT, IPL_NONE); 619 TAILQ_INIT(&sp->freelist); 620 TAILQ_INIT(&sp->active); 621 TAILQ_INIT(&sp->jobs); 622 sp->fg_root = kmem_zalloc(sizeof(*sp->fg_root), KM_SLEEP); 623 RB_INIT(sp->fg_root); 624 625 return 0; 626 } 627 628 /* 629 * Extract an available worker thread from pool or create new one 630 */ 631 static int 632 aiosp_worker_extract(struct aiosp *sp, struct aiost **aiost) 633 { 634 int error; 635 636 if (sp->nthreads_free == 0) { 637 error = aiost_create(sp, aiost); 638 if (error) { 639 return error; 640 } 641 } else { 642 *aiost = TAILQ_LAST(&sp->freelist, aiost_list); 643 } 644 645 TAILQ_REMOVE(&sp->freelist, *aiost, list); 646 sp->nthreads_free--; 647 TAILQ_INSERT_TAIL(&sp->active, *aiost, list); 648 sp->nthreads_active++; 649 650 return 0; 651 } 652 653 /* 654 * Each process keeps track of all the service threads instantiated to service 655 * an asynchronous operation by the process. When a process is terminated we 656 * must also terminate all of its active and pending asynchronous operation. 657 */ 658 int 659 aiosp_destroy(struct aiosp *sp, int *cn) 660 { 661 struct aiost *st; 662 int error, cnt = 0; 663 664 for (;;) { 665 /* 666 * peek one worker under sp->mtx 667 */ 668 mutex_enter(&sp->mtx); 669 st = TAILQ_FIRST(&sp->freelist); 670 if (st == NULL) { 671 st = TAILQ_FIRST(&sp->active); 672 } 673 mutex_exit(&sp->mtx); 674 675 if (st == NULL) 676 break; 677 678 error = aiost_terminate(st); 679 if (error) { 680 return error; 681 } 682 st->lwp = NULL; 683 684 kmem_free(st, sizeof(*st)); 685 cnt++; 686 } 687 688 if (cn) { 689 *cn = cnt; 690 } 691 692 mutex_destroy(&sp->mtx); 693 return 0; 694 } 695 696 /* 697 * Enqueue a job for processing by the process's servicing pool 698 */ 699 int 700 aiosp_enqueue_job(struct aiosp *aiosp, struct aio_job *job) 701 { 702 mutex_enter(&aiosp->mtx); 703 704 TAILQ_INSERT_TAIL(&aiosp->jobs, job, list); 705 aiosp->jobs_pending++; 706 job->on_queue = true; 707 708 mutex_exit(&aiosp->mtx); 709 710 return 0; 711 } 712 713 /* 714 * Create and initialise a new servicing thread and append it to the freelist. 
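 * Expected to be called with sp->mtx held (see aiosp_worker_extract()), as
 * the freelist and thread counters are updated here without taking it.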
715 */
716 static int
717 aiost_create(struct aiosp *sp, struct aiost **ret)
718 {
719 struct proc *p = curlwp->l_proc;
720 struct aiost *st;
721
722 st = kmem_zalloc(sizeof(*st), KM_SLEEP);
723
724 mutex_init(&st->mtx, MUTEX_DEFAULT, IPL_NONE);
725 cv_init(&st->service_cv, "aioservice");
726
727 st->job = NULL;
728 st->state = AIOST_STATE_NONE;
729 st->aiosp = sp;
730 st->freelist = true;
731
732 TAILQ_INSERT_TAIL(&sp->freelist, st, list);
733 sp->nthreads_free++;
734 sp->nthreads_total++;
735
736 int error = kthread_create(PRI_NONE, KTHREAD_MUSTJOIN | KTHREAD_TS,
737 NULL, aiost_entry, st, &st->lwp, "aio_%d_%zu", p->p_pid,
738 sp->nthreads_total);
739 if (error) {
740 return error;
741 }
742
743 if (ret) {
744 *ret = st;
745 }
746
747 return 0;
748 }
749
750 /*
751 * Process a single job without coalescing.
752 */
753 static void
754 aiost_process_singleton(struct aio_job *job)
755 {
756 if ((job->aio_op & AIO_READ) == AIO_READ) {
757 io_read(job);
758 } else if ((job->aio_op & AIO_WRITE) == AIO_WRITE) {
759 io_write(job);
760 } else if ((job->aio_op & AIO_SYNC) == AIO_SYNC) {
761 io_sync(job);
762 } else {
763 panic("%s: invalid operation code {%x}\n", __func__,
764 job->aio_op);
765 }
766
767 aio_job_mark_complete(job);
768 }
769
770 /*
771 * Process all jobs in a file group.
772 */
773 static void
774 aiost_process_fg(struct aiosp *sp, struct aiost_file_group *fg)
775 {
776 for (struct aio_job *job;;) {
777 mutex_enter(&fg->mtx);
778 job = TAILQ_FIRST(&fg->queue);
779 if (job) {
780 TAILQ_REMOVE(&fg->queue, job, list);
781 fg->queue_size--;
782 }
783 mutex_exit(&fg->mtx);
784 if (job == NULL) {
785 break;
786 }
787
788 aiost_process_singleton(job);
789 }
790 }
791
792 /*
793 * Service thread entry point. Processes assigned jobs until termination.
794 * Handles both singleton jobs and file-grouped job batches.
795 */
796 static void
797 aiost_entry(void *arg)
798 {
799 struct aiost *st = arg;
800 struct aiosp *sp = st->aiosp;
801 int error;
802
803 /*
804 * We want to handle abrupt process terminations effectively. Setting
805 * st->state to AIOST_STATE_TERMINATE indicates that the thread must
806 * exit; aiost_terminate(st) unblocks threads sleeping on
807 * st->service_cv.
808 */
809 mutex_enter(&st->mtx);
810 for (;;) {
811 for (; st->state == AIOST_STATE_NONE;) {
812 error = cv_wait_sig(&st->service_cv, &st->mtx);
813 if (error) {
814 /*
815 * Thread was interrupted.
Check for pending exit 816 * or suspension 817 */ 818 mutex_exit(&st->mtx); 819 lwp_userret(curlwp); 820 mutex_enter(&st->mtx); 821 } 822 } 823 824 if (st->state == AIOST_STATE_TERMINATE) { 825 break; 826 } 827 828 if (st->state != AIOST_STATE_OPERATION) { 829 panic("aio_process: invalid aiost state {%x}\n", 830 st->state); 831 } 832 833 if (st->fg) { 834 struct aiost_file_group *fg = st->fg; 835 st->fg = NULL; 836 837 mutex_exit(&st->mtx); 838 aiost_process_fg(sp, fg); 839 mutex_enter(&st->mtx); 840 841 aiosp_fg_teardown(sp, fg); 842 } else if (st->job) { 843 struct aio_job *job = st->job; 844 845 mutex_exit(&st->mtx); 846 aiost_process_singleton(job); 847 mutex_enter(&st->mtx); 848 } else { 849 KASSERT(0); 850 } 851 852 /* 853 * check whether or not a termination was queued while handling 854 * a job 855 */ 856 if (st->state == AIOST_STATE_TERMINATE) { 857 break; 858 } 859 860 st->state = AIOST_STATE_NONE; 861 st->job = NULL; 862 st->fg = NULL; 863 864 /* 865 * Remove st from list of active service threads, append to 866 * freelist, dance around locks, then iterate loop and block on 867 * st->service_cv 868 */ 869 mutex_exit(&st->mtx); 870 mutex_enter(&sp->mtx); 871 mutex_enter(&st->mtx); 872 873 st->freelist = true; 874 875 TAILQ_REMOVE(&sp->active, st, list); 876 sp->nthreads_active--; 877 878 TAILQ_INSERT_TAIL(&sp->freelist, st, list); 879 sp->nthreads_free++; 880 881 mutex_exit(&sp->mtx); 882 } 883 884 if (st->job) { 885 aio_job_release(st->job); 886 } else if (st->fg) { 887 struct aiost_file_group *fg = st->fg; 888 st->fg = NULL; 889 890 for (struct aio_job *job;;) { 891 mutex_enter(&fg->mtx); 892 job = TAILQ_FIRST(&fg->queue); 893 if (job) { 894 TAILQ_REMOVE(&fg->queue, job, list); 895 fg->queue_size--; 896 } 897 mutex_exit(&fg->mtx); 898 if (job == NULL) { 899 break; 900 } 901 902 aio_job_release(job); 903 } 904 905 aiosp_fg_teardown(sp, fg); 906 } 907 908 909 mutex_exit(&st->mtx); 910 mutex_enter(&sp->mtx); 911 912 if (st->freelist) { 913 TAILQ_REMOVE(&sp->freelist, st, list); 914 sp->nthreads_free--; 915 } else { 916 TAILQ_REMOVE(&sp->active, st, list); 917 sp->nthreads_active--; 918 } 919 sp->nthreads_total--; 920 921 mutex_exit(&sp->mtx); 922 kthread_exit(0); 923 } 924 925 /* 926 * send AIO signal. 927 */ 928 static void 929 aiost_sigsend(struct proc *p, struct sigevent *sig) 930 { 931 ksiginfo_t ksi; 932 933 if (sig->sigev_signo == 0 || sig->sigev_notify == SIGEV_NONE) 934 return; 935 936 KSI_INIT(&ksi); 937 ksi.ksi_signo = sig->sigev_signo; 938 ksi.ksi_code = SI_ASYNCIO; 939 ksi.ksi_value = sig->sigev_value; 940 941 mutex_enter(&proc_lock); 942 kpsignal(p, &ksi, NULL); 943 mutex_exit(&proc_lock); 944 } 945 946 /* 947 * Process write operation for non-blocking jobs. 948 */ 949 static int 950 io_write(struct aio_job *job) 951 { 952 return io_write_fallback(job); 953 } 954 955 /* 956 * Process read operation for non-blocking jobs. 957 */ 958 static int 959 io_read(struct aio_job *job) 960 { 961 return io_read_fallback(job); 962 } 963 964 /* 965 * Initialize UIO structure for I/O operation. 
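 * The caller supplies the iovec and uio storage; uio_vmspace is set to the
 * submitting process so the transfer targets its address space.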
966 */ 967 static int 968 uio_construct(struct aio_job *job, struct file **fp, struct iovec *aiov, 969 struct uio *auio) 970 { 971 struct aiocb *aiocbp = &job->aiocbp; 972 973 if (aiocbp->aio_nbytes > SSIZE_MAX) 974 return SET_ERROR(EINVAL); 975 976 *fp = job->fp; 977 if (*fp == NULL) { 978 return SET_ERROR(EBADF); 979 } 980 981 aiov->iov_base = aiocbp->aio_buf; 982 aiov->iov_len = aiocbp->aio_nbytes; 983 984 auio->uio_iov = aiov; 985 auio->uio_iovcnt = 1; 986 auio->uio_resid = aiocbp->aio_nbytes; 987 auio->uio_offset = aiocbp->aio_offset; 988 auio->uio_vmspace = job->p->p_vmspace; 989 990 return 0; 991 } 992 993 /* 994 * Perform synchronous write via file operations. 995 */ 996 static int 997 io_write_fallback(struct aio_job *job) 998 { 999 struct file *fp = NULL; 1000 struct iovec aiov; 1001 struct uio auio; 1002 struct aiocb *aiocbp = &job->aiocbp; 1003 int error; 1004 1005 error = uio_construct(job, &fp, &aiov, &auio); 1006 if (error) { 1007 goto done; 1008 } 1009 1010 /* Write using pinned file */ 1011 if ((fp->f_flag & FWRITE) == 0) { 1012 error = SET_ERROR(EBADF); 1013 goto done; 1014 } 1015 1016 auio.uio_rw = UIO_WRITE; 1017 error = (*fp->f_ops->fo_write)(fp, &aiocbp->aio_offset, 1018 &auio, fp->f_cred, FOF_UPDATE_OFFSET); 1019 1020 /* result */ 1021 job->aiocbp.aio_nbytes -= auio.uio_resid; 1022 job->aiocbp._retval = (error == 0) ? job->aiocbp.aio_nbytes : -1; 1023 done: 1024 job->aiocbp._errno = error; 1025 job->aiocbp._state = JOB_DONE; 1026 return 0; 1027 } 1028 1029 /* 1030 * Perform synchronous read via file operations. 1031 */ 1032 static int 1033 io_read_fallback(struct aio_job *job) 1034 { 1035 struct file *fp = NULL; 1036 struct iovec aiov; 1037 struct uio auio; 1038 struct aiocb *aiocbp = &job->aiocbp; 1039 int error; 1040 1041 error = uio_construct(job, &fp, &aiov, &auio); 1042 if (error) 1043 goto done; 1044 1045 /* Read using pinned file */ 1046 if ((fp->f_flag & FREAD) == 0) { 1047 error = SET_ERROR(EBADF); 1048 goto done; 1049 } 1050 1051 auio.uio_rw = UIO_READ; 1052 error = (*fp->f_ops->fo_read)(fp, &aiocbp->aio_offset, 1053 &auio, fp->f_cred, FOF_UPDATE_OFFSET); 1054 1055 job->aiocbp.aio_nbytes -= auio.uio_resid; 1056 job->aiocbp._retval = (error == 0) ? job->aiocbp.aio_nbytes : -1; 1057 done: 1058 job->aiocbp._errno = error; 1059 job->aiocbp._state = JOB_DONE; 1060 return 0; 1061 } 1062 1063 /* 1064 * Perform sync via file operations 1065 */ 1066 static int 1067 io_sync(struct aio_job *job) 1068 { 1069 struct file *fp = job->fp; 1070 int error = 0; 1071 1072 if (fp == NULL) { 1073 error = SET_ERROR(EBADF); 1074 goto done; 1075 } 1076 1077 if ((fp->f_flag & FWRITE) == 0) { 1078 error = SET_ERROR(EBADF); 1079 goto done; 1080 } 1081 1082 struct vnode *vp = fp->f_vnode; 1083 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1084 if (vp->v_type == VREG) { 1085 if (job->aio_op & AIO_DSYNC) { 1086 error = VOP_FSYNC(vp, fp->f_cred, 1087 FSYNC_WAIT | FSYNC_DATAONLY, 0, 0); 1088 } else { 1089 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 1090 } 1091 } 1092 VOP_UNLOCK(vp); 1093 1094 job->aiocbp._retval = (error == 0) ? 0 : -1; 1095 done: 1096 job->aiocbp._errno = error; 1097 job->aiocbp._state = JOB_DONE; 1098 1099 copyout(&job->aiocbp, job->aiocb_uptr, sizeof(job->aiocbp)); 1100 1101 return 0; 1102 } 1103 1104 /* 1105 * Destroy a servicing thread. Set st->exit high such that when we unblock the 1106 * thread blocking on st->service_cv it will invoke an exit routine within 1107 * aiost_entry. 
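 * (The termination request is carried in st->state as AIOST_STATE_TERMINATE
 * rather than a separate st->exit flag.)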
1108 */ 1109 static int 1110 aiost_terminate(struct aiost *st) 1111 { 1112 int error = 0; 1113 1114 mutex_enter(&st->mtx); 1115 1116 st->state = AIOST_STATE_TERMINATE; 1117 1118 mutex_exit(&st->mtx); 1119 1120 cv_signal(&st->service_cv); 1121 kthread_join(st->lwp); 1122 1123 cv_destroy(&st->service_cv); 1124 mutex_destroy(&st->mtx); 1125 1126 return error; 1127 } 1128 1129 /* 1130 * Ensure that the same job can not be enqueued twice. 1131 */ 1132 int 1133 aiosp_validate_conflicts(struct aiosp *aiosp, const void *uptr) 1134 { 1135 struct aiost *st; 1136 struct aio_job *job; 1137 1138 mutex_enter(&aiosp->mtx); 1139 1140 /* check active threads */ 1141 TAILQ_FOREACH(st, &aiosp->active, list) { 1142 job = st->job; 1143 if (job && st->job->aiocb_uptr == uptr) { 1144 mutex_exit(&aiosp->mtx); 1145 return EINVAL; 1146 } else if (st->fg) { 1147 mutex_enter(&st->fg->mtx); 1148 TAILQ_FOREACH(job, &st->fg->queue, list) { 1149 if (job->aiocb_uptr == uptr) { 1150 mutex_exit(&st->fg->mtx); 1151 mutex_exit(&aiosp->mtx); 1152 return EINVAL; 1153 } 1154 } 1155 mutex_exit(&st->fg->mtx); 1156 } 1157 } 1158 1159 /* no need to check freelist threads as they have no jobs */ 1160 1161 mutex_exit(&aiosp->mtx); 1162 return 0; 1163 } 1164 1165 /* 1166 * Get error status of async I/O operation 1167 */ 1168 int 1169 aiosp_error(struct aiosp *aiosp, const void *uptr, register_t *retval) 1170 { 1171 struct aio_job *job; 1172 int error = 0; 1173 1174 error = aiocbp_lookup_job(aiosp, uptr, &job); 1175 if (error || job == NULL) { 1176 return error; 1177 } 1178 1179 if (job->aiocbp._state == JOB_NONE) { 1180 mutex_exit(&job->mtx); 1181 return SET_ERROR(EINVAL); 1182 } 1183 1184 *retval = job->aiocbp._errno; 1185 mutex_exit(&job->mtx); 1186 1187 return error; 1188 } 1189 1190 /* 1191 * Get return value of completed async I/O operation 1192 */ 1193 int 1194 aiosp_return(struct aiosp *aiosp, const void *uptr, register_t *retval) 1195 { 1196 struct aiocbp *handle = NULL; 1197 struct aio_job *job = NULL; 1198 int error; 1199 1200 error = aiocbp_remove_job(aiosp, uptr, &job, &handle); 1201 if (error) { 1202 return error; 1203 } 1204 1205 if (job == NULL) { 1206 if (handle) { 1207 kmem_free(handle, sizeof(*handle)); 1208 } 1209 return SET_ERROR(ENOENT); 1210 } 1211 1212 if (job->aiocbp._state != JOB_DONE) { 1213 mutex_exit(&job->mtx); 1214 if (handle) { 1215 kmem_free(handle, sizeof(*handle)); 1216 } 1217 return SET_ERROR(EINVAL); 1218 } 1219 1220 *retval = job->aiocbp._retval; 1221 1222 if (job->fp) { 1223 aio_file_release(job->fp); 1224 job->fp = NULL; 1225 } 1226 1227 job->aiocbp._errno = 0; 1228 job->aiocbp._retval = -1; 1229 job->aiocbp._state = JOB_NONE; 1230 1231 mutex_exit(&job->mtx); 1232 if (handle) { 1233 kmem_free(handle, sizeof(*handle)); 1234 } 1235 1236 aio_job_fini(job); 1237 pool_put(&aio_job_pool, job); 1238 atomic_dec_uint(&aio_jobs_count); 1239 1240 return 0; 1241 } 1242 1243 /* 1244 * Hash function for aiocb user pointers. 1245 */ 1246 static inline u_int 1247 aiocbp_hash(const void *uptr) 1248 { 1249 return hash32_buf(&uptr, sizeof(uptr), HASH32_BUF_INIT); 1250 } 1251 1252 /* 1253 * Find aiocb entry by user pointer and locks. 
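 * the attached job: on success *jobp is returned with job->mtx held (or set
 * to NULL if the entry has no job) and the caller must release it.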
1254 */ 1255 static int 1256 aiocbp_lookup_job(struct aiosp *aiosp, const void *uptr, 1257 struct aio_job **jobp) 1258 { 1259 struct aiocbp *aiocbp; 1260 struct aio_job *job = NULL; 1261 u_int hash; 1262 1263 *jobp = NULL; 1264 hash = aiocbp_hash(uptr) & aiosp->aio_hash_mask; 1265 1266 mutex_enter(&aiosp->aio_hash_mtx); 1267 TAILQ_FOREACH(aiocbp, &aiosp->aio_hash[hash], list) { 1268 if (aiocbp->uptr == uptr) { 1269 job = aiocbp->job; 1270 if (job) { 1271 mutex_enter(&job->mtx); 1272 } 1273 1274 mutex_exit(&aiosp->aio_hash_mtx); 1275 *jobp = job; 1276 return 0; 1277 } 1278 } 1279 mutex_exit(&aiosp->aio_hash_mtx); 1280 1281 *jobp = NULL; 1282 return SET_ERROR(ENOENT); 1283 } 1284 1285 /* 1286 * Detach job and return job with job->mtx held 1287 */ 1288 static int 1289 aiocbp_remove_job(struct aiosp *aiosp, const void *uptr, 1290 struct aio_job **jobp, struct aiocbp **handlep) 1291 { 1292 struct aiocbp *aiocbp; 1293 struct aio_job *job = NULL; 1294 u_int hash; 1295 1296 *jobp = NULL; 1297 if (handlep) { 1298 *handlep = NULL; 1299 } 1300 hash = aiocbp_hash(uptr) & aiosp->aio_hash_mask; 1301 1302 mutex_enter(&aiosp->aio_hash_mtx); 1303 TAILQ_FOREACH(aiocbp, &aiosp->aio_hash[hash], list) { 1304 if (aiocbp->uptr == uptr) { 1305 job = aiocbp->job; 1306 if (job) { 1307 mutex_enter(&job->mtx); 1308 } 1309 1310 TAILQ_REMOVE(&aiosp->aio_hash[hash], aiocbp, list); 1311 mutex_exit(&aiosp->aio_hash_mtx); 1312 if (handlep) { 1313 *handlep = aiocbp; 1314 } 1315 *jobp = job; 1316 1317 return 0; 1318 } 1319 } 1320 mutex_exit(&aiosp->aio_hash_mtx); 1321 1322 return SET_ERROR(ENOENT); 1323 } 1324 1325 /* 1326 * Insert aiocb entry into hash table. 1327 */ 1328 int 1329 aiocbp_insert(struct aiosp *aiosp, struct aiocbp *aiocbp) 1330 { 1331 struct aiocbp *found; 1332 const void *uptr; 1333 u_int hash; 1334 1335 uptr = aiocbp->uptr; 1336 hash = aiocbp_hash(uptr) & aiosp->aio_hash_mask; 1337 1338 mutex_enter(&aiosp->aio_hash_mtx); 1339 TAILQ_FOREACH(found, &aiosp->aio_hash[hash], list) { 1340 if (found->uptr == uptr) { 1341 found->job = aiocbp->job; 1342 mutex_exit(&aiosp->aio_hash_mtx); 1343 return EEXIST; 1344 } 1345 } 1346 1347 TAILQ_INSERT_HEAD(&aiosp->aio_hash[hash], aiocbp, list); 1348 mutex_exit(&aiosp->aio_hash_mtx); 1349 1350 return 0; 1351 } 1352 1353 /* 1354 * Initialize aiocb hash table. 1355 */ 1356 int 1357 aiocbp_init(struct aiosp *aiosp, u_int hashsize) 1358 { 1359 if (!powerof2(hashsize)) { 1360 return EINVAL; 1361 } 1362 1363 aiosp->aio_hash = kmem_zalloc(hashsize * sizeof(*aiosp->aio_hash), 1364 KM_SLEEP); 1365 1366 aiosp->aio_hash_mask = hashsize - 1; 1367 aiosp->aio_hash_size = hashsize; 1368 1369 mutex_init(&aiosp->aio_hash_mtx, MUTEX_DEFAULT, IPL_NONE); 1370 1371 for (size_t i = 0; i < hashsize; i++) { 1372 TAILQ_INIT(&aiosp->aio_hash[i]); 1373 } 1374 1375 return 0; 1376 } 1377 1378 /* 1379 * Destroy aiocb hash table and free entries. 
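 * Only the aiocbp handles are freed here; the jobs they point to are not
 * touched.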
1380 */ 1381 void 1382 aiocbp_destroy(struct aiosp *aiosp) 1383 { 1384 if (aiosp->aio_hash == NULL) { 1385 return; 1386 } 1387 1388 struct aiocbp *aiocbp; 1389 1390 mutex_enter(&aiosp->aio_hash_mtx); 1391 for (size_t i = 0; i < aiosp->aio_hash_size; i++) { 1392 struct aiocbp *tmp; 1393 TAILQ_FOREACH_SAFE(aiocbp, &aiosp->aio_hash[i], list, tmp) { 1394 TAILQ_REMOVE(&aiosp->aio_hash[i], aiocbp, list); 1395 kmem_free(aiocbp, sizeof(*aiocbp)); 1396 } 1397 } 1398 mutex_exit(&aiosp->aio_hash_mtx); 1399 1400 kmem_free(aiosp->aio_hash, 1401 aiosp->aio_hash_size * sizeof(*aiosp->aio_hash)); 1402 aiosp->aio_hash = NULL; 1403 aiosp->aio_hash_mask = 0; 1404 aiosp->aio_hash_size = 0; 1405 mutex_destroy(&aiosp->aio_hash_mtx); 1406 } 1407 1408 /* 1409 * Initialize wait group for suspend operations. 1410 */ 1411 void 1412 aiowaitgroup_init(struct aiowaitgroup *wg) 1413 { 1414 wg->completed = 0; 1415 wg->total = 0; 1416 wg->refcnt = 1; 1417 wg->active = true; 1418 cv_init(&wg->done_cv, "aiodone"); 1419 mutex_init(&wg->mtx, MUTEX_DEFAULT, IPL_NONE); 1420 } 1421 1422 /* 1423 * Clean up wait group resources. 1424 */ 1425 void 1426 aiowaitgroup_fini(struct aiowaitgroup *wg) 1427 { 1428 cv_destroy(&wg->done_cv); 1429 mutex_destroy(&wg->mtx); 1430 kmem_free(wg, sizeof(*wg)); 1431 } 1432 1433 /* 1434 * Block until wait group signals completion. 1435 */ 1436 int 1437 aiowaitgroup_wait(struct aiowaitgroup *wg, int timo) 1438 { 1439 int error; 1440 1441 error = cv_timedwait_sig(&wg->done_cv, &wg->mtx, timo); 1442 if (error) { 1443 if (error == EWOULDBLOCK) { 1444 error = SET_ERROR(EAGAIN); 1445 } 1446 return error; 1447 } 1448 1449 return 0; 1450 } 1451 1452 /* 1453 * Initialize wait group link for job tracking. 1454 */ 1455 void 1456 aiowaitgrouplk_init(struct aiowaitgrouplk *lk) 1457 { 1458 mutex_init(&lk->mtx, MUTEX_DEFAULT, IPL_NONE); 1459 lk->n = 0; 1460 lk->s = 2; 1461 lk->wgs = kmem_alloc(sizeof(*lk->wgs) * lk->s, KM_SLEEP); 1462 } 1463 1464 /* 1465 * Clean up wait group link resources. 1466 * Caller must hold job->mtx 1467 */ 1468 void 1469 aiowaitgrouplk_fini(struct aiowaitgrouplk *lk) 1470 { 1471 mutex_enter(&lk->mtx); 1472 1473 for (size_t i = 0; i < lk->n; i++) { 1474 struct aiowaitgroup *wg = lk->wgs[i]; 1475 if (!wg) { 1476 continue; 1477 } 1478 1479 lk->wgs[i] = NULL; 1480 1481 mutex_enter(&wg->mtx); 1482 if (--wg->refcnt == 0) { 1483 mutex_exit(&wg->mtx); 1484 aiowaitgroup_fini(wg); 1485 } else { 1486 mutex_exit(&wg->mtx); 1487 } 1488 } 1489 1490 if (lk->wgs) { 1491 kmem_free(lk->wgs, lk->s * sizeof(*lk->wgs)); 1492 } 1493 lk->wgs = NULL; 1494 lk->n = 0; 1495 lk->s = 0; 1496 1497 mutex_exit(&lk->mtx); 1498 mutex_destroy(&lk->mtx); 1499 } 1500 1501 /* 1502 * Notify all wait groups of job completion. 1503 */ 1504 void 1505 aiowaitgrouplk_flush(struct aiowaitgrouplk *lk) 1506 { 1507 mutex_enter(&lk->mtx); 1508 for (int i = 0; i < lk->n; i++) { 1509 struct aiowaitgroup *wg = lk->wgs[i]; 1510 if (wg == NULL) { 1511 continue; 1512 } 1513 1514 mutex_enter(&wg->mtx); 1515 1516 if (wg->active) { 1517 wg->completed++; 1518 cv_signal(&wg->done_cv); 1519 } 1520 1521 if (--wg->refcnt == 0) { 1522 mutex_exit(&wg->mtx); 1523 aiowaitgroup_fini(wg); 1524 } else { 1525 mutex_exit(&wg->mtx); 1526 } 1527 } 1528 1529 if (lk->n) { 1530 kmem_free(lk->wgs, sizeof(*lk->wgs) * lk->s); 1531 1532 lk->n = 0; 1533 lk->s = 2; 1534 lk->wgs = kmem_alloc(sizeof(*lk->wgs) * lk->s, KM_SLEEP); 1535 } 1536 1537 mutex_exit(&lk->mtx); 1538 } 1539 1540 /* 1541 * Attach wait group to jobs notification list. 
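 * (the job's aiowaitgrouplk). Called with job->mtx held; lk->mtx is taken
 * here and the pointer array is grown when it is full.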
1542 */ 1543 void 1544 aiowaitgroup_join(struct aiowaitgroup *wg, struct aiowaitgrouplk *lk) 1545 { 1546 mutex_enter(&lk->mtx); 1547 if (lk->n == lk->s) { 1548 size_t new_size = lk->s * lk->s; 1549 1550 void **new_wgs = kmem_zalloc(new_size * 1551 sizeof(*new_wgs), KM_SLEEP); 1552 1553 memcpy(new_wgs, lk->wgs, lk->n * sizeof(*lk->wgs)); 1554 kmem_free(lk->wgs, lk->s * sizeof(*lk->wgs)); 1555 1556 lk->s = new_size; 1557 lk->wgs = new_wgs; 1558 } 1559 lk->wgs[lk->n] = wg; 1560 lk->n++; 1561 wg->total++; 1562 wg->refcnt++; 1563 mutex_exit(&lk->mtx); 1564 } 1565 1566 /* 1567 * Enqueue the job. 1568 */ 1569 static int 1570 aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) 1571 { 1572 struct proc *p = curlwp->l_proc; 1573 struct aioproc *aio; 1574 struct aio_job *a_job; 1575 struct aiocb aiocb; 1576 struct sigevent *sig; 1577 int error; 1578 1579 /* Get the data structure from user-space */ 1580 error = copyin(aiocb_uptr, &aiocb, sizeof(aiocb)); 1581 if (error) { 1582 return error; 1583 } 1584 1585 /* Check if signal is set, and validate it */ 1586 sig = &aiocb.aio_sigevent; 1587 if (sig->sigev_signo < 0 || sig->sigev_signo >= NSIG || 1588 sig->sigev_notify < SIGEV_NONE || sig->sigev_notify > SIGEV_SA) { 1589 return SET_ERROR(EINVAL); 1590 } 1591 1592 /* Buffer and byte count */ 1593 if (((AIO_SYNC | AIO_DSYNC) & op) == 0) 1594 if (aiocb.aio_buf == NULL || aiocb.aio_nbytes > SSIZE_MAX) 1595 return SET_ERROR(EINVAL); 1596 1597 /* Check the opcode, if LIO_NOP - simply ignore */ 1598 if (op == AIO_LIO) { 1599 KASSERT(lio != NULL); 1600 if (aiocb.aio_lio_opcode == LIO_WRITE) { 1601 op = AIO_WRITE; 1602 } else if (aiocb.aio_lio_opcode == LIO_READ) { 1603 op = AIO_READ; 1604 } else { 1605 if (aiocb.aio_lio_opcode == LIO_NOP) { 1606 return 0; 1607 } else { 1608 return SET_ERROR(EINVAL); 1609 } 1610 } 1611 } else { 1612 KASSERT(lio == NULL); 1613 } 1614 1615 /* 1616 * Look for already existing job. If found the job is in-progress. 1617 * According to POSIX this is invalid, so return the error. 1618 */ 1619 aio = p->p_aio; 1620 if (aio) { 1621 error = aiosp_validate_conflicts(&aio->aiosp, aiocb_uptr); 1622 if (error) { 1623 return SET_ERROR(error); 1624 } 1625 } 1626 1627 /* 1628 * Check if AIO structure is initialized, if not initialize it 1629 */ 1630 if (p->p_aio == NULL) { 1631 if (aio_procinit(p)) { 1632 return SET_ERROR(EAGAIN); 1633 } 1634 } 1635 aio = p->p_aio; 1636 1637 /* 1638 * Set the state with errno, and copy data 1639 * structure back to the user-space. 
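 * The aiocb is marked EINPROGRESS so aio_error() reports the operation
 * as in progress right away.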
1640 */ 1641 aiocb._state = JOB_WIP; 1642 aiocb._errno = SET_ERROR(EINPROGRESS); 1643 aiocb._retval = -1; 1644 error = copyout(&aiocb, aiocb_uptr, sizeof(aiocb)); 1645 if (error) { 1646 return error; 1647 } 1648 1649 /* Allocate and initialize a new AIO job */ 1650 a_job = pool_get(&aio_job_pool, PR_WAITOK | PR_ZERO); 1651 1652 memcpy(&a_job->aiocbp, &aiocb, sizeof(aiocb)); 1653 a_job->aiocb_uptr = aiocb_uptr; 1654 a_job->aio_op |= op; 1655 a_job->lio = lio; 1656 mutex_init(&a_job->mtx, MUTEX_DEFAULT, IPL_NONE); 1657 aiowaitgrouplk_init(&a_job->lk); 1658 a_job->p = p; 1659 a_job->on_queue = false; 1660 a_job->completed = false; 1661 a_job->fp = NULL; 1662 1663 const int fd = aiocb.aio_fildes; 1664 struct file *fp = fd_getfile2(p, fd); 1665 if (fp == NULL) { 1666 aio_job_fini(a_job); 1667 pool_put(&aio_job_pool, a_job); 1668 return SET_ERROR(EBADF); 1669 } 1670 1671 aio_file_hold(fp); 1672 a_job->fp = fp; 1673 1674 struct aiocbp *aiocbp = kmem_zalloc(sizeof(*aiocbp), KM_SLEEP); 1675 aiocbp->job = a_job; 1676 aiocbp->uptr = aiocb_uptr; 1677 error = aiocbp_insert(&aio->aiosp, aiocbp); 1678 if (error) { 1679 aio_file_release(a_job->fp); 1680 a_job->fp = NULL; 1681 kmem_free(aiocbp, sizeof(*aiocbp)); 1682 aio_job_fini(a_job); 1683 pool_put(&aio_job_pool, a_job); 1684 return SET_ERROR(error); 1685 } 1686 1687 /* 1688 * Add the job to the queue, update the counters, and 1689 * notify the AIO worker thread to handle the job. 1690 */ 1691 mutex_enter(&aio->aio_mtx); 1692 if (atomic_inc_uint_nv(&aio_jobs_count) > aio_max || 1693 aio->jobs_count >= aio_listio_max) { 1694 mutex_exit(&aio->aio_mtx); 1695 error = SET_ERROR(EAGAIN); 1696 goto error; 1697 } 1698 1699 mutex_exit(&aio->aio_mtx); 1700 1701 error = aiosp_enqueue_job(&aio->aiosp, a_job); 1702 if (error) { 1703 error = SET_ERROR(EAGAIN); 1704 goto error; 1705 } 1706 1707 mutex_enter(&aio->aio_mtx); 1708 aio->jobs_count++; 1709 if (lio) { 1710 lio->refcnt++; 1711 } 1712 mutex_exit(&aio->aio_mtx); 1713 1714 return 0; 1715 error: 1716 aiocbp_remove_job(&aio->aiosp, aiocb_uptr, &a_job, NULL); 1717 kmem_free(aiocbp, sizeof(*aiocbp)); 1718 1719 aio_file_release(a_job->fp); 1720 a_job->fp = NULL; 1721 1722 aio_job_fini(a_job); 1723 atomic_dec_uint(&aio_jobs_count); 1724 pool_put(&aio_job_pool, a_job); 1725 1726 return SET_ERROR(error); 1727 } 1728 1729 /* 1730 * Syscall functions. 
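 * These are thin wrappers that translate syscall arguments and dispatch
 * into the service-pool primitives above.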
1731 */
1732 int
1733 sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap,
1734 register_t *retval)
1735 {
1736 struct proc *p = l->l_proc;
1737 struct aioproc *aio;
1738 struct aiocb *aiocbp_uptr;
1739 struct filedesc *fdp = p->p_fd;
1740 struct aiosp *aiosp;
1741 struct aio_job *job;
1742 struct file *fp;
1743 struct aiost_file_group find = { 0 }, *fg;
1744 unsigned int fildes, canceled = 0;
1745 bool have_active = false;
1746 fdtab_t *dt;
1747 int error = 0;
1748
1749 fildes = (unsigned int)SCARG(uap, fildes);
1750 dt = atomic_load_consume(&fdp->fd_dt);
1751 if (fildes >= dt->dt_nfiles) {
1752 return SET_ERROR(EBADF);
1753 }
1754 if (dt->dt_ff[fildes] == NULL || dt->dt_ff[fildes]->ff_file == NULL) {
1755 return SET_ERROR(EBADF);
1756 }
1757 fp = dt->dt_ff[fildes]->ff_file;
1758
1759 /* Check if AIO structure is initialized */
1760 if (p->p_aio == NULL) {
1761 *retval = AIO_NOTCANCELED;
1762 return 0;
1763 }
1764
1765 aio = p->p_aio;
1766 aiocbp_uptr = (struct aiocb *)SCARG(uap, aiocbp);
1767 aiosp = &aio->aiosp;
1768
1769 mutex_enter(&aio->aio_mtx);
1770 mutex_enter(&aiosp->mtx);
1771
1772 /*
1773 * If there is a live file-group for this fp, then some requests
1774 * are active and cannot be canceled.
1775 */
1776 find.fp = fp;
1777 fg = RB_FIND(aiost_file_tree, aiosp->fg_root, &find);
1778 if (fg) {
1779 have_active = fg->queue_size ? true : false;
1780 }
1781
1782 /*
1783 * If aiocbp_uptr != NULL, cancel only the job associated with that
1784 * uptr.
1785 * If aiocbp_uptr == NULL, cancel all jobs associated with fildes.
1786 */
1787 if (aiocbp_uptr) {
1788 error = aiocbp_lookup_job(aiosp, aiocbp_uptr, &job);
1789 if (error || job == NULL) {
1790 *retval = AIO_ALLDONE;
1791 goto finish;
1792 }
1793
1794 if (job->completed) {
1795 *retval = AIO_ALLDONE;
1796 } else {
1797 *retval = AIO_NOTCANCELED;
1798 }
1799
1800 /*
1801 * If the job is still on sp->jobs (signified by
1802 * job->on_queue), it has not been distributed yet and can
1803 * be canceled. If it is not on the queue, it is currently
1804 * being processed.
1805 */
1806 if (job->on_queue) {
1807 aio_job_cancel(aiosp, job);
1808 aio_job_mark_complete(job);
1809 *retval = AIO_CANCELED;
1810 }
1811
1812 mutex_exit(&job->mtx);
1813 } else {
1814 /*
1815 * Cancel all queued jobs associated with this file descriptor.
1816 */
1817 struct aio_job *tmp;
1818 TAILQ_FOREACH_SAFE(job, &aiosp->jobs, list, tmp) {
1819 if (job->aiocbp.aio_fildes == (int)fildes) {
1820 aio_job_cancel(aiosp, job);
1821 aio_job_mark_complete(job);
1822 canceled++;
1823 }
1824 }
1825
1826 if (canceled && !have_active) {
1827 *retval = AIO_CANCELED;
1828 } else if (!canceled) {
1829 *retval = have_active ? AIO_NOTCANCELED : AIO_ALLDONE;
1830 } else {
1831 *retval = AIO_NOTCANCELED;
1832 }
1833 }
1834 finish:
1835 mutex_exit(&aiosp->mtx);
1836 mutex_exit(&aio->aio_mtx);
1837
1838 return 0;
1839 }
1840
1841 int
1842 sys_aio_error(struct lwp *l, const struct sys_aio_error_args *uap,
1843 register_t *retval)
1844 {
1845 struct proc *p = l->l_proc;
1846 struct aioproc *aio = p->p_aio;
1847
1848 if (aio == NULL) {
1849 return SET_ERROR(EINVAL);
1850 }
1851
1852 const void *uptr = SCARG(uap, aiocbp);
1853 return aiosp_error(&aio->aiosp, uptr, retval);
1854 }
1855
1856 int
1857 sys_aio_fsync(struct lwp *l, const struct sys_aio_fsync_args *uap,
1858 register_t *retval)
1859 {
1860 int op = SCARG(uap, op);
1861
1862 if ((op != O_DSYNC) && (op != O_SYNC)) {
1863 return SET_ERROR(EINVAL);
1864 }
1865
1866 op = (op == O_DSYNC) ?
AIO_DSYNC : AIO_SYNC; 1867 1868 return aio_enqueue_job(op, SCARG(uap, aiocbp), NULL); 1869 } 1870 1871 int 1872 sys_aio_read(struct lwp *l, const struct sys_aio_read_args *uap, 1873 register_t *retval) 1874 { 1875 int error; 1876 1877 error = aio_enqueue_job(AIO_READ, SCARG(uap, aiocbp), NULL); 1878 if (error) { 1879 return error; 1880 } 1881 1882 struct proc *p = l->l_proc; 1883 struct aioproc *aio = p->p_aio; 1884 KASSERT(aio); 1885 return aiosp_distribute_jobs(&aio->aiosp); 1886 } 1887 1888 int 1889 sys_aio_return(struct lwp *l, const struct sys_aio_return_args *uap, 1890 register_t *retval) 1891 { 1892 struct proc *p = l->l_proc; 1893 struct aioproc *aio = p->p_aio; 1894 1895 if (aio == NULL) { 1896 return SET_ERROR(EINVAL); 1897 } 1898 1899 const void *uptr = SCARG(uap, aiocbp); 1900 return aiosp_return(&aio->aiosp, uptr, retval); 1901 } 1902 1903 int 1904 sys___aio_suspend50(struct lwp *l, const struct sys___aio_suspend50_args *uap, 1905 register_t *retval) 1906 { 1907 struct proc *p = l->l_proc; 1908 struct aioproc *aio = p->p_aio; 1909 struct aiocb **list; 1910 struct timespec ts; 1911 int error, nent; 1912 1913 nent = SCARG(uap, nent); 1914 if (nent <= 0 || nent > aio_listio_max) { 1915 return SET_ERROR(EAGAIN); 1916 } 1917 1918 if (aio == NULL) { 1919 return SET_ERROR(EINVAL); 1920 } 1921 1922 if (SCARG(uap, timeout)) { 1923 /* Convert timespec to ticks */ 1924 error = copyin(SCARG(uap, timeout), &ts, 1925 sizeof(ts)); 1926 if (error) 1927 return error; 1928 } 1929 1930 list = kmem_alloc(nent * sizeof(*list), KM_SLEEP); 1931 error = copyin(SCARG(uap, list), list, nent * sizeof(*list)); 1932 if (error) { 1933 goto out; 1934 } 1935 1936 error = aiosp_suspend(&aio->aiosp, list, nent, SCARG(uap, timeout) ? 1937 &ts : NULL, AIOSP_SUSPEND_ANY); 1938 out: 1939 kmem_free(list, nent * sizeof(*list)); 1940 return error; 1941 } 1942 1943 int 1944 sys_aio_write(struct lwp *l, const struct sys_aio_write_args *uap, 1945 register_t *retval) 1946 { 1947 int error; 1948 1949 error = aio_enqueue_job(AIO_WRITE, SCARG(uap, aiocbp), NULL); 1950 if (error) { 1951 return error; 1952 } 1953 1954 struct proc *p = l->l_proc; 1955 struct aioproc *aio = p->p_aio; 1956 KASSERT(aio); 1957 return aiosp_distribute_jobs(&aio->aiosp); 1958 } 1959 1960 int 1961 sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap, 1962 register_t *retval) 1963 { 1964 struct proc *p = l->l_proc; 1965 struct aioproc *aio; 1966 struct aiocb **aiocbp_list; 1967 struct lio_req *lio; 1968 int i, error = 0, errcnt, mode, nent; 1969 1970 mode = SCARG(uap, mode); 1971 nent = SCARG(uap, nent); 1972 1973 /* Non-accurate checks for the limit and invalid values */ 1974 if (nent < 1 || nent > aio_listio_max) { 1975 return SET_ERROR(EINVAL); 1976 } 1977 1978 /* Check if AIO structure is initialized, if not initialize it */ 1979 if (p->p_aio == NULL) { 1980 if (aio_procinit(p)) { 1981 return SET_ERROR(EAGAIN); 1982 } 1983 } 1984 aio = p->p_aio; 1985 1986 /* Create a LIO structure */ 1987 lio = pool_get(&aio_lio_pool, PR_WAITOK); 1988 lio->refcnt = 1; 1989 error = 0; 1990 1991 switch (mode) { 1992 case LIO_WAIT: 1993 memset(&lio->sig, 0, sizeof(lio->sig)); 1994 break; 1995 case LIO_NOWAIT: 1996 /* Check for signal, validate it */ 1997 if (SCARG(uap, sig)) { 1998 struct sigevent *sig = &lio->sig; 1999 2000 error = copyin(SCARG(uap, sig), &lio->sig, 2001 sizeof(lio->sig)); 2002 if (error == 0 && 2003 (sig->sigev_signo < 0 || 2004 sig->sigev_signo >= NSIG || 2005 sig->sigev_notify < SIGEV_NONE || 2006 sig->sigev_notify > SIGEV_SA)) 
2007 error = SET_ERROR(EINVAL); 2008 } else { 2009 memset(&lio->sig, 0, sizeof(lio->sig)); 2010 } 2011 break; 2012 default: 2013 error = SET_ERROR(EINVAL); 2014 break; 2015 } 2016 2017 if (error != 0) { 2018 pool_put(&aio_lio_pool, lio); 2019 return error; 2020 } 2021 2022 /* Get the list from user-space */ 2023 aiocbp_list = kmem_alloc(nent * sizeof(*aiocbp_list), KM_SLEEP); 2024 error = copyin(SCARG(uap, list), aiocbp_list, 2025 nent * sizeof(*aiocbp_list)); 2026 if (error) { 2027 mutex_enter(&aio->aio_mtx); 2028 goto err; 2029 } 2030 2031 /* Enqueue all jobs */ 2032 errcnt = 0; 2033 for (i = 0; i < nent; i++) { 2034 error = aio_enqueue_job(AIO_LIO, aiocbp_list[i], lio); 2035 /* 2036 * According to POSIX, in such error case it may 2037 * fail with other I/O operations initiated. 2038 */ 2039 if (error) { 2040 errcnt++; 2041 } 2042 } 2043 2044 error = aiosp_distribute_jobs(&aio->aiosp); 2045 if (error) { 2046 goto err; 2047 } 2048 2049 mutex_enter(&aio->aio_mtx); 2050 2051 /* Return an error if any */ 2052 if (errcnt) { 2053 error = SET_ERROR(EIO); 2054 goto err; 2055 } 2056 2057 if (mode == LIO_WAIT) { 2058 error = aiosp_suspend(&aio->aiosp, aiocbp_list, nent, 2059 NULL, AIOSP_SUSPEND_ALL); 2060 } 2061 2062 err: 2063 if (--lio->refcnt != 0) { 2064 lio = NULL; 2065 } 2066 mutex_exit(&aio->aio_mtx); 2067 if (lio != NULL) { 2068 aiost_sigsend(p, &lio->sig); 2069 pool_put(&aio_lio_pool, lio); 2070 } 2071 kmem_free(aiocbp_list, nent * sizeof(*aiocbp_list)); 2072 return error; 2073 } 2074 2075 /* 2076 * SysCtl 2077 */ 2078 static int 2079 sysctl_aio_listio_max(SYSCTLFN_ARGS) 2080 { 2081 struct sysctlnode node; 2082 int error, newsize; 2083 2084 node = *rnode; 2085 node.sysctl_data = &newsize; 2086 2087 newsize = aio_listio_max; 2088 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 2089 if (error || newp == NULL) { 2090 return error; 2091 } 2092 2093 if (newsize < 1 || newsize > aio_max) { 2094 return SET_ERROR(EINVAL); 2095 } 2096 aio_listio_max = newsize; 2097 2098 return 0; 2099 } 2100 2101 static int 2102 sysctl_aio_max(SYSCTLFN_ARGS) 2103 { 2104 struct sysctlnode node; 2105 int error, newsize; 2106 2107 node = *rnode; 2108 node.sysctl_data = &newsize; 2109 2110 newsize = aio_max; 2111 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 2112 if (error || newp == NULL) { 2113 return error; 2114 } 2115 2116 if (newsize < 1 || newsize < aio_listio_max) { 2117 return SET_ERROR(EINVAL); 2118 } 2119 aio_max = newsize; 2120 2121 return 0; 2122 } 2123 2124 SYSCTL_SETUP(sysctl_aio_init, "aio sysctl") 2125 { 2126 int rv; 2127 2128 rv = sysctl_createv(clog, 0, NULL, NULL, 2129 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE, 2130 CTLTYPE_INT, "posix_aio", 2131 SYSCTL_DESCR("Version of IEEE Std 1003.1 and its " 2132 "Asynchronous I/O option to which the " 2133 "system attempts to conform"), 2134 NULL, _POSIX_ASYNCHRONOUS_IO, NULL, 0, 2135 CTL_KERN, CTL_CREATE, CTL_EOL); 2136 2137 if (rv != 0) { 2138 return; 2139 } 2140 2141 rv = sysctl_createv(clog, 0, NULL, NULL, 2142 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 2143 CTLTYPE_INT, "aio_listio_max", 2144 SYSCTL_DESCR("Maximum number of asynchronous I/O " 2145 "operations in a single list I/O call"), 2146 sysctl_aio_listio_max, 0, &aio_listio_max, 0, 2147 CTL_KERN, CTL_CREATE, CTL_EOL); 2148 2149 if (rv != 0) { 2150 return; 2151 } 2152 2153 rv = sysctl_createv(clog, 0, NULL, NULL, 2154 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 2155 CTLTYPE_INT, "aio_max", 2156 SYSCTL_DESCR("Maximum number of asynchronous I/O " 2157 "operations"), 2158 sysctl_aio_max, 0, &aio_max, 0, 2159 CTL_KERN, 
CTL_CREATE, CTL_EOL); 2160 2161 return; 2162 } 2163 2164 /* 2165 * Debugging 2166 */ 2167 #if defined(DDB) 2168 void 2169 aio_print_jobs(void (*pr)(const char *, ...)) 2170 { 2171 struct proc *p = curlwp->l_proc; 2172 struct aioproc *aio; 2173 struct aiosp *sp; 2174 struct aiost *st; 2175 struct aio_job *job; 2176 2177 if (p == NULL) { 2178 (*pr)("AIO: no current process context.\n"); 2179 return; 2180 } 2181 2182 aio = p->p_aio; 2183 if (aio == NULL) { 2184 (*pr)("AIO: not initialized (pid=%d).\n", p->p_pid); 2185 return; 2186 } 2187 2188 sp = &aio->aiosp; 2189 2190 (*pr)("AIO: pid=%d\n", p->p_pid); 2191 (*pr)("AIO: global jobs=%u, proc jobs=%u\n", aio_jobs_count, 2192 aio->jobs_count); 2193 (*pr)("AIO: sp{ total_threads=%zu active=%zu free=%zu pending=%zu\n" 2194 " processing=%lu hash_buckets=%zu mask=%#x }\n", 2195 sp->nthreads_total, sp->nthreads_active, sp->nthreads_free, 2196 sp->jobs_pending, (u_long)sp->njobs_processing, 2197 sp->aio_hash_size, sp->aio_hash_mask); 2198 2199 /* Pending queue */ 2200 (*pr)("\nqueue (%zu pending):\n", sp->jobs_pending); 2201 TAILQ_FOREACH(job, &sp->jobs, list) { 2202 (*pr)(" op=%d err=%d state=%d uptr=%p completed=%d\n", 2203 job->aio_op, job->aiocbp._errno, job->aiocbp._state, 2204 job->aiocb_uptr, job->completed); 2205 (*pr)(" fd=%d off=%llu buf=%p nbytes=%zu lio=%p\n", 2206 job->aiocbp.aio_fildes, 2207 (unsigned long long)job->aiocbp.aio_offset, 2208 (void *)job->aiocbp.aio_buf, 2209 (size_t)job->aiocbp.aio_nbytes, 2210 job->lio); 2211 } 2212 2213 /* Active service threads */ 2214 (*pr)("\nactive threads (%zu):\n", sp->nthreads_active); 2215 TAILQ_FOREACH(st, &sp->active, list) { 2216 (*pr)(" lwp=%p state=%d freelist=%d\n", 2217 st->lwp, st->state, st->freelist ? 1 : 0); 2218 2219 if (st->job) { 2220 struct aio_job *j = st->job; 2221 (*pr)( 2222 " job: op=%d err=%d state=%d uptr=%p\n", 2223 j->aio_op, j->aiocbp._errno, 2224 j->aiocbp._state, j->aiocb_uptr); 2225 (*pr)( 2226 " fd=%d off=%llu buf=%p nbytes=%zu\n", 2227 j->aiocbp.aio_fildes, 2228 (unsigned long long)j->aiocbp.aio_offset, 2229 j->aiocbp.aio_buf, 2230 (size_t)j->aiocbp.aio_nbytes); 2231 } 2232 2233 if (st->fg) { 2234 (*pr)(" file-group: fp=%p qlen=%zu\n", 2235 st->fg->fp, st->fg->queue_size); 2236 } 2237 } 2238 2239 /* Freelist summary */ 2240 (*pr)("\nfree threads (%zu)\n", sp->nthreads_free); 2241 2242 /* aiocbp hash maps user aiocbp to kernel job */ 2243 (*pr)("\naiocbp hash: buckets=%zu\n", sp->aio_hash_size); 2244 if (sp->aio_hash != NULL && sp->aio_hash_size != 0) { 2245 size_t b; 2246 for (b = 0; b < sp->aio_hash_size; b++) { 2247 struct aiocbp *hc; 2248 if (TAILQ_EMPTY(&sp->aio_hash[b])) { 2249 continue; 2250 } 2251 2252 (*pr)(" [%zu]:", b); 2253 TAILQ_FOREACH(hc, &sp->aio_hash[b], list) { 2254 (*pr)(" uptr=%p job=%p", hc->uptr, hc->job); 2255 } 2256 (*pr)("\n"); 2257 } 2258 } 2259 } 2260 #endif /* defined(DDB) */ 2261