/*	$NetBSD: lfs_subr.c,v 1.110 2026/01/05 05:02:47 perseant Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant (at) hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_subr.c	8.4 (Berkeley) 5/8/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.110 2026/01/05 05:02:47 perseant Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/kauth.h>

#include <ufs/lfs/ulfs_inode.h>
#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_accessors.h>
#include <ufs/lfs/lfs_kernel.h>
#include <ufs/lfs/lfs_extern.h>

#ifdef DEBUG
const char *lfs_res_names[LFS_NB_COUNT] = {
	"summary",
	"superblock",
	"file block",
	"cluster",
	"clean",
	"blkiov",
};
#endif

int lfs_res_qty[LFS_NB_COUNT] = {
	LFS_N_SUMMARIES,
	LFS_N_SBLOCKS,
	LFS_N_IBLOCKS,
	LFS_N_CLUSTERS,
	LFS_N_CLEAN,
	LFS_N_BLKIOV,
};

void
lfs_setup_resblks(struct lfs *fs)
{
	int i, j;
	int maxbpp;

	ASSERT_NO_SEGLOCK(fs);
	fs->lfs_resblk = malloc(LFS_N_TOTAL * sizeof(res_t), M_SEGMENT,
	    M_WAITOK);
	for (i = 0; i < LFS_N_TOTAL; i++) {
		fs->lfs_resblk[i].inuse = 0;
		fs->lfs_resblk[i].p = NULL;
	}
	for (i = 0; i < LFS_RESHASH_WIDTH; i++)
		LIST_INIT(fs->lfs_reshash + i);

	/*
	 * These types of allocations can be larger than a page,
	 * so we can't use the pool subsystem for them.
	 */
	for (i = 0, j = 0; j < LFS_N_SUMMARIES; j++, i++)
		fs->lfs_resblk[i].size = lfs_sb_getsumsize(fs);
	for (j = 0; j < LFS_N_SBLOCKS; j++, i++)
		fs->lfs_resblk[i].size = LFS_SBPAD;
	for (j = 0; j < LFS_N_IBLOCKS; j++, i++)
		fs->lfs_resblk[i].size = lfs_sb_getbsize(fs);
	for (j = 0; j < LFS_N_CLUSTERS; j++, i++)
		fs->lfs_resblk[i].size = MAXPHYS;
	for (j = 0; j < LFS_N_CLEAN; j++, i++)
		fs->lfs_resblk[i].size = MAXPHYS;
	for (j = 0; j < LFS_N_BLKIOV; j++, i++)
		fs->lfs_resblk[i].size = LFS_MARKV_MAXBLKCNT * sizeof(BLOCK_INFO);

	for (i = 0; i < LFS_N_TOTAL; i++) {
		fs->lfs_resblk[i].p = malloc(fs->lfs_resblk[i].size,
		    M_SEGMENT, M_WAITOK);
	}

	/*
	 * Initialize pools for small types (XXX is BPP small?)
	 */
	pool_init(&fs->lfs_clpool, sizeof(struct lfs_cluster), 0, 0, 0,
	    "lfsclpl", &pool_allocator_nointr, IPL_NONE);
	pool_init(&fs->lfs_segpool, sizeof(struct segment), 0, 0, 0,
	    "lfssegpool", &pool_allocator_nointr, IPL_NONE);
	/* XXX: should this int32 be 32/64? */
	maxbpp = ((lfs_sb_getsumsize(fs) - SEGSUM_SIZE(fs)) / sizeof(int32_t) + 2);
	maxbpp = MIN(maxbpp, lfs_segsize(fs) / lfs_sb_getfsize(fs) + 2);
	pool_init(&fs->lfs_bpppool, maxbpp * sizeof(struct buf *), 0, 0, 0,
	    "lfsbpppl", &pool_allocator_nointr, IPL_NONE);
}
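
/*
 * Illustrative layout of the reserve array set up above, as a sketch
 * for documentation only; the index ranges follow directly from the
 * LFS_N_* counts used in the sizing loops:
 *
 *	[0 .. LFS_N_SUMMARIES)		segment summaries, sumsize bytes
 *	[.. + LFS_N_SBLOCKS)		superblocks, LFS_SBPAD bytes
 *	[.. + LFS_N_IBLOCKS)		file/indirect blocks, bsize bytes
 *	[.. + LFS_N_CLUSTERS)		cluster buffers, MAXPHYS bytes
 *	[.. + LFS_N_CLEAN)		cleaner buffers, MAXPHYS bytes
 *	[.. + LFS_N_BLKIOV)		lfs_markv() BLOCK_INFO arrays
 */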

void
lfs_free_resblks(struct lfs *fs)
{
	int i;

	pool_destroy(&fs->lfs_bpppool);
	pool_destroy(&fs->lfs_segpool);
	pool_destroy(&fs->lfs_clpool);

	mutex_enter(&lfs_lock);
	for (i = 0; i < LFS_N_TOTAL; i++) {
		while (fs->lfs_resblk[i].inuse)
			mtsleep(&fs->lfs_resblk, PRIBIO + 1, "lfs_free", 0,
			    &lfs_lock);
		if (fs->lfs_resblk[i].p != NULL)
			free(fs->lfs_resblk[i].p, M_SEGMENT);
	}
	free(fs->lfs_resblk, M_SEGMENT);
	mutex_exit(&lfs_lock);
}

static unsigned int
lfs_mhash(void *vp)
{
	return (unsigned int)(((unsigned long)vp) >> 2) % LFS_RESHASH_WIDTH;
}

/*
 * Return memory of the given size for the given purpose, or use one of a
 * number of spare last-resort buffers, if malloc returns NULL.
 */
void *
lfs_malloc(struct lfs *fs, size_t size, int type)
{
	struct lfs_res_blk *re;
	void *r;
	int i, start;
	unsigned int h;

	ASSERT_MAYBE_SEGLOCK(fs);
	r = NULL;

	/* If we keep no reserve blocks of this type, just do a waiting malloc */
	if (lfs_res_qty[type] == 0) {
		r = malloc(size, M_SEGMENT, M_WAITOK);
		return r;
	}

	/* Otherwise try a quick malloc, and if it works, great */
	if ((r = malloc(size, M_SEGMENT, M_NOWAIT)) != NULL) {
		return r;
	}

	/*
	 * If malloc returned NULL, we are forced to use one of our
	 * reserve blocks.  We have on hand at least one summary block,
	 * at least one cluster block, at least one superblock,
	 * and several indirect blocks.
	 */

	mutex_enter(&lfs_lock);
	/* skip over blocks of other types */
	for (i = 0, start = 0; i < type; i++)
		start += lfs_res_qty[i];
	while (r == NULL) {
		for (i = 0; i < lfs_res_qty[type]; i++) {
			if (fs->lfs_resblk[start + i].inuse == 0) {
				re = fs->lfs_resblk + start + i;
				re->inuse = 1;
				r = re->p;
				KASSERT(re->size >= size);
				h = lfs_mhash(r);
				LIST_INSERT_HEAD(&fs->lfs_reshash[h], re, res);
				mutex_exit(&lfs_lock);
				return r;
			}
		}
		DLOG((DLOG_MALLOC, "sleeping on %s (%d)\n",
		    lfs_res_names[type], lfs_res_qty[type]));
		mtsleep(&fs->lfs_resblk, PVM, "lfs_malloc", 0,
		    &lfs_lock);
		DLOG((DLOG_MALLOC, "done sleeping on %s\n",
		    lfs_res_names[type]));
	}
	/* NOTREACHED */
	mutex_exit(&lfs_lock);
	return r;
}

void
lfs_free(struct lfs *fs, void *p, int type)
{
	unsigned int h;
	res_t *re;

	ASSERT_MAYBE_SEGLOCK(fs);
	h = lfs_mhash(p);
	mutex_enter(&lfs_lock);
	LIST_FOREACH(re, &fs->lfs_reshash[h], res) {
		if (re->p == p) {
			KASSERT(re->inuse == 1);
			LIST_REMOVE(re, res);
			re->inuse = 0;
			wakeup(&fs->lfs_resblk);
			mutex_exit(&lfs_lock);
			return;
		}
	}

#ifdef notyet /* XXX this assert fires */
	/* Not found in the hash, so it must not be a reserve block */
	for (int i = 0; i < LFS_N_TOTAL; i++) {
		KDASSERTMSG(fs->lfs_resblk[i].p != p,
		    "lfs_free: inconsistent reserved block");
	}
#endif

	mutex_exit(&lfs_lock);

	/*
	 * If we didn't find it, it was malloc'd directly; free it.
	 */
	free(p, M_SEGMENT);
}
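
/*
 * Illustrative (hypothetical) caller of the reserve allocator above.
 * This is a sketch only; it assumes the LFS_NB_* reserve-type
 * constants from lfs.h (the same enum that sizes lfs_res_names[]).
 * The type passed to lfs_free() must match the one passed to
 * lfs_malloc() so the reserve accounting stays consistent:
 *
 *	void *sump;
 *
 *	sump = lfs_malloc(fs, lfs_sb_getsumsize(fs), LFS_NB_SUMMARY);
 *	... fill in and write the segment summary ...
 *	lfs_free(fs, sump, LFS_NB_SUMMARY);
 */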

/*
 * Fragment lock.  This is a reader/writer lock controlling, primarily,
 * the expansion of file fragments.  (The reader/writer distinction is
 * not used yet: the enter_exit and read_write arguments are ignored,
 * and the lock simply maps onto the prelock below.)
 */
void
lfs_fraglock_enter(struct lfs *fs, int enter_exit)
{
	lfs_prelock(fs, 0);
}

bool
lfs_fraglock_held(struct lfs *fs, int read_write)
{
	return lfs_prelock_held(fs);
}

void
lfs_fraglock_exit(struct lfs *fs)
{
	lfs_preunlock(fs);
}

/*
 * lfs_seglock --
 *	Single thread the segment writer.
 */
int
lfs_seglock(struct lfs *fs, unsigned long flags)
{
	struct segment *sp;
	int error;

	error = lfs_prelock(fs, flags);
	if (error)
		return error;

	if (fs->lfs_seglock) {
		/* Recursive entry: bump the count and add our flags */
		++fs->lfs_seglock;
		fs->lfs_sp->seg_flags |= flags;
		return 0;
	}

	fs->lfs_seglock = 1;
	fs->lfs_cleanind = 0;

	LFS_ENTER_LOG("seglock", __FILE__, __LINE__, 0, flags, curproc->p_pid);

	sp = fs->lfs_sp = pool_get(&fs->lfs_segpool, PR_WAITOK);
	sp->bpp = pool_get(&fs->lfs_bpppool, PR_WAITOK);
	sp->seg_flags = flags;
	sp->vp = NULL;
	sp->seg_iocount = 0;
	sp->bytes_written = 0;
	sp->gatherblock_loopcount = 0;
	(void) lfs_initseg(fs, 0);

	/*
	 * Keep a cumulative count of the outstanding I/O operations.  If the
	 * disk drive catches up with us it could go to zero before we finish,
	 * so we artificially increment it by one until we've scheduled all of
	 * the writes we intend to do.
	 */
	mutex_enter(&lfs_lock);
	++fs->lfs_iocount;
	fs->lfs_startseg = lfs_sb_getcurseg(fs);
	mutex_exit(&lfs_lock);
	return 0;
}
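
/*
 * Illustrative (hypothetical) use of the segment lock taken above.
 * This is a sketch only, using SEGM_* flags already referenced in this
 * file; every successful lfs_seglock() must be balanced by a matching
 * lfs_segunlock():
 *
 *	if (lfs_seglock(fs, SEGM_CKP | SEGM_SYNC) == 0) {
 *		... gather and write dirty blocks as a checkpoint ...
 *		lfs_segunlock(fs);
 *	}
 */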

/*
 * Create a marker inode.
 */
struct inode *
lfs_create_marker(void)
{
	struct inode *marker;

	marker = pool_get(&lfs_inode_pool, PR_WAITOK);
	memset(marker, 0, sizeof(*marker));
	marker->inode_ext.lfs = pool_get(&lfs_inoext_pool, PR_WAITOK);
	memset(marker->inode_ext.lfs, 0, sizeof(*marker->inode_ext.lfs));
	marker->i_state |= IN_MARKER;

	return marker;
}

void
lfs_destroy_marker(struct inode *marker)
{
	pool_put(&lfs_inoext_pool, marker->inode_ext.lfs);
	pool_put(&lfs_inode_pool, marker);
}

static void lfs_unmark_dirop(struct lfs *);

static void
lfs_unmark_dirop(struct lfs *fs)
{
	struct inode *ip, *marker;
	struct vnode *vp;
	int doit;

	KASSERT(fs != NULL);
	ASSERT_NO_SEGLOCK(fs);
	mutex_enter(&lfs_lock);
	doit = !(fs->lfs_flags & LFS_UNDIROP);
	if (doit)
		fs->lfs_flags |= LFS_UNDIROP;
	mutex_exit(&lfs_lock);

	if (!doit)
		return;

	marker = lfs_create_marker();

	mutex_enter(&lfs_lock);
	TAILQ_INSERT_HEAD(&fs->lfs_dchainhd, marker, i_lfs_dchain);
	while ((ip = TAILQ_NEXT(marker, i_lfs_dchain)) != NULL) {
		/* Advance the marker past ip so we can drop the lock below */
		TAILQ_REMOVE(&fs->lfs_dchainhd, marker, i_lfs_dchain);
		TAILQ_INSERT_AFTER(&fs->lfs_dchainhd, ip, marker,
		    i_lfs_dchain);
		if (ip->i_state & IN_MARKER)
			continue;
		vp = ITOV(ip);
		if ((ip->i_state & (IN_ADIROP | IN_CDIROP)) == IN_CDIROP) {
			--lfs_dirvcount;
			--fs->lfs_dirvcount;
			vp->v_uflag &= ~VU_DIROP;
			TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
			wakeup(&lfs_dirvcount);
			fs->lfs_unlockvp = vp;
			mutex_exit(&lfs_lock);
			vrele(vp);
			mutex_enter(&lfs_lock);
			fs->lfs_unlockvp = NULL;
			ip->i_state &= ~IN_CDIROP;
		}
	}
	TAILQ_REMOVE(&fs->lfs_dchainhd, marker, i_lfs_dchain);
	fs->lfs_flags &= ~LFS_UNDIROP;
	wakeup(&fs->lfs_flags);
	mutex_exit(&lfs_lock);

	lfs_destroy_marker(marker);
}

static void
lfs_auto_segclean(struct lfs *fs)
{
	int i, waited, changed;
	SEGUSE *sup;
	struct buf *bp;

	ASSERT_SEGLOCK(fs);
	/*
	 * Now that the superblocks have been scheduled for writing, but
	 * while we still hold the segment lock, run through the segment
	 * list promoting empty segments.
	 * XXX - do we really need to do them all at once?
	 */
	waited = 0;
	for (i = 0; i < lfs_sb_getnseg(fs); i++) {
		changed = 0;
		LFS_SEGENTRY(sup, fs, i, bp);
		if (sup->su_nbytes == 0) {
			switch (sup->su_flags & (SEGUSE_ACTIVE
						 | SEGUSE_DIRTY
						 | SEGUSE_EMPTY
						 | SEGUSE_READY)) {
			case SEGUSE_DIRTY:
				sup->su_flags |= SEGUSE_EMPTY;
				++changed;
				break;

			case SEGUSE_DIRTY | SEGUSE_EMPTY:
				sup->su_flags |= SEGUSE_READY;
				++changed;
				break;

			case SEGUSE_DIRTY | SEGUSE_EMPTY | SEGUSE_READY:
				/* Make sure the sb is written */
				mutex_enter(&lfs_lock);
				while (waited == 0 && fs->lfs_sbactive)
					mtsleep(&fs->lfs_sbactive, PRIBIO+1,
					    "lfs asb", 0, &lfs_lock);
				mutex_exit(&lfs_lock);
				waited = 1;

				lfs_markclean(fs, i, sup, NOCRED, curlwp);
				++changed;
				break;

			default:
				break;
			}
		}
		if (changed)
			LFS_WRITESEGENTRY(sup, fs, i, bp);
		else
			brelse(bp, 0);
	}
}
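
/*
 * Sketch of the promotion performed by lfs_auto_segclean() above: an
 * empty segment advances one step per checkpoint pass, so it survives
 * at least two further checkpoints before it is handed back to the
 * allocator:
 *
 *	pass N:		DIRTY			-> DIRTY|EMPTY
 *	pass N+1:	DIRTY|EMPTY		-> DIRTY|EMPTY|READY
 *	pass N+2:	DIRTY|EMPTY|READY	-> marked clean
 */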

bool
lfs_seglock_held(struct lfs *fs)
{
	return lfs_prelock_held(fs) && fs->lfs_seglock != 0;
}

/*
 * lfs_segunlock --
 *	Single thread the segment writer.
 */
void
lfs_segunlock(struct lfs *fs)
{
	struct segment *sp;
	unsigned long sync, ckp;
	struct buf *bp;
	int do_unmark_dirop = 0;

	sp = fs->lfs_sp;

	if (!LFS_SEGLOCK_HELD(fs))
		panic("lfs seglock not held");

	if (fs->lfs_seglock == 1) {
		if ((sp->seg_flags & SEGM_CLEAN) == 0)
			do_unmark_dirop = 1;
		sync = sp->seg_flags & SEGM_SYNC;
		ckp = sp->seg_flags & SEGM_CKP;

		/* We should have a segment summary, and nothing else */
		KASSERT(sp->cbpp == sp->bpp + 1);

		/* Free allocated segment summary */
		lfs_sb_suboffset(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs)));
		bp = *sp->bpp;
		lfs_freebuf(fs, bp);

		pool_put(&fs->lfs_bpppool, sp->bpp);
		sp->bpp = NULL;

		/*
		 * If we're not sync, we're done with sp, get rid of it.
		 * Otherwise, we keep a local copy around but free
		 * fs->lfs_sp so another process can use it (we have to
		 * wait but they don't have to wait for us).
		 */
		if (!sync)
			pool_put(&fs->lfs_segpool, sp);
		fs->lfs_sp = NULL;

		/*
		 * If the I/O count is non-zero, sleep until it reaches zero.
		 * At the moment, the user's process hangs around so we can
		 * sleep.
		 */
		mutex_enter(&lfs_lock);
		if (--fs->lfs_iocount <= 1)
			wakeup(&fs->lfs_iocount);
		mutex_exit(&lfs_lock);

		/*
		 * If we're not checkpointing, we don't have to block
		 * other processes to wait for a synchronous write
		 * to complete.
		 */
		if (!ckp) {
			LFS_ENTER_LOG("segunlock_std", __FILE__, __LINE__, 0, 0, curproc->p_pid);

			--fs->lfs_seglock;
		}
		/*
		 * We let checkpoints happen asynchronously.  That means
		 * that during recovery, we have to roll forward between
		 * the two segments described by the first and second
		 * superblocks to make sure that the checkpoint described
		 * by a superblock completed.
		 */
		mutex_enter(&lfs_lock);
		while (ckp && sync && fs->lfs_iocount) {
			(void)mtsleep(&fs->lfs_iocount, PRIBIO + 1,
			    "lfs_iocount", 0, &lfs_lock);
			DLOG((DLOG_SEG, "sleeping on iocount %p == %d\n",
			    fs, fs->lfs_iocount));
		}
		while (sync && sp->seg_iocount) {
			(void)mtsleep(&sp->seg_iocount, PRIBIO + 1,
			    "seg_iocount", 0, &lfs_lock);
			DLOG((DLOG_SEG, "sleeping on iocount %p == %d\n",
			    sp, sp->seg_iocount));
		}
		mutex_exit(&lfs_lock);
		if (sync)
			pool_put(&fs->lfs_segpool, sp);

		if (ckp) {
			fs->lfs_nactive = 0;
			/* If we *know* everything's on disk, write both sbs */
			/* XXX should wait for this one */
			if (sync)
				lfs_writesuper(fs, lfs_sb_getsboff(fs, fs->lfs_activesb));
			lfs_writesuper(fs, lfs_sb_getsboff(fs, 1 - fs->lfs_activesb));
			if (!(fs->lfs_ivnode->v_mount->mnt_iflag &
			      (IMNT_UNMOUNT | IMNT_WANTRDONLY))) {
				lfs_auto_segclean(fs);
				/* If sync, we can clean the remainder too */
				if (sync)
					lfs_auto_segclean(fs);
			}
			fs->lfs_activesb = 1 - fs->lfs_activesb;

			LFS_ENTER_LOG("segunlock_ckp", __FILE__, __LINE__, 0, 0, curproc->p_pid);

			--fs->lfs_seglock;
		}
		if (do_unmark_dirop)
			lfs_unmark_dirop(fs);
	} else {
		--fs->lfs_seglock;
		KASSERT(fs->lfs_seglock != 0);
	}

	lfs_preunlock(fs);
}

/*
 * Single thread the cleaner.
 */
int
lfs_cleanerlock(struct lfs *fs)
{
	int error;

	error = 0;
	mutex_enter(&lfs_lock);
	while (fs->lfs_cleanlock) {
		printf("cleanlock=%p, waiting\n", fs->lfs_cleanlock);
		error = cv_wait_sig(&fs->lfs_cleanercv, &lfs_lock);
		if (error)
			break;
	}
	if (error == 0)
		fs->lfs_cleanlock = curlwp;
	mutex_exit(&lfs_lock);

	return error;
}

/*
 * Check whether we hold the cleaner lock.
 */
int
lfs_cleanerlock_held(struct lfs *fs)
{
	int retval = 0;

	mutex_enter(&lfs_lock);
	retval = (fs->lfs_cleanlock == curlwp);
	mutex_exit(&lfs_lock);

	return retval;
}

/*
 * Release the cleaner lock, clearing the cleaning list first.
 */
void
lfs_cleanerunlock(struct lfs *fs)
{
	struct inode *ip;

	/* Clear out the cleaning list */
	while ((ip = TAILQ_FIRST(&fs->lfs_cleanhd)) != NULL)
		lfs_clrclean(fs, ITOV(ip));

	mutex_enter(&lfs_lock);
	fs->lfs_cleanlock = NULL;
	cv_broadcast(&fs->lfs_cleanercv);
	mutex_exit(&lfs_lock);
}
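
/*
 * Illustrative (hypothetical) cleaner-side sequence for the lock
 * above; a sketch only.  Vnodes placed on the cleaning list with
 * lfs_setclean() are cleared again by lfs_cleanerunlock() before the
 * lock is dropped:
 *
 *	if (lfs_cleanerlock(fs) == 0) {
 *		lfs_setclean(fs, vp);
 *		... rewrite the live blocks of vp ...
 *		lfs_cleanerunlock(fs);
 *	}
 */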

/*
 * Preventative / prerequisite lock.
 * This is the "lock" part of the segment lock,
 * though it can also be taken independently to
 * prevent segment writing.
 */
int
lfs_prelock(struct lfs *fs, unsigned long flags)
{
	int error;

	mutex_enter(&lfs_lock);

	error = 0;
	if (fs->lfs_prelock) {
		if (fs->lfs_prelocklwp == curlwp) {
			/* Locked by us already */
			++fs->lfs_prelock;
			goto out;
		} else if (flags & SEGM_PAGEDAEMON) {
			/* Pagedaemon cannot wait */
			error = EWOULDBLOCK;
			goto out;
		} else {
			/* Wait for lock */
			while (fs->lfs_prelock) {
				cv_wait(&fs->lfs_prelockcv, &lfs_lock);
			}
		}
	}

	/* Acquire lock */
	fs->lfs_prelock = 1;
	fs->lfs_prelocklwp = curlwp;
out:
	mutex_exit(&lfs_lock);

	return error;
}

bool
lfs_prelock_held(struct lfs *fs)
{
	bool held;
	bool waslocked;

	waslocked = mutex_owned(&lfs_lock);
	if (!waslocked)
		mutex_enter(&lfs_lock);

	held = (fs->lfs_prelock && fs->lfs_prelocklwp == curlwp);

	if (!waslocked)
		mutex_exit(&lfs_lock);

	return held;
}

void
lfs_preunlock(struct lfs *fs)
{
	mutex_enter(&lfs_lock);
	if (--fs->lfs_prelock == 0) {
		fs->lfs_prelocklwp = NULL;
		cv_broadcast(&fs->lfs_prelockcv);
	}
	mutex_exit(&lfs_lock);
}

/*
 * Drain dirops and start writer.
 *
 * No locks are held when we enter and none are held when we return.
 */
void
lfs_writer_enter(struct lfs *fs, const char *wmesg)
{
	int error __diagused;

	ASSERT_NO_SEGLOCK(fs);
	mutex_enter(&lfs_lock);

	/* disallow dirops during flush */
	fs->lfs_writer++;

	while (fs->lfs_dirops > 0) {
		++fs->lfs_diropwait;
		error = mtsleep(&fs->lfs_writer, PRIBIO+1, wmesg, 0,
		    &lfs_lock);
		KASSERT(error == 0);
		--fs->lfs_diropwait;
	}

	mutex_exit(&lfs_lock);
}

int
lfs_writer_tryenter(struct lfs *fs)
{
	int writer_set;

	ASSERT_MAYBE_SEGLOCK(fs);
	mutex_enter(&lfs_lock);
	writer_set = (fs->lfs_dirops == 0);
	if (writer_set)
		fs->lfs_writer++;
	mutex_exit(&lfs_lock);

	return writer_set;
}

void
lfs_writer_leave(struct lfs *fs)
{
	bool dowakeup;

	ASSERT_MAYBE_SEGLOCK(fs);
	mutex_enter(&lfs_lock);
	dowakeup = !(--fs->lfs_writer);
	if (dowakeup)
		cv_broadcast(&fs->lfs_diropscv);
	mutex_exit(&lfs_lock);
}
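
/*
 * Illustrative (hypothetical) pairing of the writer lock above; a
 * sketch only, and the wait message string is arbitrary.  Directory
 * operations are drained before lfs_writer_enter() returns and are
 * permitted again after lfs_writer_leave():
 *
 *	lfs_writer_enter(fs, "lfsflush");
 *	... flush or checkpoint without new dirops starting ...
 *	lfs_writer_leave(fs);
 */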

/*
 * Unlock, wait for the cleaner, then relock to where we were before.
 * To be used only at a fairly high level, to address a paucity of free
 * segments propagated back from lfs_gop_write().
 */
void
lfs_segunlock_relock(struct lfs *fs)
{
	int n = fs->lfs_seglock;
	u_int16_t seg_flags;
	CLEANERINFO *cip;
	struct buf *bp;

	if (n == 0)
		return;

	/* Write anything we've already gathered to disk */
	lfs_writeseg(fs, fs->lfs_sp);

	/* Tell cleaner */
	mutex_enter(&lfs_lock);
	fs->lfs_flags |= LFS_MUSTCLEAN;
	mutex_exit(&lfs_lock);
	LFS_CLEANERINFO(cip, fs, bp);
	lfs_ci_setflags(fs, cip,
	    lfs_ci_getflags(fs, cip) | LFS_CLEANER_MUST_CLEAN);
	LFS_SYNC_CLEANERINFO(cip, fs, bp, 1);

	/* Save segment flags for later */
	seg_flags = fs->lfs_sp->seg_flags;

	while (fs->lfs_seglock)
		lfs_segunlock(fs);

	/* Wait for the cleaner */
	lfs_wakeup_cleaner(fs);
	mutex_enter(&lfs_lock);
	while (LFS_STARVED_FOR_SEGS(fs))
		mtsleep(&fs->lfs_availsleep, PRIBIO, "relock", 0,
		    &lfs_lock);
	mutex_exit(&lfs_lock);

	/* Put the segment lock back the way it was. */
	while (n--)
		lfs_seglock(fs, seg_flags);

	/* Cleaner can relax now */
	mutex_enter(&lfs_lock);
	fs->lfs_flags &= ~LFS_MUSTCLEAN;
	mutex_exit(&lfs_lock);
	LFS_CLEANERINFO(cip, fs, bp);
	lfs_ci_setflags(fs, cip,
	    lfs_ci_getflags(fs, cip) & ~LFS_CLEANER_MUST_CLEAN);
	LFS_SYNC_CLEANERINFO(cip, fs, bp, 1);

	return;
}

/*
 * Wake up the cleaner, provided that nowrap is not set.
 */
void
lfs_wakeup_cleaner(struct lfs *fs)
{
	if (fs->lfs_nowrap > 0)
		return;

	cv_broadcast(&fs->lfs_nextsegsleep);
	cv_broadcast(&lfs_allclean_wakeup);
}

/*
 * If it wasn't already on the cleaning list,
 * add it and take a reference.  We will clear
 * the list before dropping the cleaner lock.
 */
void
lfs_setclean(struct lfs *fs, struct vnode *vp)
{
	struct inode *ip;

	KASSERT(lfs_cleanerlock_held(fs));

	vref(vp);

	ip = VTOI(vp);
	mutex_enter(&lfs_lock);
	if (ip->i_state & IN_CLEANING) {
		/* Already on the list; drop the extra reference */
		mutex_exit(&lfs_lock);
		vrele(vp);
		return;
	}

	TAILQ_INSERT_HEAD(&fs->lfs_cleanhd, ip, i_lfs_clean);
	LFS_SET_UINO(VTOI(vp), IN_CLEANING);
	mutex_exit(&lfs_lock);
}

/*
 * Remove a vnode from the cleaning list,
 * clear IN_CLEANING and drop the reference.
 * Find any invalid buffers on the vnode and
 * toss them.
 */
void
lfs_clrclean(struct lfs *fs, struct vnode *vp)
{
	struct inode *ip;

	KASSERT(lfs_cleanerlock_held(fs));

	ip = VTOI(vp);
	mutex_enter(&lfs_lock);
	if (!(ip->i_state & IN_CLEANING)) {
		mutex_exit(&lfs_lock);
		return;
	}
	mutex_exit(&lfs_lock);

	if (vp->v_type == VREG && vp != fs->lfs_ivnode)
		lfs_ungather(fs, NULL, vp, lfs_match_data);

	mutex_enter(&lfs_lock);
	TAILQ_REMOVE(&fs->lfs_cleanhd, ip, i_lfs_clean);
	LFS_CLR_UINO(VTOI(vp), IN_CLEANING);
	mutex_exit(&lfs_lock);
	vrele(vp);
}

/*
 * Remove the specified flag from all segments.
 */
void
lfs_seguse_clrflag_all(struct lfs *fs, uint32_t flag)
{
	SEGUSE *sup;
	struct buf *bp;
	int i;

	for (i = 0; i < lfs_sb_getnseg(fs); i++) {
		LFS_SEGENTRY(sup, fs, i, bp);
		if (sup->su_flags & flag) {
			sup->su_flags &= ~flag;
			LFS_WRITESEGENTRY(sup, fs, i, bp);
		} else
			brelse(bp, 0);
	}
}