1 /* $NetBSD: lfs_alloc.c,v 1.154 2026/01/05 05:02:47 perseant Exp $ */ 2 3 /*- 4 * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Konrad E. Schroder <perseant (at) hhhh.org>. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 /* 32 * Copyright (c) 1991, 1993 33 * The Regents of the University of California. All rights reserved. 34 * 35 * Redistribution and use in source and binary forms, with or without 36 * modification, are permitted provided that the following conditions 37 * are met: 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. Neither the name of the University nor the names of its contributors 44 * may be used to endorse or promote products derived from this software 45 * without specific prior written permission. 46 * 47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 57 * SUCH DAMAGE. 58 * 59 * @(#)lfs_alloc.c 8.4 (Berkeley) 1/4/94 60 */ 61 62 #include <sys/cdefs.h> 63 __KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.154 2026/01/05 05:02:47 perseant Exp $"); 64 65 #if defined(_KERNEL_OPT) 66 #include "opt_quota.h" 67 #endif 68 69 #include <sys/param.h> 70 #include <sys/systm.h> 71 #include <sys/kernel.h> 72 #include <sys/buf.h> 73 #include <sys/lock.h> 74 #include <sys/vnode.h> 75 #include <sys/syslog.h> 76 #include <sys/mount.h> 77 #include <sys/malloc.h> 78 #include <sys/pool.h> 79 #include <sys/proc.h> 80 #include <sys/kauth.h> 81 82 #include <ufs/lfs/ulfs_quotacommon.h> 83 #include <ufs/lfs/ulfs_inode.h> 84 #include <ufs/lfs/ulfsmount.h> 85 #include <ufs/lfs/ulfs_extern.h> 86 87 #include <ufs/lfs/lfs.h> 88 #include <ufs/lfs/lfs_accessors.h> 89 #include <ufs/lfs/lfs_extern.h> 90 #include <ufs/lfs/lfs_kernel.h> 91 92 int lfs_do_check_freelist = 0; 93 94 /* Constants for inode free bitmap */ 95 #define BMSHIFT 5 /* 2 ** 5 = 32 */ 96 #define BMMASK ((1 << BMSHIFT) - 1) 97 #define SET_BITMAP_FREE(F, I) do { \ 98 DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d set\n", (int)(I), \ 99 (int)((I) >> BMSHIFT), (int)((I) & BMMASK))); \ 100 (F)->lfs_ino_bitmap[(I) >> BMSHIFT] |= (1U << ((I) & BMMASK)); \ 101 } while (0) 102 #define CLR_BITMAP_FREE(F, I) do { \ 103 DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d clr\n", (int)(I), \ 104 (int)((I) >> BMSHIFT), (int)((I) & BMMASK))); \ 105 (F)->lfs_ino_bitmap[(I) >> BMSHIFT] &= ~(1U << ((I) & BMMASK)); \ 106 } while(0) 107 108 #define ISSET_BITMAP_FREE(F, I) \ 109 ((F)->lfs_ino_bitmap[(I) >> BMSHIFT] & (1U << ((I) & BMMASK))) 110 111 /* 112 * Add a new block to the Ifile, to accommodate future file creations. 113 * Called with the segment lock held. 114 */ 115 int 116 lfs_extend_ifile(struct lfs *fs, kauth_cred_t cred) 117 { 118 struct vnode *vp; 119 struct inode *ip; 120 IFILE64 *ifp64; 121 IFILE32 *ifp32; 122 IFILE_V1 *ifp_v1; 123 struct buf *bp, *cbp; 124 int error; 125 daddr_t i, blkno, xmax; 126 ino_t oldhead, maxino, tail; 127 CLEANERINFO *cip; 128 129 ASSERT_SEGLOCK(fs); 130 131 /* XXX should check or assert that we aren't readonly. */ 132 133 /* 134 * Get a block and extend the ifile inode. Leave the buffer for 135 * the block in bp. 136 */ 137 138 vp = fs->lfs_ivnode; 139 ip = VTOI(vp); 140 blkno = lfs_lblkno(fs, ip->i_size); 141 if ((error = lfs_balloc(vp, ip->i_size, lfs_sb_getbsize(fs), cred, 0, 142 &bp)) != 0) { 143 return (error); 144 } 145 ip->i_size += lfs_sb_getbsize(fs); 146 lfs_dino_setsize(fs, ip->i_din, ip->i_size); 147 uvm_vnp_setsize(vp, ip->i_size); 148 149 /* 150 * Compute the new number of inodes, and reallocate the in-memory 151 * inode freemap. 152 */ 153 154 maxino = ((ip->i_size >> lfs_sb_getbshift(fs)) - lfs_sb_getcleansz(fs) - 155 lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs); 156 fs->lfs_ino_bitmap = (lfs_bm_t *) 157 realloc(fs->lfs_ino_bitmap, ((maxino + BMMASK) >> BMSHIFT) * 158 sizeof(lfs_bm_t), M_SEGMENT, M_WAITOK); 159 KASSERT(fs->lfs_ino_bitmap != NULL); 160 161 /* first new inode number */ 162 i = (blkno - lfs_sb_getsegtabsz(fs) - lfs_sb_getcleansz(fs)) * 163 lfs_sb_getifpb(fs); 164 165 /* inode number to stop at (XXX: why *x*max?) */ 166 xmax = i + lfs_sb_getifpb(fs); 167 168 /* 169 * We insert the new inodes at the head of the free list. 170 * Under normal circumstances, the free list is empty here, 171 * so we are also incidentally placing them at the end (which 172 * we must do if we are to keep them in order). 173 */ 174 LFS_GET_HEADFREE(fs, cip, cbp, &oldhead); 175 LFS_PUT_HEADFREE(fs, cip, cbp, i); 176 LFS_GET_TAILFREE(fs, cip, cbp, &tail); 177 DLOG((DLOG_ALLOC, "oldhead=%jd, i=%jd, xmax=%jd, oldtail=%jd\n", 178 (intmax_t)oldhead, (intmax_t)i, (intmax_t)xmax, 179 (intmax_t)tail)); 180 if (tail == LFS_UNUSED_INUM) { 181 tail = xmax - 1; 182 LFS_PUT_TAILFREE(fs, cip, cbp, tail); 183 } 184 KASSERTMSG((lfs_sb_getfreehd(fs) != LFS_UNUSED_INUM), 185 "inode 0 allocated [2]"); 186 187 188 /* 189 * Initialize the ifile block. 190 * 191 * XXX: these loops should be restructured to use the accessor 192 * functions instead of using cutpaste polymorphism. 193 */ 194 195 if (fs->lfs_is64) { 196 for (ifp64 = (IFILE64 *)bp->b_data; i < xmax; ++ifp64) { 197 SET_BITMAP_FREE(fs, i); 198 ifp64->if_version = 1; 199 ifp64->if_daddr = LFS_UNUSED_DADDR; 200 ifp64->if_nextfree = ++i; 201 } 202 ifp64--; 203 ifp64->if_nextfree = oldhead; 204 } else if (lfs_sb_getversion(fs) > 1) { 205 for (ifp32 = (IFILE32 *)bp->b_data; i < xmax; ++ifp32) { 206 SET_BITMAP_FREE(fs, i); 207 ifp32->if_version = 1; 208 ifp32->if_daddr = LFS_UNUSED_DADDR; 209 ifp32->if_nextfree = ++i; 210 } 211 ifp32--; 212 ifp32->if_nextfree = oldhead; 213 } else { 214 for (ifp_v1 = (IFILE_V1 *)bp->b_data; i < xmax; ++ifp_v1) { 215 SET_BITMAP_FREE(fs, i); 216 ifp_v1->if_version = 1; 217 ifp_v1->if_daddr = LFS_UNUSED_DADDR; 218 ifp_v1->if_nextfree = ++i; 219 } 220 ifp_v1--; 221 ifp_v1->if_nextfree = oldhead; 222 } 223 DLOG((DLOG_ALLOC, " now head=%jd tail=%jd\n", 224 (intmax_t)xmax - lfs_sb_getifpb(fs), (intmax_t)tail)); 225 226 /* 227 * Write out the new block. 228 */ 229 230 (void) LFS_BWRITE_LOG(bp); /* Ifile */ 231 232 return 0; 233 } 234 235 /* 236 * Allocate an inode for a new file. 237 * 238 * Takes the segment lock. Also (while holding it) takes lfs_lock 239 * to frob fs->lfs_fmod. 240 * 241 * XXX: the mode argument is unused; should just get rid of it. 242 */ 243 /* ARGSUSED */ 244 /* VOP_BWRITE 2i times */ 245 int 246 lfs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred, 247 ino_t *ino, int *gen) 248 { 249 struct lfs *fs; 250 struct buf *bp, *cbp; 251 IFILE *ifp; 252 int error; 253 CLEANERINFO *cip; 254 255 fs = VTOI(pvp)->i_lfs; 256 if (fs->lfs_ronly) 257 return EROFS; 258 259 if (!(fs->lfs_flags & LFS_NOTYET)) 260 ASSERT_NO_SEGLOCK(fs); 261 262 DEBUG_CHECK_FREELIST(fs); 263 264 lfs_prelock(fs, 0); 265 266 /* Get the head of the freelist. */ 267 LFS_GET_HEADFREE(fs, cip, cbp, ino); 268 269 /* paranoia */ 270 KASSERT(*ino != LFS_UNUSED_INUM && *ino != LFS_IFILE_INUM); 271 DLOG((DLOG_ALLOC, "lfs_valloc: allocate inode %" PRId64 "\n", 272 *ino)); 273 274 /* Update the in-memory inode freemap */ 275 CLR_BITMAP_FREE(fs, *ino); 276 277 /* 278 * Fetch the ifile entry and make sure the inode is really 279 * free. 280 */ 281 LFS_IENTRY(ifp, fs, *ino, bp); 282 if (lfs_if_getdaddr(fs, ifp) != LFS_UNUSED_DADDR) 283 panic("lfs_valloc: inuse inode %" PRId64 " on the free list", 284 *ino); 285 286 /* Update the inode freelist head in the superblock. */ 287 LFS_PUT_HEADFREE(fs, cip, cbp, lfs_if_getnextfree(fs, ifp)); 288 DLOG((DLOG_ALLOC, "lfs_valloc: headfree %" PRId64 " -> %ju\n", 289 *ino, (uintmax_t)lfs_if_getnextfree(fs, ifp))); 290 291 /* 292 * Retrieve the version number from the ifile entry. It was 293 * bumped by vfree, so don't bump it again. 294 */ 295 *gen = lfs_if_getversion(fs, ifp); 296 297 /* Done with ifile entry */ 298 299 /* 300 * Note LFS_ILLEGAL_DADDR == LFS_UNUSED_DADDR 301 * unless DEBUG is set. In that case we 302 * use that address to track inodes that have been 303 * allocated (and so not on the free list) but do not 304 * have proper disk addresses yet. 305 */ 306 lfs_if_setdaddr(fs, ifp, LFS_ILLEGAL_DADDR); 307 lfs_if_setnextfree(fs, ifp, LFS_UNUSED_INUM); 308 LFS_WRITEIENTRY(ifp, fs, *ino, bp); 309 310 if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) { 311 /* 312 * No more inodes; extend the ifile so that the next 313 * lfs_valloc will succeed. 314 */ 315 if ((error = lfs_extend_ifile(fs, cred)) != 0) { 316 /* restore the freelist */ 317 LFS_PUT_HEADFREE(fs, cip, cbp, *ino); 318 319 /* unlock and return */ 320 lfs_preunlock(fs); 321 return error; 322 } 323 } 324 KASSERTMSG((lfs_sb_getfreehd(fs) != LFS_UNUSED_INUM), 325 "inode 0 allocated [3]"); 326 327 /* Set superblock modified bit */ 328 mutex_enter(&lfs_lock); 329 fs->lfs_fmod = 1; 330 mutex_exit(&lfs_lock); 331 332 /* increment file count */ 333 lfs_sb_addnfiles(fs, 1); 334 335 /* done */ 336 lfs_preunlock(fs); 337 338 DEBUG_CHECK_FREELIST(fs); 339 return 0; 340 } 341 342 /* 343 * Allocate an inode for a new file, with given inode number and 344 * version. 345 * 346 * Called in the same context as lfs_valloc and therefore shares the 347 * same locking assumptions. 348 */ 349 int 350 lfs_valloc_fixed(struct lfs *fs, ino_t ino, int vers) 351 { 352 IFILE *ifp; 353 struct buf *bp, *cbp; 354 ino_t headino, thisino, oldnext, tailino; 355 CLEANERINFO *cip; 356 int extended = 0; 357 358 if (fs->lfs_ronly) 359 return EROFS; 360 361 if (!(fs->lfs_flags & LFS_NOTYET)) 362 ASSERT_NO_SEGLOCK(fs); 363 364 DEBUG_CHECK_FREELIST(fs); 365 366 lfs_prelock(fs, 0); 367 368 /* 369 * If the ifile is too short to contain this inum, extend it. 370 * 371 * XXX: lfs_extend_ifile should take a size instead of always 372 * doing just one block at time. 373 */ 374 while (VTOI(fs->lfs_ivnode)->i_size <= (ino / 375 lfs_sb_getifpb(fs) + lfs_sb_getcleansz(fs) + lfs_sb_getsegtabsz(fs)) 376 << lfs_sb_getbshift(fs)) { 377 DLOG((DLOG_ALLOC, "extend ifile to accommodate ino %jd\n", 378 (intmax_t)ino)); 379 lfs_extend_ifile(fs, NOCRED); 380 extended = 1; 381 } 382 383 /* 384 * Get the inode freelist next pointer. 385 */ 386 LFS_IENTRY(ifp, fs, ino, bp); 387 oldnext = lfs_if_getnextfree(fs, ifp); 388 brelse(bp, 0); 389 390 /* Get fail of inode freelist */ 391 LFS_GET_TAILFREE(fs, cip, cbp, &tailino); 392 393 /* Get head of inode freelist */ 394 LFS_GET_HEADFREE(fs, cip, cbp, &headino); 395 if (headino == ino) { 396 /* Easy case: the inode we wanted was at the head */ 397 LFS_PUT_HEADFREE(fs, cip, cbp, oldnext); 398 } else { 399 ino_t nextfree = 0, maxino, count; /* XXX: gcc */ 400 401 /* Have to find the desired inode in the freelist... */ 402 maxino = ((VTOI(fs->lfs_ivnode)->i_size >> lfs_sb_getbshift(fs)) 403 - lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) 404 * lfs_sb_getifpb(fs); 405 count = 0; 406 407 thisino = headino; 408 while (thisino != LFS_UNUSED_INUM) { 409 /* read this ifile entry */ 410 LFS_IENTRY(ifp, fs, thisino, bp); 411 nextfree = lfs_if_getnextfree(fs, ifp); 412 /* stop if we find it or we hit the end */ 413 if (nextfree == ino || 414 nextfree == LFS_UNUSED_INUM) 415 break; 416 /* nope, keep going... */ 417 thisino = nextfree; 418 brelse(bp, 0); 419 if (++count > maxino) 420 break; 421 } 422 if (count > maxino) { 423 panic("loop in free list"); 424 lfs_preunlock(fs); 425 return ENOENT; 426 } 427 if (nextfree == LFS_UNUSED_INUM) { 428 /* hit the end -- this inode is not available */ 429 brelse(bp, 0); 430 lfs_preunlock(fs); 431 if (extended) 432 panic("extended ifile to accommodate but inode not found"); 433 return ENOENT; 434 } 435 /* found it; update the next pointer */ 436 lfs_if_setnextfree(fs, ifp, oldnext); 437 /* write the ifile block */ 438 LFS_BWRITE_LOG(bp); 439 440 /* If our inode was the tail, thisino is now the tail */ 441 if (tailino == ino) 442 LFS_PUT_TAILFREE(fs, cip, cbp, thisino); 443 } 444 445 /* Clear nextfree, note daddr, and set generation number */ 446 LFS_IENTRY(ifp, fs, ino, bp); 447 lfs_if_setversion(fs, ifp, vers); 448 lfs_if_setnextfree(fs, ifp, LFS_UNUSED_INUM); 449 /* See comment in lfs_valloc */ 450 lfs_if_setdaddr(fs, ifp, LFS_ILLEGAL_DADDR); 451 LFS_WRITEIENTRY(ifp, fs, ino, bp); 452 453 if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) { 454 int error; 455 /* 456 * No more inodes; extend the ifile so that the next 457 * lfs_valloc will succeed. 458 */ 459 if ((error = lfs_extend_ifile(fs, NOCRED)) != 0) { 460 /* restore the freelist */ 461 LFS_PUT_HEADFREE(fs, cip, cbp, ino); 462 463 /* unlock and return */ 464 lfs_preunlock(fs); 465 return error; 466 } 467 } 468 KASSERTMSG((lfs_sb_getfreehd(fs) != LFS_UNUSED_INUM), 469 "inode 0 allocated [4]"); 470 471 /* done */ 472 lfs_preunlock(fs); 473 474 DEBUG_CHECK_FREELIST(fs); 475 476 return 0; 477 } 478 479 #if 0 480 /* 481 * Find the highest-numbered allocated inode. 482 * This will be used to shrink the Ifile. 483 */ 484 static inline ino_t 485 lfs_last_alloc_ino(struct lfs *fs) 486 { 487 ino_t ino, maxino; 488 489 maxino = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) - 490 lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) * 491 lfs_sb_getifpb(fs); 492 for (ino = maxino - 1; ino > LFS_UNUSED_INUM; --ino) { 493 if (ISSET_BITMAP_FREE(fs, ino) == 0) 494 break; 495 } 496 return ino; 497 } 498 499 /* 500 * Find the previous (next lowest numbered) free inode, if any. 501 * If there is none, return LFS_UNUSED_INUM. 502 * 503 * XXX: locking? 504 */ 505 static inline ino_t 506 lfs_freelist_prev(struct lfs *fs, ino_t ino) 507 { 508 ino_t tino, bound, bb, freehdbb; 509 510 if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) { 511 /* No free inodes at all */ 512 return LFS_UNUSED_INUM; 513 } 514 515 /* Search our own word first */ 516 bound = ino & ~BMMASK; 517 for (tino = ino - 1; tino >= bound && tino > LFS_UNUSED_INUM; tino--) 518 if (ISSET_BITMAP_FREE(fs, tino)) 519 return tino; 520 /* If there are no lower words to search, just return */ 521 if (ino >> BMSHIFT == 0) 522 return LFS_UNUSED_INUM; 523 524 /* 525 * Find a word with a free inode in it. We have to be a bit 526 * careful here since ino_t is unsigned. 527 */ 528 freehdbb = (lfs_sb_getfreehd(fs) >> BMSHIFT); 529 for (bb = (ino >> BMSHIFT) - 1; bb >= freehdbb && bb > 0; --bb) 530 if (fs->lfs_ino_bitmap[bb]) 531 break; 532 if (fs->lfs_ino_bitmap[bb] == 0) 533 return LFS_UNUSED_INUM; 534 535 /* Search the word we found */ 536 for (tino = (bb << BMSHIFT) | BMMASK; tino >= (bb << BMSHIFT) && 537 tino > LFS_UNUSED_INUM; tino--) 538 if (ISSET_BITMAP_FREE(fs, tino)) 539 break; 540 541 /* Avoid returning reserved inode numbers */ 542 if (tino <= LFS_IFILE_INUM) 543 tino = LFS_UNUSED_INUM; 544 545 return tino; 546 } 547 #endif 548 549 /* 550 * Free an inode. 551 * 552 * Takes lfs_prelock. Also (independently) takes vp->v_interlock. 553 */ 554 /* ARGUSED */ 555 /* VOP_BWRITE 2i times */ 556 int 557 lfs_vfree(struct vnode *vp, ino_t ino, int mode) 558 { 559 CLEANERINFO *cip; 560 struct buf *cbp, *bp; 561 IFILE *ifp; 562 struct inode *ip; 563 struct lfs *fs; 564 struct segdelta *isd, *fsd, *tmp; 565 566 /* Get the inode number and file system. */ 567 ip = VTOI(vp); 568 fs = ip->i_lfs; 569 ino = ip->i_number; 570 571 ASSERT_NO_SEGLOCK(fs); 572 KASSERTMSG((ino != LFS_UNUSED_INUM), "inode 0 freed"); 573 KASSERT(!fs->lfs_ronly); 574 575 DLOG((DLOG_ALLOC, "lfs_vfree: free ino %lld\n", (long long)ino)); 576 577 /* Drain of pending writes */ 578 mutex_enter(vp->v_interlock); 579 while (lfs_sb_getversion(fs) > 1 && WRITEINPROG(vp)) { 580 cv_wait(&vp->v_cv, vp->v_interlock); 581 } 582 mutex_exit(vp->v_interlock); 583 584 lfs_prelock(fs, 0); 585 586 DEBUG_CHECK_FREELIST(fs); 587 588 /* 589 * If the inode was in a dirop, it isn't now. 590 * 591 * XXX: why are (v_uflag & VU_DIROP) and (ip->i_state & IN_ADIROP) 592 * not updated together in one function? (and why do both exist, 593 * anyway?) 594 */ 595 UNMARK_VNODE(vp); 596 597 mutex_enter(&lfs_lock); 598 if (vp->v_uflag & VU_DIROP) { 599 vp->v_uflag &= ~VU_DIROP; 600 --lfs_dirvcount; 601 --fs->lfs_dirvcount; 602 TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); 603 wakeup(&fs->lfs_dirvcount); 604 wakeup(&lfs_dirvcount); 605 mutex_exit(&lfs_lock); 606 vrele(vp); 607 608 /* 609 * If this inode is not going to be written any more, any 610 * segment accounting left over from its truncation needs 611 * to occur at the end of the next dirops flush. Move 612 * it to the fs-wide list for that purpose. 613 */ 614 RB_TREE_FOREACH_SAFE(isd, &ip->i_lfs_segdhd, tmp) { 615 rb_tree_remove_node(&ip->i_lfs_segdhd, isd); 616 /* Insert unless node exists */ 617 fsd = rb_tree_insert_node(&fs->lfs_segdhd, isd); 618 if (fsd != isd) { 619 /* Merge into existing */ 620 fsd->num += isd->num; 621 free(isd, M_SEGMENT); 622 } 623 } 624 } else { 625 /* 626 * If it's not a dirop, we can finalize right away. 627 */ 628 mutex_exit(&lfs_lock); 629 lfs_finalize_ino_seguse(fs, ip); 630 } 631 632 /* it is no longer an unwritten inode, so update the counts */ 633 KASSERT(!(ip->i_state & IN_CLEANING)); 634 mutex_enter(&lfs_lock); 635 LFS_CLR_UINO(ip, IN_ACCESSED|IN_MODIFIED); 636 mutex_exit(&lfs_lock); 637 638 /* Turn off all inode modification flags */ 639 ip->i_state &= ~IN_ALLMOD; 640 641 /* Mark it deleted */ 642 ip->i_lfs_iflags |= LFSI_DELETED; 643 644 /* Mark it free in the in-memory inode freemap */ 645 SET_BITMAP_FREE(fs, ino); 646 647 /* 648 * Set the ifile's inode entry to unused, increment its version number 649 * and link it onto the free chain. 650 */ 651 652 /* update the on-disk address (to "nowhere") */ 653 lfs_update_iaddr(fs, ip, LFS_UNUSED_DADDR); 654 655 /* fetch the ifile entry */ 656 LFS_IENTRY(ifp, fs, ino, bp); 657 658 /* bump the version */ 659 lfs_if_setversion(fs, ifp, lfs_if_getversion(fs, ifp) + 1); 660 661 #if 0 662 if (lfs_sb_getversion(fs) == 1) { 663 #endif 664 ino_t nextfree; 665 666 /* insert on freelist */ 667 LFS_GET_HEADFREE(fs, cip, cbp, &nextfree); 668 lfs_if_setnextfree(fs, ifp, nextfree); 669 LFS_PUT_HEADFREE(fs, cip, cbp, ino); 670 671 /* write the ifile block */ 672 LFS_WRITEIENTRY(ifp, fs, ino, bp); 673 #if 0 674 } else { 675 ino_t tino, onf, otail; 676 677 /* 678 * Clear the freelist next pointer and write the ifile 679 * block. XXX: why? I'm sure there must be a reason but 680 * it seems both silly and dangerous. 681 */ 682 lfs_if_setnextfree(fs, ifp, LFS_UNUSED_INUM); 683 LFS_WRITEIENTRY(ifp, fs, ino, bp); 684 685 /* 686 * Insert on freelist in order. 687 */ 688 689 /* Find the next lower (by number) free inode */ 690 tino = lfs_freelist_prev(fs, ino); 691 692 if (tino == LFS_UNUSED_INUM) { 693 ino_t nextfree; 694 695 /* 696 * There isn't one; put us on the freelist head. 697 */ 698 699 /* reload the ifile block */ 700 LFS_IENTRY(ifp, fs, ino, bp); 701 /* update the list */ 702 LFS_GET_HEADFREE(fs, cip, cbp, &nextfree); 703 lfs_if_setnextfree(fs, ifp, nextfree); 704 LFS_PUT_HEADFREE(fs, cip, cbp, ino); 705 DLOG((DLOG_ALLOC, "lfs_vfree: headfree %lld -> %lld\n", 706 (long long)nextfree, (long long)ino)); 707 /* write the ifile block */ 708 LFS_WRITEIENTRY(ifp, fs, ino, bp); 709 710 /* If the list was empty, set tail too */ 711 LFS_GET_TAILFREE(fs, cip, cbp, &otail); 712 if (otail == LFS_UNUSED_INUM) { 713 LFS_PUT_TAILFREE(fs, cip, cbp, ino); 714 DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld " 715 "-> %lld\n", (long long)otail, 716 (long long)ino)); 717 } 718 } else { 719 /* 720 * Insert this inode into the list after tino. 721 * We hold the segment lock so we don't have to 722 * worry about blocks being written out of order. 723 */ 724 725 DLOG((DLOG_ALLOC, "lfs_vfree: insert ino %lld " 726 " after %lld\n", ino, tino)); 727 728 /* load the previous inode's ifile block */ 729 LFS_IENTRY(ifp, fs, tino, bp); 730 /* update the list pointer */ 731 onf = lfs_if_getnextfree(fs, ifp); 732 lfs_if_setnextfree(fs, ifp, ino); 733 /* write the block */ 734 LFS_WRITEIENTRY(ifp, fs, tino, bp); 735 736 /* load this inode's ifile block */ 737 LFS_IENTRY(ifp, fs, ino, bp); 738 /* update the list pointer */ 739 lfs_if_setnextfree(fs, ifp, onf); 740 /* write the block */ 741 LFS_WRITEIENTRY(ifp, fs, tino, bp); 742 743 /* If we're last, put us on the tail */ 744 if (onf == LFS_UNUSED_INUM) { 745 LFS_GET_TAILFREE(fs, cip, cbp, &otail); 746 LFS_PUT_TAILFREE(fs, cip, cbp, ino); 747 DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld " 748 "-> %lld\n", (long long)otail, 749 (long long)ino)); 750 } 751 } 752 } 753 #endif 754 755 /* Set superblock modified bit. */ 756 mutex_enter(&lfs_lock); 757 fs->lfs_fmod = 1; 758 mutex_exit(&lfs_lock); 759 760 /* Decrement file count. */ 761 lfs_sb_subnfiles(fs, 1); 762 763 lfs_preunlock(fs); 764 765 DEBUG_CHECK_FREELIST(fs); 766 767 return (0); 768 } 769 770 /* 771 * Sort the freelist and set up the free-inode bitmap. 772 * To be called by lfs_mountfs(). 773 * 774 * Takes the segmenet lock. 775 */ 776 void 777 lfs_order_freelist(struct lfs *fs, ino_t **orphanp, size_t *norphanp) 778 { 779 CLEANERINFO *cip; 780 IFILE *ifp = NULL; 781 struct buf *bp; 782 ino_t ino, firstino, lastino, maxino; 783 ino_t *orphan = NULL; 784 size_t norphan = 0; 785 size_t norphan_alloc = 0; 786 787 ASSERT_NO_SEGLOCK(fs); 788 lfs_prelock(fs, 0); 789 790 DEBUG_CHECK_FREELIST(fs); 791 792 /* largest inode on fs */ 793 maxino = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) - 794 lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs); 795 796 /* allocate the in-memory inode freemap */ 797 /* XXX: assert that fs->lfs_ino_bitmap is null here */ 798 fs->lfs_ino_bitmap = 799 malloc(((maxino + BMMASK) >> BMSHIFT) * sizeof(lfs_bm_t), 800 M_SEGMENT, M_WAITOK | M_ZERO); 801 KASSERT(fs->lfs_ino_bitmap != NULL); 802 803 /* 804 * Scan the ifile. 805 */ 806 807 firstino = lastino = LFS_UNUSED_INUM; 808 for (ino = 0; ino < maxino; ino++) { 809 /* Load this inode's ifile entry. */ 810 if (ino % lfs_sb_getifpb(fs) == 0) 811 LFS_IENTRY(ifp, fs, ino, bp); 812 else 813 LFS_IENTRY_NEXT(ifp, fs); 814 815 /* Don't put zero or ifile on the free list */ 816 if (ino == LFS_UNUSED_INUM || ino == LFS_IFILE_INUM) 817 continue; 818 819 /* 820 * Address orphaned files. 821 * 822 * The idea of this is to free inodes belonging to 823 * files that were unlinked but not reclaimed, I guess 824 * because if we're going to scan the whole ifile 825 * anyway it costs very little to do this. I don't 826 * immediately see any reason this should be disabled, 827 * but presumably it doesn't work... not sure what 828 * happens to such files currently. -- dholland 20160806 829 */ 830 if (lfs_if_getnextfree(fs, ifp) == LFS_ORPHAN_NEXTFREE(fs)) { 831 if (orphan == NULL) { 832 norphan_alloc = 32; /* XXX pulled from arse */ 833 orphan = kmem_zalloc(sizeof(orphan[0]) * 834 norphan_alloc, KM_SLEEP); 835 } else if (norphan == norphan_alloc) { 836 ino_t *orphan_new; 837 if (norphan_alloc >= 4096) 838 norphan_alloc += 4096; 839 else 840 norphan_alloc *= 2; 841 orphan_new = kmem_zalloc(sizeof(orphan[0]) * 842 norphan_alloc, KM_SLEEP); 843 memcpy(orphan_new, orphan, sizeof(orphan[0]) * 844 norphan); 845 kmem_free(orphan, sizeof(orphan[0]) * norphan); 846 orphan = orphan_new; 847 } 848 orphan[norphan++] = ino; 849 } 850 851 if (DADDR_IS_BAD(lfs_if_getdaddr(fs, ifp))) { 852 853 /* 854 * This inode is free. Put it on the free list. 855 */ 856 857 if (firstino == LFS_UNUSED_INUM) { 858 /* XXX: assert lastino == LFS_UNUSED_INUM? */ 859 /* remember the first free inode */ 860 firstino = ino; 861 } else { 862 /* release this inode's ifile entry */ 863 brelse(bp, 0); 864 865 /* XXX: assert lastino != LFS_UNUSED_INUM? */ 866 867 /* load lastino's ifile entry */ 868 LFS_IENTRY(ifp, fs, lastino, bp); 869 /* set the list pointer */ 870 lfs_if_setnextfree(fs, ifp, ino); 871 /* write the block */ 872 LFS_WRITEIENTRY(ifp, fs, lastino, bp); 873 874 /* reload this inode's ifile entry */ 875 LFS_IENTRY(ifp, fs, ino, bp); 876 } 877 /* remember the last free inode seen so far */ 878 lastino = ino; 879 880 /* Mark this inode free in the in-memory freemap */ 881 SET_BITMAP_FREE(fs, ino); 882 } 883 884 /* If moving to the next ifile block, release the buffer. */ 885 if ((ino + 1) % lfs_sb_getifpb(fs) == 0) 886 brelse(bp, 0); 887 } 888 889 /* Write the freelist head and tail pointers */ 890 /* XXX: do we need to mark the superblock dirty? */ 891 LFS_PUT_HEADFREE(fs, cip, bp, firstino); 892 LFS_PUT_TAILFREE(fs, cip, bp, lastino); 893 894 /* done */ 895 lfs_preunlock(fs); 896 897 /* 898 * Shrink the array of orphans so we don't have to carry around 899 * the allocation size. 900 */ 901 if (norphan < norphan_alloc) { 902 ino_t *orphan_new = kmem_alloc(sizeof(orphan[0]) * norphan, 903 KM_SLEEP); 904 memcpy(orphan_new, orphan, sizeof(orphan[0]) * norphan); 905 kmem_free(orphan, sizeof(orphan[0]) * norphan_alloc); 906 orphan = orphan_new; 907 norphan_alloc = norphan; 908 } 909 910 *orphanp = orphan; 911 *norphanp = norphan; 912 913 DEBUG_CHECK_FREELIST(fs); 914 } 915 916 /* 917 * Handle files deleted from the file system namespace. 918 * 919 * When inodes are reclaimed, they are added back to the free list as 920 * usual; but if the system crashes before they can be reclaimed, they 921 * will need to be reclaimed at next mount. We therefore set their 922 * nextfree field to the magic value LFS_ORPHAN_NEXTFREE so we can 923 * identify them. 924 * 925 * 926 * The caller holds a reference to vp, and the buffer cache provides 927 * exclusive access to the "nextfree" entry. 928 */ 929 void 930 lfs_orphan(struct lfs *fs, struct vnode *vp) 931 { 932 IFILE *ifp; 933 struct buf *bp; 934 struct inode *ip; 935 ino_t nextfree; 936 int mincount; 937 938 ip = VTOI(vp); 939 940 ASSERT_NO_SEGLOCK(fs); 941 KASSERT(ip->i_nlink == 0); 942 943 /* 944 * Check reference count. 945 * 946 * Even if the file is not still referenced, it holds a 947 * reference associated with VU_DIROP. This creates an 948 * opportunity for fhopen() to re-open the file, which is 949 * illegal. Therefore we count the number of references that 950 * would come from VDIROP and IN_CLEANING, and compare that 951 * against the vnode ref count. If the usecount can be 952 * accounted for by VDIROP and IN_CLEANING, mark the node 953 * IN_DEAD. 954 */ 955 mincount = 1; /* The caller holds one */ 956 mutex_enter(&lfs_lock); 957 if (ip->i_state & IN_CLEANING) 958 ++mincount; 959 if (vp->v_uflag & VU_DIROP) 960 ++mincount; 961 mutex_exit(&lfs_lock); 962 mutex_enter(vp->v_interlock); 963 if (vp->v_usecount <= mincount) 964 ip->i_state |= IN_DEAD; 965 mutex_exit(vp->v_interlock); 966 967 /* If not already done, mark this inode orphaned. */ 968 lfs_fraglock_enter(fs, RW_READER); 969 LFS_IENTRY(ifp, fs, ip->i_number, bp); 970 nextfree = lfs_if_getnextfree(fs, ifp); 971 if (nextfree == LFS_ORPHAN_NEXTFREE(fs)) { 972 brelse(bp, 0); 973 lfs_fraglock_exit(fs); 974 return; 975 } 976 KASSERT(nextfree == LFS_UNUSED_INUM); 977 lfs_if_setnextfree(fs, ifp, LFS_ORPHAN_NEXTFREE(fs)); 978 LFS_WRITEIENTRY(ifp, fs, ip->i_number, bp); 979 lfs_fraglock_exit(fs); 980 } 981 982 /* 983 * Free orphans discovered during mount using vget/vput. 984 * Ideally this would be merged with lfs_order_freelist but 985 * the free list is not available when lfs_order_freelist is running. 986 */ 987 void 988 lfs_free_orphans(struct lfs *fs, ino_t *orphan, size_t norphan) 989 { 990 struct vnode *vp; 991 size_t i; 992 int error; 993 994 ASSERT_NO_SEGLOCK(fs); 995 DEBUG_CHECK_FREELIST(fs); 996 997 for (i = 0; i < norphan; i++) { 998 error = VFS_VGET(fs->lfs_ivnode->v_mount, orphan[i], 999 LK_EXCLUSIVE, &vp); 1000 if (error) { 1001 printf("lfs_free_orphan vget ino %jd error %d\n", 1002 (intmax_t)orphan[i], error); 1003 continue; 1004 } 1005 vput(vp); 1006 } 1007 1008 if (orphan) 1009 kmem_free(orphan, sizeof(orphan[0]) * norphan); 1010 1011 DEBUG_CHECK_FREELIST(fs); 1012 } 1013 1014 #ifdef DEBUG 1015 static void dump_freelist(struct lfs *); 1016 1017 void 1018 lfs_check_freelist(struct lfs *fs, const char *func, int line) 1019 { 1020 ino_t i, headino, maxino, thisino, tailino, nextfree; 1021 int nfree, count; 1022 struct inode *ip; 1023 IFILE *ifp; 1024 CLEANERINFO *cip; 1025 struct buf *bp; 1026 1027 if (!lfs_do_check_freelist) 1028 return; 1029 1030 lfs_prelock(fs, 0); 1031 1032 ip = VTOI(fs->lfs_ivnode); 1033 maxino = ((ip->i_size >> lfs_sb_getbshift(fs)) - lfs_sb_getcleansz(fs) - 1034 lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs); 1035 1036 /* 1037 * First, check every node. Every inode that doesn't have a disk 1038 * address must have a nextfree pointer, the only exception 1039 * being the tail of the free list. 1040 */ 1041 LFS_GET_TAILFREE(fs, cip, bp, &tailino); 1042 nfree = 0; 1043 for (i = LFS_IFILE_INUM + 1; i < maxino; ++i) { 1044 LFS_IENTRY(ifp, fs, i, bp); 1045 if (lfs_if_getdaddr(fs, ifp) == LFS_UNUSED_DADDR) { 1046 ++nfree; 1047 if (lfs_if_getnextfree(fs, ifp) == LFS_UNUSED_INUM 1048 && i != tailino) { 1049 brelse(bp, 0); 1050 dump_freelist(fs); 1051 printf("At %s:%d:\n", func, line); 1052 printf("tailino=%jd, but ino=%jd" 1053 " neither daddr nor nextfree\n", 1054 (intmax_t)tailino, (intmax_t)i); 1055 panic("Free list leak\n"); 1056 } 1057 } 1058 if (i == tailino 1059 || (!INUM_IS_BAD(fs, lfs_if_getnextfree(fs, ifp)))) { 1060 if (lfs_if_getdaddr(fs, ifp) != LFS_UNUSED_DADDR) { 1061 brelse(bp, 0); 1062 dump_freelist(fs); 1063 printf("At %s:%d:\n", func, line); 1064 printf("with tailino=%jd, ino=%jd" 1065 " daddr=0x%jx, nextfree=0x%jx\n", 1066 (intmax_t)tailino, 1067 (intmax_t)i, 1068 (intmax_t)lfs_if_getdaddr(fs, ifp), 1069 (intmax_t)lfs_if_getnextfree(fs, ifp)); 1070 panic("In use inode on free list\n"); 1071 } 1072 } 1073 brelse(bp, 0); 1074 } 1075 1076 /* 1077 * Walk the free list from head to tail. We should end up with 1078 * the same number of free inodes as we counted above. 1079 */ 1080 1081 /* Get head of inode freelist */ 1082 LFS_GET_HEADFREE(fs, cip, bp, &headino); 1083 count = 0; 1084 thisino = headino; 1085 while (thisino != LFS_UNUSED_INUM) { 1086 if (++count > maxino) 1087 break; 1088 /* read this ifile entry */ 1089 LFS_IENTRY(ifp, fs, thisino, bp); 1090 nextfree = lfs_if_getnextfree(fs, ifp); 1091 brelse(bp, 0); 1092 if (nextfree == LFS_UNUSED_INUM) 1093 break; 1094 thisino = nextfree; 1095 } 1096 if (count > maxino) { 1097 dump_freelist(fs); 1098 printf("At %s:%d:\n", func, line); 1099 printf("count=%jd, maxino=%jd:\n", 1100 (intmax_t)count, (intmax_t)maxino); 1101 panic("loop in free list"); 1102 } 1103 if (count != nfree) { 1104 dump_freelist(fs); 1105 printf("At %s:%d:\n", func, line); 1106 printf("%d inodes without addresses, %d on free list\n", 1107 nfree, count); 1108 panic("Bad free list count"); 1109 } 1110 if (thisino != tailino) { 1111 dump_freelist(fs); 1112 printf("At %s:%d:\n", func, line); 1113 printf("Last ino %jd but tail %jd\n", 1114 (intmax_t)thisino, (intmax_t)tailino); 1115 panic("Bad tail"); 1116 } 1117 lfs_preunlock(fs); 1118 } 1119 1120 static void 1121 dump_freelist(struct lfs *fs) 1122 { 1123 ino_t i, ni, maxino, headino, tailino; 1124 struct inode *ip; 1125 IFILE *ifp; 1126 CLEANERINFO *cip; 1127 struct buf *bp; 1128 int count; 1129 1130 ip = VTOI(fs->lfs_ivnode); 1131 maxino = ((ip->i_size >> lfs_sb_getbshift(fs)) - lfs_sb_getcleansz(fs) - 1132 lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs); 1133 1134 LFS_GET_HEADFREE(fs, cip, bp, &headino); 1135 printf(" head: %jd\n", (intmax_t)headino); 1136 LFS_GET_TAILFREE(fs, cip, bp, &tailino); 1137 printf(" tail: %jd\n", (intmax_t)tailino); 1138 count = 0; 1139 for (i = LFS_IFILE_INUM + 1; i < maxino; ++i) { 1140 LFS_IENTRY(ifp, fs, i, bp); 1141 ni = lfs_if_getnextfree(fs, ifp); 1142 if (ni != LFS_UNUSED_DADDR) { 1143 printf("%jd -> %jd\n", 1144 (intmax_t)i, (intmax_t)ni); 1145 if (++count > 30) { 1146 printf("...\n"); 1147 i = maxino; /* terminate loop */ 1148 } 1149 } 1150 brelse(bp, 0); 1151 } 1152 } 1153 #endif /* DEBUG */ 1154