Home | History | Annotate | Line # | Download | only in lfs
lfs_alloc.c revision 1.139
      1 /*	$NetBSD: lfs_alloc.c,v 1.139 2020/02/22 00:32:08 kamil Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Konrad E. Schroder <perseant (at) hhhh.org>.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 /*
     32  * Copyright (c) 1991, 1993
     33  *	The Regents of the University of California.  All rights reserved.
     34  *
     35  * Redistribution and use in source and binary forms, with or without
     36  * modification, are permitted provided that the following conditions
     37  * are met:
     38  * 1. Redistributions of source code must retain the above copyright
     39  *    notice, this list of conditions and the following disclaimer.
     40  * 2. Redistributions in binary form must reproduce the above copyright
     41  *    notice, this list of conditions and the following disclaimer in the
     42  *    documentation and/or other materials provided with the distribution.
     43  * 3. Neither the name of the University nor the names of its contributors
     44  *    may be used to endorse or promote products derived from this software
     45  *    without specific prior written permission.
     46  *
     47  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     48  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     49  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     50  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     51  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     52  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     53  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     54  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     55  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     56  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     57  * SUCH DAMAGE.
     58  *
     59  *	@(#)lfs_alloc.c	8.4 (Berkeley) 1/4/94
     60  */
     61 
     62 #include <sys/cdefs.h>
     63 __KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.139 2020/02/22 00:32:08 kamil Exp $");
     64 
     65 #if defined(_KERNEL_OPT)
     66 #include "opt_quota.h"
     67 #endif
     68 
     69 #include <sys/param.h>
     70 #include <sys/systm.h>
     71 #include <sys/kernel.h>
     72 #include <sys/buf.h>
     73 #include <sys/lock.h>
     74 #include <sys/vnode.h>
     75 #include <sys/syslog.h>
     76 #include <sys/mount.h>
     77 #include <sys/malloc.h>
     78 #include <sys/pool.h>
     79 #include <sys/proc.h>
     80 #include <sys/kauth.h>
     81 
     82 #include <ufs/lfs/ulfs_quotacommon.h>
     83 #include <ufs/lfs/ulfs_inode.h>
     84 #include <ufs/lfs/ulfsmount.h>
     85 #include <ufs/lfs/ulfs_extern.h>
     86 
     87 #include <ufs/lfs/lfs.h>
     88 #include <ufs/lfs/lfs_accessors.h>
     89 #include <ufs/lfs/lfs_extern.h>
     90 #include <ufs/lfs/lfs_kernel.h>
     91 
     92 /* Constants for inode free bitmap */
     93 #define BMSHIFT 5	/* 2 ** 5 = 32 */
     94 #define BMMASK  ((1 << BMSHIFT) - 1)
     95 #define SET_BITMAP_FREE(F, I) do { \
     96 	DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d set\n", (int)(I), 	\
     97 	     (int)((I) >> BMSHIFT), (int)((I) & BMMASK)));		\
     98 	(F)->lfs_ino_bitmap[(I) >> BMSHIFT] |= (1U << ((I) & BMMASK));	\
     99 } while (0)
    100 #define CLR_BITMAP_FREE(F, I) do { \
    101 	DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d clr\n", (int)(I), 	\
    102 	     (int)((I) >> BMSHIFT), (int)((I) & BMMASK)));		\
    103 	(F)->lfs_ino_bitmap[(I) >> BMSHIFT] &= ~(1U << ((I) & BMMASK));	\
    104 } while(0)
    105 
    106 #define ISSET_BITMAP_FREE(F, I) \
    107 	((F)->lfs_ino_bitmap[(I) >> BMSHIFT] & (1U << ((I) & BMMASK)))
    108 
    109 /*
    110  * Add a new block to the Ifile, to accommodate future file creations.
    111  * Called with the segment lock held.
    112  */
    113 int
    114 lfs_extend_ifile(struct lfs *fs, kauth_cred_t cred)
    115 {
    116 	struct vnode *vp;
    117 	struct inode *ip;
    118 	IFILE64 *ifp64;
    119 	IFILE32 *ifp32;
    120 	IFILE_V1 *ifp_v1;
    121 	struct buf *bp, *cbp;
    122 	int error;
    123 	daddr_t i, blkno, xmax;
    124 	ino_t oldlast, maxino;
    125 	CLEANERINFO *cip;
    126 
    127 	ASSERT_SEGLOCK(fs);
    128 
    129 	/* XXX should check or assert that we aren't readonly. */
    130 
    131 	/*
    132 	 * Get a block and extend the ifile inode. Leave the buffer for
    133 	 * the block in bp.
    134 	 */
    135 
    136 	vp = fs->lfs_ivnode;
    137 	ip = VTOI(vp);
    138 	blkno = lfs_lblkno(fs, ip->i_size);
    139 	if ((error = lfs_balloc(vp, ip->i_size, lfs_sb_getbsize(fs), cred, 0,
    140 				&bp)) != 0) {
    141 		return (error);
    142 	}
    143 	ip->i_size += lfs_sb_getbsize(fs);
    144 	lfs_dino_setsize(fs, ip->i_din, ip->i_size);
    145 	uvm_vnp_setsize(vp, ip->i_size);
    146 
    147 	/*
    148 	 * Compute the new number of inodes, and reallocate the in-memory
    149 	 * inode freemap.
    150 	 */
    151 
    152 	maxino = ((ip->i_size >> lfs_sb_getbshift(fs)) - lfs_sb_getcleansz(fs) -
    153 		  lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
    154 	fs->lfs_ino_bitmap = (lfs_bm_t *)
    155 		realloc(fs->lfs_ino_bitmap, ((maxino + BMMASK) >> BMSHIFT) *
    156 			sizeof(lfs_bm_t), M_SEGMENT, M_WAITOK);
    157 	KASSERT(fs->lfs_ino_bitmap != NULL);
    158 
    159 	/* first new inode number */
    160 	i = (blkno - lfs_sb_getsegtabsz(fs) - lfs_sb_getcleansz(fs)) *
    161 		lfs_sb_getifpb(fs);
    162 
    163 	/*
    164 	 * We insert the new inodes at the head of the free list.
    165 	 * Under normal circumstances, the free list is empty here,
    166 	 * so we are also incidentally placing them at the end (which
    167 	 * we must do if we are to keep them in order).
    168 	 */
    169 	LFS_GET_HEADFREE(fs, cip, cbp, &oldlast);
    170 	LFS_PUT_HEADFREE(fs, cip, cbp, i);
    171 	KASSERTMSG((lfs_sb_getfreehd(fs) != LFS_UNUSED_INUM),
    172 	    "inode 0 allocated [2]");
    173 
    174 	/* inode number to stop at (XXX: why *x*max?) */
    175 	xmax = i + lfs_sb_getifpb(fs);
    176 
    177 	/*
    178 	 * Initialize the ifile block.
    179 	 *
    180 	 * XXX: these loops should be restructured to use the accessor
    181 	 * functions instead of using cutpaste polymorphism.
    182 	 */
    183 
    184 	if (fs->lfs_is64) {
    185 		for (ifp64 = (IFILE64 *)bp->b_data; i < xmax; ++ifp64) {
    186 			SET_BITMAP_FREE(fs, i);
    187 			ifp64->if_version = 1;
    188 			ifp64->if_daddr = LFS_UNUSED_DADDR;
    189 			ifp64->if_nextfree = ++i;
    190 		}
    191 		ifp64--;
    192 		ifp64->if_nextfree = oldlast;
    193 	} else if (lfs_sb_getversion(fs) > 1) {
    194 		for (ifp32 = (IFILE32 *)bp->b_data; i < xmax; ++ifp32) {
    195 			SET_BITMAP_FREE(fs, i);
    196 			ifp32->if_version = 1;
    197 			ifp32->if_daddr = LFS_UNUSED_DADDR;
    198 			ifp32->if_nextfree = ++i;
    199 		}
    200 		ifp32--;
    201 		ifp32->if_nextfree = oldlast;
    202 	} else {
    203 		for (ifp_v1 = (IFILE_V1 *)bp->b_data; i < xmax; ++ifp_v1) {
    204 			SET_BITMAP_FREE(fs, i);
    205 			ifp_v1->if_version = 1;
    206 			ifp_v1->if_daddr = LFS_UNUSED_DADDR;
    207 			ifp_v1->if_nextfree = ++i;
    208 		}
    209 		ifp_v1--;
    210 		ifp_v1->if_nextfree = oldlast;
    211 	}
    212 	LFS_PUT_TAILFREE(fs, cip, cbp, xmax - 1);
    213 
    214 	/*
    215 	 * Write out the new block.
    216 	 */
    217 
    218 	(void) LFS_BWRITE_LOG(bp); /* Ifile */
    219 
    220 	return 0;
    221 }
    222 
    223 /*
    224  * Allocate an inode for a new file.
    225  *
    226  * Takes the segment lock. Also (while holding it) takes lfs_lock
    227  * to frob fs->lfs_fmod.
    228  *
    229  * XXX: the mode argument is unused; should just get rid of it.
    230  */
    231 /* ARGSUSED */
    232 /* VOP_BWRITE 2i times */
    233 int
    234 lfs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred,
    235     ino_t *ino, int *gen)
    236 {
    237 	struct lfs *fs;
    238 	struct buf *bp, *cbp;
    239 	IFILE *ifp;
    240 	int error;
    241 	CLEANERINFO *cip;
    242 
    243 	fs = VTOI(pvp)->i_lfs;
    244 	if (fs->lfs_ronly)
    245 		return EROFS;
    246 
    247 	ASSERT_NO_SEGLOCK(fs);
    248 
    249 	lfs_seglock(fs, SEGM_PROT);
    250 
    251 	/* Get the head of the freelist. */
    252 	LFS_GET_HEADFREE(fs, cip, cbp, ino);
    253 
    254 	/* paranoia */
    255 	KASSERT(*ino != LFS_UNUSED_INUM && *ino != LFS_IFILE_INUM);
    256 	DLOG((DLOG_ALLOC, "lfs_valloc: allocate inode %" PRId64 "\n",
    257 	     *ino));
    258 
    259 	/* Update the in-memory inode freemap */
    260 	CLR_BITMAP_FREE(fs, *ino);
    261 
    262 	/*
    263 	 * Fetch the ifile entry and make sure the inode is really
    264 	 * free.
    265 	 */
    266 	LFS_IENTRY(ifp, fs, *ino, bp);
    267 	if (lfs_if_getdaddr(fs, ifp) != LFS_UNUSED_DADDR)
    268 		panic("lfs_valloc: inuse inode %" PRId64 " on the free list",
    269 		    *ino);
    270 
    271 	/* Update the inode freelist head in the superblock. */
    272 	LFS_PUT_HEADFREE(fs, cip, cbp, lfs_if_getnextfree(fs, ifp));
    273 	DLOG((DLOG_ALLOC, "lfs_valloc: headfree %" PRId64 " -> %ju\n",
    274 	     *ino, (uintmax_t)lfs_if_getnextfree(fs, ifp)));
    275 
    276 	/*
    277 	 * Retrieve the version number from the ifile entry. It was
    278 	 * bumped by vfree, so don't bump it again.
    279 	 */
    280 	*gen = lfs_if_getversion(fs, ifp);
    281 
    282 	/* Done with ifile entry */
    283 	brelse(bp, 0);
    284 
    285 	if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) {
    286 		/*
    287 		 * No more inodes; extend the ifile so that the next
    288 		 * lfs_valloc will succeed.
    289 		 */
    290 		if ((error = lfs_extend_ifile(fs, cred)) != 0) {
    291 			/* restore the freelist */
    292 			LFS_PUT_HEADFREE(fs, cip, cbp, *ino);
    293 
    294 			/* unlock and return */
    295 			lfs_segunlock(fs);
    296 			return error;
    297 		}
    298 	}
    299 	KASSERTMSG((lfs_sb_getfreehd(fs) != LFS_UNUSED_INUM),
    300 	    "inode 0 allocated [3]");
    301 
    302 	/* Set superblock modified bit */
    303 	mutex_enter(&lfs_lock);
    304 	fs->lfs_fmod = 1;
    305 	mutex_exit(&lfs_lock);
    306 
    307 	/* increment file count */
    308 	lfs_sb_addnfiles(fs, 1);
    309 
    310 	/* done */
    311 	lfs_segunlock(fs);
    312 	return 0;
    313 }
    314 
    315 /*
    316  * Allocate an inode for a new file, with given inode number and
    317  * version.
    318  *
    319  * Called in the same context as lfs_valloc and therefore shares the
    320  * same locking assumptions.
    321  */
    322 int
    323 lfs_valloc_fixed(struct lfs *fs, ino_t ino, int vers)
    324 {
    325 	IFILE *ifp;
    326 	struct buf *bp, *cbp;
    327 	ino_t headino, thisino, oldnext;
    328 	CLEANERINFO *cip;
    329 
    330 	if (fs->lfs_ronly)
    331 		return EROFS;
    332 
    333 	ASSERT_NO_SEGLOCK(fs);
    334 
    335 	lfs_seglock(fs, SEGM_PROT);
    336 
    337 	/*
    338 	 * If the ifile is too short to contain this inum, extend it.
    339 	 *
    340 	 * XXX: lfs_extend_ifile should take a size instead of always
    341 	 * doing just one block at time.
    342 	 */
    343 	while (VTOI(fs->lfs_ivnode)->i_size <= (ino /
    344 		lfs_sb_getifpb(fs) + lfs_sb_getcleansz(fs) + lfs_sb_getsegtabsz(fs))
    345 		<< lfs_sb_getbshift(fs)) {
    346 		lfs_extend_ifile(fs, NOCRED);
    347 	}
    348 
    349 	/*
    350 	 * fetch the ifile entry; get the inode freelist next pointer,
    351 	 * and set the version as directed.
    352 	 */
    353 	LFS_IENTRY(ifp, fs, ino, bp);
    354 	oldnext = lfs_if_getnextfree(fs, ifp);
    355 	lfs_if_setversion(fs, ifp, vers);
    356 	brelse(bp, 0);
    357 
    358 	/* Get head of inode freelist */
    359 	LFS_GET_HEADFREE(fs, cip, cbp, &headino);
    360 	if (headino == ino) {
    361 		/* Easy case: the inode we wanted was at the head */
    362 		LFS_PUT_HEADFREE(fs, cip, cbp, oldnext);
    363 	} else {
    364 		ino_t nextfree;
    365 
    366 		/* Have to find the desired inode in the freelist... */
    367 
    368 		thisino = headino;
    369 		while (1) {
    370 			/* read this ifile entry */
    371 			LFS_IENTRY(ifp, fs, thisino, bp);
    372 			nextfree = lfs_if_getnextfree(fs, ifp);
    373 			/* stop if we find it or we hit the end */
    374 			if (nextfree == ino ||
    375 			    nextfree == LFS_UNUSED_INUM)
    376 				break;
    377 			/* nope, keep going... */
    378 			thisino = nextfree;
    379 			brelse(bp, 0);
    380 		}
    381 		if (nextfree == LFS_UNUSED_INUM) {
    382 			/* hit the end -- this inode is not available */
    383 			brelse(bp, 0);
    384 			lfs_segunlock(fs);
    385 			return ENOENT;
    386 		}
    387 		/* found it; update the next pointer */
    388 		lfs_if_setnextfree(fs, ifp, oldnext);
    389 		/* write the ifile block */
    390 		LFS_BWRITE_LOG(bp);
    391 	}
    392 
    393 	/* done */
    394 	lfs_segunlock(fs);
    395 	return 0;
    396 }
    397 
    398 #if 0
    399 /*
    400  * Find the highest-numbered allocated inode.
    401  * This will be used to shrink the Ifile.
    402  */
    403 static inline ino_t
    404 lfs_last_alloc_ino(struct lfs *fs)
    405 {
    406 	ino_t ino, maxino;
    407 
    408 	maxino = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) -
    409 		  lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) *
    410 		lfs_sb_getifpb(fs);
    411 	for (ino = maxino - 1; ino > LFS_UNUSED_INUM; --ino) {
    412 		if (ISSET_BITMAP_FREE(fs, ino) == 0)
    413 			break;
    414 	}
    415 	return ino;
    416 }
    417 #endif
    418 
    419 /*
    420  * Find the previous (next lowest numbered) free inode, if any.
    421  * If there is none, return LFS_UNUSED_INUM.
    422  *
    423  * XXX: locking?
    424  */
    425 static inline ino_t
    426 lfs_freelist_prev(struct lfs *fs, ino_t ino)
    427 {
    428 	ino_t tino, bound, bb, freehdbb;
    429 
    430 	if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) {
    431 		/* No free inodes at all */
    432 		return LFS_UNUSED_INUM;
    433 	}
    434 
    435 	/* Search our own word first */
    436 	bound = ino & ~BMMASK;
    437 	for (tino = ino - 1; tino >= bound && tino > LFS_UNUSED_INUM; tino--)
    438 		if (ISSET_BITMAP_FREE(fs, tino))
    439 			return tino;
    440 	/* If there are no lower words to search, just return */
    441 	if (ino >> BMSHIFT == 0)
    442 		return LFS_UNUSED_INUM;
    443 
    444 	/*
    445 	 * Find a word with a free inode in it.  We have to be a bit
    446 	 * careful here since ino_t is unsigned.
    447 	 */
    448 	freehdbb = (lfs_sb_getfreehd(fs) >> BMSHIFT);
    449 	for (bb = (ino >> BMSHIFT) - 1; bb >= freehdbb && bb > 0; --bb)
    450 		if (fs->lfs_ino_bitmap[bb])
    451 			break;
    452 	if (fs->lfs_ino_bitmap[bb] == 0)
    453 		return LFS_UNUSED_INUM;
    454 
    455 	/* Search the word we found */
    456 	for (tino = (bb << BMSHIFT) | BMMASK; tino >= (bb << BMSHIFT) &&
    457 	     tino > LFS_UNUSED_INUM; tino--)
    458 		if (ISSET_BITMAP_FREE(fs, tino))
    459 			break;
    460 
    461 	/* Avoid returning reserved inode numbers */
    462 	if (tino <= LFS_IFILE_INUM)
    463 		tino = LFS_UNUSED_INUM;
    464 
    465 	return tino;
    466 }
    467 
    468 /*
    469  * Free an inode.
    470  *
    471  * Takes lfs_seglock. Also (independently) takes vp->v_interlock.
    472  */
    473 /* ARGUSED */
    474 /* VOP_BWRITE 2i times */
    475 int
    476 lfs_vfree(struct vnode *vp, ino_t ino, int mode)
    477 {
    478 	SEGUSE *sup;
    479 	CLEANERINFO *cip;
    480 	struct buf *cbp, *bp;
    481 	IFILE *ifp;
    482 	struct inode *ip;
    483 	struct lfs *fs;
    484 	daddr_t old_iaddr;
    485 	ino_t otail;
    486 
    487 	/* Get the inode number and file system. */
    488 	ip = VTOI(vp);
    489 	fs = ip->i_lfs;
    490 	ino = ip->i_number;
    491 
    492 	/* XXX: assert not readonly */
    493 
    494 	ASSERT_NO_SEGLOCK(fs);
    495 	DLOG((DLOG_ALLOC, "lfs_vfree: free ino %lld\n", (long long)ino));
    496 
    497 	/* Drain of pending writes */
    498 	mutex_enter(vp->v_interlock);
    499 	while (lfs_sb_getversion(fs) > 1 && WRITEINPROG(vp)) {
    500 		cv_wait(&vp->v_cv, vp->v_interlock);
    501 	}
    502 	mutex_exit(vp->v_interlock);
    503 
    504 	lfs_seglock(fs, SEGM_PROT);
    505 
    506 	/*
    507 	 * If the inode was in a dirop, it isn't now.
    508 	 *
    509 	 * XXX: why are (v_uflag & VU_DIROP) and (ip->i_state & IN_ADIROP)
    510 	 * not updated together in one function? (and why do both exist,
    511 	 * anyway?)
    512 	 */
    513 	UNMARK_VNODE(vp);
    514 
    515 	mutex_enter(&lfs_lock);
    516 	if (vp->v_uflag & VU_DIROP) {
    517 		vp->v_uflag &= ~VU_DIROP;
    518 		--lfs_dirvcount;
    519 		--fs->lfs_dirvcount;
    520 		TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
    521 		wakeup(&fs->lfs_dirvcount);
    522 		wakeup(&lfs_dirvcount);
    523 		mutex_exit(&lfs_lock);
    524 		vrele(vp);
    525 
    526 		/*
    527 		 * If this inode is not going to be written any more, any
    528 		 * segment accounting left over from its truncation needs
    529 		 * to occur at the end of the next dirops flush.  Attach
    530 		 * them to the fs-wide list for that purpose.
    531 		 */
    532 		if (LIST_FIRST(&ip->i_lfs_segdhd) != NULL) {
    533 			struct segdelta *sd;
    534 
    535 			while((sd = LIST_FIRST(&ip->i_lfs_segdhd)) != NULL) {
    536 				LIST_REMOVE(sd, list);
    537 				LIST_INSERT_HEAD(&fs->lfs_segdhd, sd, list);
    538 			}
    539 		}
    540 	} else {
    541 		/*
    542 		 * If it's not a dirop, we can finalize right away.
    543 		 */
    544 		mutex_exit(&lfs_lock);
    545 		lfs_finalize_ino_seguse(fs, ip);
    546 	}
    547 
    548 	/* it is no longer an unwritten inode, so update the counts */
    549 	mutex_enter(&lfs_lock);
    550 	LFS_CLR_UINO(ip, IN_ACCESSED|IN_CLEANING|IN_MODIFIED);
    551 	mutex_exit(&lfs_lock);
    552 
    553 	/* Turn off all inode modification flags */
    554 	ip->i_state &= ~IN_ALLMOD;
    555 
    556 	/* Mark it deleted */
    557 	ip->i_lfs_iflags |= LFSI_DELETED;
    558 
    559 	/* Mark it free in the in-memory inode freemap */
    560 	SET_BITMAP_FREE(fs, ino);
    561 
    562 	/*
    563 	 * Set the ifile's inode entry to unused, increment its version number
    564 	 * and link it onto the free chain.
    565 	 */
    566 
    567 	/* fetch the ifile entry */
    568 	LFS_IENTRY(ifp, fs, ino, bp);
    569 
    570 	/* update the on-disk address (to "nowhere") */
    571 	old_iaddr = lfs_if_getdaddr(fs, ifp);
    572 	lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR);
    573 
    574 	/* bump the version */
    575 	lfs_if_setversion(fs, ifp, lfs_if_getversion(fs, ifp) + 1);
    576 
    577 	if (lfs_sb_getversion(fs) == 1) {
    578 		ino_t nextfree;
    579 
    580 		/* insert on freelist */
    581 		LFS_GET_HEADFREE(fs, cip, cbp, &nextfree);
    582 		lfs_if_setnextfree(fs, ifp, nextfree);
    583 		LFS_PUT_HEADFREE(fs, cip, cbp, ino);
    584 
    585 		/* write the ifile block */
    586 		(void) LFS_BWRITE_LOG(bp); /* Ifile */
    587 	} else {
    588 		ino_t tino, onf;
    589 
    590 		/*
    591 		 * Clear the freelist next pointer and write the ifile
    592 		 * block. XXX: why? I'm sure there must be a reason but
    593 		 * it seems both silly and dangerous.
    594 		 */
    595 		lfs_if_setnextfree(fs, ifp, LFS_UNUSED_INUM);
    596 		(void) LFS_BWRITE_LOG(bp); /* Ifile */
    597 
    598 		/*
    599 		 * Insert on freelist in order.
    600 		 */
    601 
    602 		/* Find the next lower (by number) free inode */
    603 		tino = lfs_freelist_prev(fs, ino);
    604 
    605 		if (tino == LFS_UNUSED_INUM) {
    606 			ino_t nextfree;
    607 
    608 			/*
    609 			 * There isn't one; put us on the freelist head.
    610 			 */
    611 
    612 			/* reload the ifile block */
    613 			LFS_IENTRY(ifp, fs, ino, bp);
    614 			/* update the list */
    615 			LFS_GET_HEADFREE(fs, cip, cbp, &nextfree);
    616 			lfs_if_setnextfree(fs, ifp, nextfree);
    617 			LFS_PUT_HEADFREE(fs, cip, cbp, ino);
    618 			DLOG((DLOG_ALLOC, "lfs_vfree: headfree %lld -> %lld\n",
    619 			     (long long)nextfree, (long long)ino));
    620 			/* write the ifile block */
    621 			LFS_BWRITE_LOG(bp); /* Ifile */
    622 
    623 			/* If the list was empty, set tail too */
    624 			LFS_GET_TAILFREE(fs, cip, cbp, &otail);
    625 			if (otail == LFS_UNUSED_INUM) {
    626 				LFS_PUT_TAILFREE(fs, cip, cbp, ino);
    627 				DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld "
    628 				      "-> %lld\n", (long long)otail,
    629 				      (long long)ino));
    630 			}
    631 		} else {
    632 			/*
    633 			 * Insert this inode into the list after tino.
    634 			 * We hold the segment lock so we don't have to
    635 			 * worry about blocks being written out of order.
    636 			 */
    637 
    638 			DLOG((DLOG_ALLOC, "lfs_vfree: insert ino %lld "
    639 			      " after %lld\n", ino, tino));
    640 
    641 			/* load the previous inode's ifile block */
    642 			LFS_IENTRY(ifp, fs, tino, bp);
    643 			/* update the list pointer */
    644 			onf = lfs_if_getnextfree(fs, ifp);
    645 			lfs_if_setnextfree(fs, ifp, ino);
    646 			/* write the block */
    647 			LFS_BWRITE_LOG(bp);	/* Ifile */
    648 
    649 			/* load this inode's ifile block */
    650 			LFS_IENTRY(ifp, fs, ino, bp);
    651 			/* update the list pointer */
    652 			lfs_if_setnextfree(fs, ifp, onf);
    653 			/* write the block */
    654 			LFS_BWRITE_LOG(bp);	/* Ifile */
    655 
    656 			/* If we're last, put us on the tail */
    657 			if (onf == LFS_UNUSED_INUM) {
    658 				LFS_GET_TAILFREE(fs, cip, cbp, &otail);
    659 				LFS_PUT_TAILFREE(fs, cip, cbp, ino);
    660 				DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld "
    661 				      "-> %lld\n", (long long)otail,
    662 				      (long long)ino));
    663 			}
    664 		}
    665 	}
    666 	/* XXX: shouldn't this check be further up *before* we trash the fs? */
    667 	KASSERTMSG((ino != LFS_UNUSED_INUM), "inode 0 freed");
    668 
    669 	/*
    670 	 * Update the segment summary for the segment where the on-disk
    671 	 * copy used to be.
    672 	 */
    673 	if (old_iaddr != LFS_UNUSED_DADDR) {
    674 		/* load it */
    675 		LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, old_iaddr), bp);
    676 		/* the number of bytes in the segment should not become < 0 */
    677 		KASSERTMSG((sup->su_nbytes >= DINOSIZE(fs)),
    678 		    "lfs_vfree: negative byte count"
    679 		    " (segment %" PRIu32 " short by %d)\n",
    680 		    lfs_dtosn(fs, old_iaddr),
    681 		    (int)DINOSIZE(fs) - sup->su_nbytes);
    682 		/* update the number of bytes in the segment */
    683 		sup->su_nbytes -= DINOSIZE(fs);
    684 		/* write the segment entry */
    685 		LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, old_iaddr), bp); /* Ifile */
    686 	}
    687 
    688 	/* Set superblock modified bit. */
    689 	mutex_enter(&lfs_lock);
    690 	fs->lfs_fmod = 1;
    691 	mutex_exit(&lfs_lock);
    692 
    693 	/* Decrement file count. */
    694 	lfs_sb_subnfiles(fs, 1);
    695 
    696 	lfs_segunlock(fs);
    697 
    698 	return (0);
    699 }
    700 
    701 /*
    702  * Sort the freelist and set up the free-inode bitmap.
    703  * To be called by lfs_mountfs().
    704  *
    705  * Takes the segmenet lock.
    706  */
    707 void
    708 lfs_order_freelist(struct lfs *fs)
    709 {
    710 	CLEANERINFO *cip;
    711 	IFILE *ifp = NULL;
    712 	struct buf *bp;
    713 	ino_t ino, firstino, lastino, maxino;
    714 #ifdef notyet
    715 	struct vnode *vp;
    716 #endif
    717 
    718 	ASSERT_NO_SEGLOCK(fs);
    719 	lfs_seglock(fs, SEGM_PROT);
    720 
    721 	/* largest inode on fs */
    722 	maxino = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) -
    723 		  lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
    724 
    725 	/* allocate the in-memory inode freemap */
    726 	/* XXX: assert that fs->lfs_ino_bitmap is null here */
    727 	fs->lfs_ino_bitmap =
    728 		malloc(((maxino + BMMASK) >> BMSHIFT) * sizeof(lfs_bm_t),
    729 		       M_SEGMENT, M_WAITOK | M_ZERO);
    730 	KASSERT(fs->lfs_ino_bitmap != NULL);
    731 
    732 	/*
    733 	 * Scan the ifile.
    734 	 */
    735 
    736 	firstino = lastino = LFS_UNUSED_INUM;
    737 	for (ino = 0; ino < maxino; ino++) {
    738 		/* Load this inode's ifile entry. */
    739 		if (ino % lfs_sb_getifpb(fs) == 0)
    740 			LFS_IENTRY(ifp, fs, ino, bp);
    741 		else
    742 			LFS_IENTRY_NEXT(ifp, fs);
    743 
    744 		/* Don't put zero or ifile on the free list */
    745 		if (ino == LFS_UNUSED_INUM || ino == LFS_IFILE_INUM)
    746 			continue;
    747 
    748 #ifdef notyet
    749 		/*
    750 		 * Address orphaned files.
    751 		 *
    752 		 * The idea of this is to free inodes belonging to
    753 		 * files that were unlinked but not reclaimed, I guess
    754 		 * because if we're going to scan the whole ifile
    755 		 * anyway it costs very little to do this. I don't
    756 		 * immediately see any reason this should be disabled,
    757 		 * but presumably it doesn't work... not sure what
    758 		 * happens to such files currently. -- dholland 20160806
    759 		 */
    760 		if (lfs_if_getnextfree(fs, ifp) == LFS_ORPHAN_NEXTFREE &&
    761 		    VFS_VGET(fs->lfs_ivnode->v_mount, ino, LK_EXCLUSIVE, &vp)
    762 		     == 0) {
    763 			unsigned segno;
    764 
    765 			/* get the segment the inode in on disk  */
    766 			segno = lfs_dtosn(fs, lfs_if_getdaddr(fs, ifp));
    767 
    768 			/* truncate the inode */
    769 			lfs_truncate(vp, 0, 0, NOCRED);
    770 			vput(vp);
    771 
    772 			/* load the segment summary */
    773 			LFS_SEGENTRY(sup, fs, segno, bp);
    774 			/* update the number of bytes in the segment */
    775 			KASSERT(sup->su_nbytes >= DINOSIZE(fs));
    776 			sup->su_nbytes -= DINOSIZE(fs);
    777 			/* write the segment summary */
    778 			LFS_WRITESEGENTRY(sup, fs, segno, bp);
    779 
    780 			/* Drop the on-disk address */
    781 			lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR);
    782 			/* write the ifile entry */
    783 			LFS_BWRITE_LOG(bp);
    784 
    785 			/*
    786 			 * and reload it (XXX: why? I guess
    787 			 * LFS_BWRITE_LOG drops it...)
    788 			 */
    789 			LFS_IENTRY(ifp, fs, ino, bp);
    790 
    791 			/* Fall through to next if block */
    792 		}
    793 #endif
    794 
    795 		if (lfs_if_getdaddr(fs, ifp) == LFS_UNUSED_DADDR) {
    796 
    797 			/*
    798 			 * This inode is free. Put it on the free list.
    799 			 */
    800 
    801 			if (firstino == LFS_UNUSED_INUM) {
    802 				/* XXX: assert lastino == LFS_UNUSED_INUM? */
    803 				/* remember the first free inode */
    804 				firstino = ino;
    805 			} else {
    806 				/* release this inode's ifile entry */
    807 				brelse(bp, 0);
    808 
    809 				/* XXX: assert lastino != LFS_UNUSED_INUM? */
    810 
    811 				/* load lastino's ifile entry */
    812 				LFS_IENTRY(ifp, fs, lastino, bp);
    813 				/* set the list pointer */
    814 				lfs_if_setnextfree(fs, ifp, ino);
    815 				/* write the block */
    816 				LFS_BWRITE_LOG(bp);
    817 
    818 				/* reload this inode's ifile entry */
    819 				LFS_IENTRY(ifp, fs, ino, bp);
    820 			}
    821 			/* remember the last free inode seen so far */
    822 			lastino = ino;
    823 
    824 			/* Mark this inode free in the in-memory freemap */
    825 			SET_BITMAP_FREE(fs, ino);
    826 		}
    827 
    828 		/* If moving to the next ifile block, release the buffer. */
    829 		if ((ino + 1) % lfs_sb_getifpb(fs) == 0)
    830 			brelse(bp, 0);
    831 	}
    832 
    833 	/* Write the freelist head and tail pointers */
    834 	/* XXX: do we need to mark the superblock dirty? */
    835 	LFS_PUT_HEADFREE(fs, cip, bp, firstino);
    836 	LFS_PUT_TAILFREE(fs, cip, bp, lastino);
    837 
    838 	/* done */
    839 	lfs_segunlock(fs);
    840 }
    841 
    842 /*
    843  * Mark a file orphaned (unlinked but not yet reclaimed) by inode
    844  * number. Do this with a magic freelist next pointer.
    845  *
    846  * XXX: howzabout some locking?
    847  */
    848 void
    849 lfs_orphan(struct lfs *fs, ino_t ino)
    850 {
    851 	IFILE *ifp;
    852 	struct buf *bp;
    853 
    854 	LFS_IENTRY(ifp, fs, ino, bp);
    855 	lfs_if_setnextfree(fs, ifp, LFS_ORPHAN_NEXTFREE);
    856 	LFS_BWRITE_LOG(bp);
    857 }
    858